diff options
| author | Jake | 2018-03-25 13:40:05 -0400 |
|---|---|---|
| committer | Jake | 2018-03-25 13:40:05 -0400 |
| commit | 103cbbc604da6fc22681b29b0fea2658b86539dd (patch) | |
| tree | 00165bcd7eaab40f3ae6d857f262213d9e2d978c | |
| download | pdf_form-103cbbc604da6fc22681b29b0fea2658b86539dd.tar.bz2 | |
first commit
| -rw-r--r-- | Cargo.toml | 8 | ||||
| -rw-r--r-- | src/lib.rs | 410 |
2 files changed, 418 insertions, 0 deletions
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1ac7d1e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "pdf_form" +version = "0.1.0" +authors = ["Jake <jsandler18@gmail.com>"] + +[dependencies] +lopdf = "0.15.1" +bitflags = "1.0.1" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e2e5847 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,410 @@ +//! This crate is for filling out PDFs with forms programmatically. +extern crate lopdf; +#[macro_use] +extern crate bitflags; + +use lopdf::{Document, ObjectId, Object, StringFormat}; +use std::path::Path; +use std::collections::VecDeque; +use std::io; +use std::str; +// Button field flags (/Ff). The PDF spec numbers bit positions from 1, so bit k is the mask 1 << (k - 1). +bitflags! { + struct ButtonFlags: u32 { + const NO_TOGGLE_TO_OFF = 0x4000; // bit 15 + const RADIO = 0x8000; // bit 16 + const PUSHBUTTON = 0x10000; // bit 17 + const RADIO_IN_UNISON = 0x2000000; // bit 26 + + } +} +// Choice field flags (/Ff), using the same 1-based bit numbering as the button flags. +bitflags! { + struct ChoiceFlags: u32 { + const COBMO = 0x20000; // bit 18 (Combo) + const EDIT = 0x40000; // bit 19 + const SORT = 0x80000; // bit 20 + const MULTISELECT = 0x200000; // bit 22 + const DO_NOT_SPELLCHECK = 0x400000; // bit 23 + const COMMIT_ON_CHANGE = 0x4000000; // bit 27 + } +} + +/// A PDF Form that contains fillable fields +/// +/// Use this struct to load an existing PDF with a fillable form using the `load` method. It will +/// analyze the PDF and identify the fields. Then you can get and set the content of the fields by +/// index. 
+pub struct Form { + doc: Document, + form_ids: Vec<ObjectId> +} +#[derive(Debug)] +/// The possible types of fillable form fields in a PDF +pub enum FieldType { + Button, + Radio, + CheckBox, + ListBox, + ComboBox, + Text +} + +/// The current state of a form field +#[derive(Debug)] +pub enum FieldState { + /// Push buttons have no state + Button, + /// `selected` is the singular option from `options` that is selected + Radio { selected: String, options: Vec<String> }, + /// The toggle state of the checkbox + CheckBox { is_checked: bool }, + /// `selected` is the list of selected options from `options` + ListBox { selected: Vec<String>, options: Vec<String>, multiselect: bool }, + /// `selected` is the list of selected options from `options` + ComboBox { selected: Vec<String>, options: Vec<String>, multiselect: bool }, + /// User Text Input + Text { text: String } +} + +#[derive(Debug)] +/// Errors that may occur while loading a PDF +pub enum LoadError { + IoError(io::Error), + DictionaryKeyNotFound, + NoSuchReference(ObjectId), + NotAReference, + UnexpectedType +} +#[derive(Debug)] +/// Errors that may occur while setting values in a form +pub enum ValueError { + TypeMismatch, + InvalidSelection, + TooManySelected +} + +impl From<io::Error> for LoadError { + fn from(error: io::Error) -> Self { + LoadError::IoError(error) + } +} + +trait PdfObjectDeref { + fn deref<'a>(&self, doc: &'a Document) -> Result<&'a Object, LoadError>; +} + +impl PdfObjectDeref for Object { + fn deref<'a>(&self, doc: &'a Document) -> Result<&'a Object, LoadError> { + match self { + &Object::Reference(oid) => doc.objects.get(&oid).ok_or(LoadError::NoSuchReference(oid)), + _ => Err(LoadError::NotAReference) + } + } +} + +impl Form { + /// Takes a path to a PDF with a fillable form, analyzes the file, and attempts to identify all + /// of the fields the form has. 
This is the only way to create a `Form` + pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, LoadError> { + let doc = Document::load(path)?; + let mut form_ids = Vec::new(); + let mut queue = VecDeque::new(); + // Block so borrow of doc ends before doc is moved into the result + { + // Get the form's top level fields + let catalog = doc.trailer.get("Root") + .ok_or(LoadError::DictionaryKeyNotFound)? + .deref(&doc)? + .as_dict().ok_or(LoadError::UnexpectedType)?; + let acroform = catalog.get("AcroForm") + .ok_or(LoadError::DictionaryKeyNotFound)? + .deref(&doc)? + .as_dict().ok_or(LoadError::UnexpectedType)?; + let fields_list = acroform.get("Fields") + .ok_or(LoadError::DictionaryKeyNotFound)? + // .deref(&doc)? + .as_array().ok_or(LoadError::UnexpectedType)?; + queue.append(&mut VecDeque::from(fields_list.clone())); + + // Iterate over the fields + while let Some(objref) = queue.pop_front() { + let obj = objref.deref(&doc)?; + if let &Object::Dictionary(ref dict) = obj { + // If the field has FT, it actually takes input. 
Save this + if let Some(_) = dict.get("FT") { + form_ids.push(objref.as_reference().unwrap()); + } + // If this field has kids, they might have FT, so add them to the queue + if let Some(&Object::Array(ref kids)) = dict.get("Kids") { + queue.append(&mut VecDeque::from(kids.clone())); + } + } + } + } + Ok(Form { doc, form_ids }) + } + + /// Returns the number of fields the form has + pub fn len(&self) -> usize { + self.form_ids.len() + } + + /// Gets the type of field of the given index + /// + /// # Panics + /// This function will panic if the index is greater than or equal to the number of fields + pub fn get_type(&self, n: usize) -> FieldType { + // unwraps should be fine because load should have verified everything exists + let field = self.doc.objects.get(&self.form_ids[n]).unwrap().as_dict().unwrap(); + let obj_zero = Object::Integer(0); + let type_str = field.get("FT").unwrap().as_name_str().unwrap(); + if type_str == "Btn" { + let flags = ButtonFlags::from_bits_truncate(field.get("Ff").unwrap_or(&obj_zero).as_i64().unwrap() as u32); + if flags.intersects(ButtonFlags::RADIO) { + FieldType::Radio + } else if flags.intersects(ButtonFlags::PUSHBUTTON) { + FieldType::Button + } else { + FieldType::CheckBox + } + } else if type_str == "Ch" { + let flags = ChoiceFlags::from_bits_truncate(field.get("Ff").unwrap_or(&obj_zero).as_i64().unwrap() as u32); + if flags.intersects(ChoiceFlags::COBMO) { + FieldType::ComboBox + } else { + FieldType::ListBox + } + } else { + FieldType::Text + } + } + + /// Gets the state of field of the given index + /// + /// # Panics + /// This function will panic if the index is greater than or equal to the number of fields + pub fn get_state(&self, n: usize) -> FieldState { + let field = self.doc.objects.get(&self.form_ids[n]).unwrap().as_dict().unwrap(); + match self.get_type(n) { + FieldType::Button => FieldState::Button, + FieldType::Radio => FieldState::Radio { + selected: match field.get("V") { + Some(name) => name.as_name_str().unwrap().to_owned(), + None 
=> match field.get("AS") { + Some(name) => name.as_name_str().unwrap().to_owned(), + None => "".to_owned() + } + }, + options: self.get_possibilities(self.form_ids[n]) + }, + FieldType::CheckBox=> FieldState::CheckBox { is_checked: + match field.get("V") { + Some(name) => if name.as_name_str().unwrap() == "Yes" { true } else { false }, + None => match field.get("AS") { + Some(name) => if name.as_name_str().unwrap() == "Yes" { true } else { false }, + None => false + } + } + }, + FieldType::ListBox => FieldState::ListBox { + // V field in a list box can be either text for one option, an array for many + // options, or null + selected: match field.get("V") { + Some(selection) => match selection { + &Object::String(ref s,StringFormat::Literal) => vec![str::from_utf8(&s).unwrap().to_owned()], + &Object::Array(ref chosen) => { + let mut res = Vec::new(); + for obj in chosen { + if let &Object::String(ref s,StringFormat::Literal) = obj { + res.push(str::from_utf8(&s).unwrap().to_owned()); + } + } + res + } + _ => Vec::new() + }, + None => Vec::new() + }, + // The options is an array of either text elements or arrays where the second + // element is what we want; arrays with fewer than two elements are skipped + options: match field.get("Opt") { + Some(&Object::Array(ref options)) => options.iter().map(|x| { + match x { + &Object::String(ref s,StringFormat::Literal) => str::from_utf8(&s).unwrap().to_owned(), + &Object::Array(ref arr) => if let Some(&Object::String(ref s,StringFormat::Literal)) = arr.get(1) { + str::from_utf8(&s).unwrap().to_owned() + } else { + String::new() + }, + _ => String::new() + } + }).filter(|x| x.len() > 0).collect(), + _ => Vec::new() + }, + multiselect: { + // A missing Ff entry means no flags are set, so treat it as 0 (as get_type does) instead of panicking + let flags = ChoiceFlags::from_bits_truncate(field.get("Ff").and_then(|ff| ff.as_i64()).unwrap_or(0) as u32); + flags.intersects(ChoiceFlags::MULTISELECT) + } + }, + FieldType::ComboBox => FieldState::ComboBox { + // V field in a combo box can be either text for one option, an array for many + // options, or null + selected: match field.get("V") { + 
Some(selection) => match selection { + &Object::String(ref s,StringFormat::Literal) => vec![str::from_utf8(&s).unwrap().to_owned()], + &Object::Array(ref chosen) => { + let mut res = Vec::new(); + for obj in chosen { + if let &Object::String(ref s,StringFormat::Literal) = obj { + res.push(str::from_utf8(&s).unwrap().to_owned()); + } + } + res + } + _ => Vec::new() + }, + None => Vec::new() + }, + // The options is an array of either text elements or arrays where the second + // element is what we want; arrays with fewer than two elements are skipped + options: match field.get("Opt") { + Some(&Object::Array(ref options)) => options.iter().map(|x| { + match x { + &Object::String(ref s,StringFormat::Literal) => str::from_utf8(&s).unwrap().to_owned(), + &Object::Array(ref arr) => if let Some(&Object::String(ref s,StringFormat::Literal)) = arr.get(1) { + str::from_utf8(&s).unwrap().to_owned() + } else { + String::new() + }, + _ => String::new() + } + }).filter(|x| x.len() > 0).collect(), + _ => Vec::new() + }, + multiselect: { + // A missing Ff entry means no flags are set, so treat it as 0 (as get_type does) instead of panicking + let flags = ChoiceFlags::from_bits_truncate(field.get("Ff").and_then(|ff| ff.as_i64()).unwrap_or(0) as u32); + flags.intersects(ChoiceFlags::MULTISELECT) + } + }, + FieldType::Text => FieldState::Text{ text: + match field.get("V") { + Some(&Object::String(ref s,StringFormat::Literal)) => + str::from_utf8(&s.clone()).unwrap().to_owned(), + _ => "".to_owned() + } + + } + } + } + + + /// If the field at index `n` is a text field, fills in that field with the text `s`. 
+ /// If it is not a text field, returns ValueError + /// + /// # Panics + /// Will panic if n is greater than or equal to the number of fields + pub fn set_text(&mut self, n: usize, s: String) -> Result<(),ValueError> { + match self.get_type(n) { + FieldType::Text => { + let field = self.doc.objects.get_mut(&self.form_ids[n]).unwrap().as_dict_mut().unwrap(); + field.set("V",Object::String(s.into_bytes(),StringFormat::Literal)); + field.remove("AP"); + Ok(()) + }, + _ => Err(ValueError::TypeMismatch) + + } + } + + fn get_possibilities(&self, oid: ObjectId) -> Vec<String> { + let mut res = Vec::new(); + let kids_obj = self.doc.objects.get(&oid).unwrap().as_dict().unwrap().get("Kids"); + if let Some(&Object::Array(ref kids)) = kids_obj { + for kid in kids { + if let Some(&Object::Name(ref s)) = kid.deref(&self.doc).unwrap().as_dict().unwrap().get("AS") { + res.push(str::from_utf8(&s).unwrap().to_owned()); + } + } + } + res + } + + /// If the field at index `n` is a checkbox field, toggles the check box based on the value + /// `is_checked`. 
+ /// If it is not a checkbox field, returns ValueError + /// + /// # Panics + /// Will panic if n is greater than or equal to the number of fields + pub fn set_check_box(&mut self, n: usize, is_checked: bool) -> Result<(),ValueError> { + match self.get_type(n) { + FieldType::CheckBox => { + let state = Object::Name({if is_checked {"Yes"} else {"Off"}}.to_owned().into_bytes()); + let field = self.doc.objects.get_mut(&self.form_ids[n]).unwrap().as_dict_mut().unwrap(); + field.set("V",state.clone()); + field.set("AS",state); + Ok(()) + }, + _ => Err(ValueError::TypeMismatch) + + } + } + + /// If the field at index `n` is a radio field, toggles the radio button based on the value + /// `choice` + /// If it is not a radio button field or the choice is not a valid option, returns ValueError + /// + /// # Panics + /// Will panic if n is greater than or equal to the number of fields + pub fn set_radio(&mut self, n: usize, choice: String) -> Result<(),ValueError> { + match self.get_state(n) { + FieldState::Radio { selected: _, options } => if options.contains(&choice) { + let field = self.doc.objects.get_mut(&self.form_ids[n]).unwrap().as_dict_mut().unwrap(); + field.set("V",Object::Name(choice.into_bytes())); + Ok(()) + } else { + Err(ValueError::InvalidSelection) + }, + _ => Err(ValueError::TypeMismatch) + + } + } + + /// If the field at index `n` is a listbox or combobox field, selects the options in `choices` + /// If it is not a listbox or combobox field or one of the choices is not a valid option, or if too many choices are selected, returns ValueError + /// + /// # Panics + /// Will panic if n is greater than or equal to the number of fields + pub fn set_choice(&mut self, n: usize, choices: Vec<String>) -> Result<(),ValueError> { + match self.get_state(n) { + FieldState::ListBox { selected: _, options, multiselect } | FieldState::ComboBox { selected: _, options, multiselect } => if choices.iter().all(|h| options.contains(h)) { + if !multiselect && choices.len() > 1 { + 
Err(ValueError::TooManySelected) + } else { + let field = self.doc.objects.get_mut(&self.form_ids[n]).unwrap().as_dict_mut().unwrap(); + match choices.len() { + 0 => field.set("V", Object::Null), + 1 => field.set("V", Object::String(choices[0].clone().into_bytes(), + StringFormat::Literal)), + _ => field.set("V", Object::Array(choices.iter().map(|x| Object::String(x.clone().into_bytes(),StringFormat::Literal)).collect())) + + }; + Ok(()) + } + + } else { + Err(ValueError::InvalidSelection) + }, + _ => Err(ValueError::TypeMismatch) + + } + } + + + /// Saves the form to the specified path + pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<(),io::Error> { + self.doc.save(path).map(|_| ()) + } +} |
