diff options
| author | Jake Sandler | 2020-09-28 14:35:52 -0400 | 
|---|---|---|
| committer | GitHub | 2020-09-28 14:35:52 -0400 | 
| commit | 3d42dcdc89ddf4b4982fecc64e5cd8eb94bb9026 (patch) | |
| tree | d292a6f88820d48236b5551e566cba59c83be7d8 /src | |
| parent | 2eff696d2dd769d04e4c95b9029c529c9e283c2d (diff) | |
| parent | 6a1c7e10cbe5cddaeae59a2d70e9d7256d2a5267 (diff) | |
| download | pdf_form-3d42dcdc89ddf4b4982fecc64e5cd8eb94bb9026.tar.bz2 | |
Merge pull request #6 from Emulator000/fields-appearance
Fix for Textbox and Checkbox fields appearance issue when filled
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 183 | ||||
| -rw-r--r-- | src/utils.rs | 78 | 
2 files changed, 240 insertions, 21 deletions
| @@ -13,6 +13,7 @@ use std::str;  use bitflags::_core::str::from_utf8; +use lopdf::content::{Content, Operation};  use lopdf::{Document, Object, ObjectId, StringFormat};  use crate::utils::*; @@ -135,18 +136,27 @@ impl Form {          Self::load_doc(doc)      } -    fn load_doc(doc: Document) -> Result<Self, LoadError> { +    fn load_doc(mut doc: Document) -> Result<Self, LoadError> {          let mut form_ids = Vec::new();          let mut queue = VecDeque::new();          // Block so borrow of doc ends before doc is moved into the result          { -            // Get the form's top level fields -            let catalog = doc.trailer.get(b"Root")?.deref(&doc)?.as_dict()?; -            let acroform = catalog.get(b"AcroForm")?.deref(&doc)?.as_dict()?; -            let fields_list = acroform -                .get(b"Fields")? -                //    .deref(&doc)? -                .as_array()?; +            doc.decompress(); + +            let acroform = doc +                .objects +                .get_mut( +                    &doc.trailer +                        .get(b"Root")? +                        .deref(&doc)? +                        .as_dict()? +                        .get(b"AcroForm")? +                        .as_reference()?, +                ) +                .ok_or(LoadError::NotAReference)? +                .as_dict_mut()?; + +            let fields_list = acroform.get(b"Fields")?.as_array()?;              queue.append(&mut VecDeque::from(fields_list.clone()));              // Iterate over the fields @@ -423,8 +433,10 @@ impl Form {                      .as_dict_mut()                      .unwrap(); -                field.set("V", Object::String(s.into_bytes(), StringFormat::Literal)); -                field.remove(b"AP"); +                field.set("V", Object::string_literal(s.into_bytes())); + +                // Regenerate text appearance confoming the new text but ignore the result +                let _ = self.regenerate_text_appearance(n);                  Ok(())              } @@ -432,6 +444,139 @@ impl Form {          }      } +    /// Regenerates the appearance for the field at index `n` due to an alteration of the +    /// original TextField value, the AP will be updated accordingly. +    /// +    /// # Incomplete +    /// This function is not exhaustive as not parse the original TextField orientation +    /// or the text alignment and other kind of enrichments, also doesn't discover for +    /// the global document DA. +    /// +    /// A more sophisticated parser is needed here +    fn regenerate_text_appearance(&mut self, n: usize) -> Result<(), lopdf::Error> { +        let field = { +            self.doc +                .objects +                .get(&self.form_ids[n]) +                .unwrap() +                .as_dict() +                .unwrap() +        }; + +        // The value of the object (should be a string) +        let value = field.get(b"V")?.to_owned(); + +        // The default appearance of the object (should be a string) +        let da = field.get(b"DA")?.to_owned(); + +        // The default appearance of the object (should be a string) +        let rect = field +            .get(b"Rect")? +            .as_array()? +            .iter() +            .map(|object| { +                object +                    .as_f64() +                    .unwrap_or(object.as_i64().unwrap_or(0) as f64) as f32 +            }) +            .collect::<Vec<_>>(); + +        // Gets the object stream +        let object_id = field.get(b"AP")?.as_dict()?.get(b"N")?.as_reference()?; +        let stream = self.doc.get_object_mut(object_id)?.as_stream_mut()?; + +        // Decode and get the content, even if is compressed +        let mut content = { +            if let Ok(content) = stream.decompressed_content() { +                Content::decode(&content)? +            } else { +                Content::decode(&stream.content)? +            } +        }; + +        // Ignored operators +        let ignored_operators = vec![ +            "bt", "tc", "tw", "tz", "g", "tm", "tr", "tf", "tj", "et", "q", "bmc", "emc", +        ]; + +        // Remove these ignored operators as we have to generate the text and fonts again +        content.operations.retain(|operation| { +            !ignored_operators.contains(&operation.operator.to_lowercase().as_str()) +        }); + +        // Let's construct the text widget +        content.operations.append(&mut vec![ +            Operation::new("BMC", vec!["Tx".into()]), +            Operation::new("q", vec![]), +            Operation::new("BT", vec![]), +        ]); + +        let font = parse_font(match da { +            Object::String(ref bytes, _) => Some(from_utf8(bytes)?), +            _ => None, +        }); + +        // Define some helping font variables +        let font_name = (font.0).0; +        let font_size = (font.0).1; +        let font_color = font.1; + +        // Set the font type and size and color +        content.operations.append(&mut vec![ +            Operation::new("Tf", vec![font_name.into(), font_size.into()]), +            Operation::new( +                font_color.0, +                match font_color.0 { +                    "k" => vec![ +                        font_color.1.into(), +                        font_color.2.into(), +                        font_color.3.into(), +                        font_color.4.into(), +                    ], +                    "rg" => vec![ +                        font_color.1.into(), +                        font_color.2.into(), +                        font_color.3.into(), +                    ], +                    _ => vec![font_color.1.into()], +                }, +            ), +        ]); + +        // Calcolate the text offset +        let x = 2.0; // Suppose this fixed offset as we should have known the border here + +        // Formula picked up from Poppler +        let dy = rect[1] - rect[3]; +        let y = if dy > 0.0 { +            0.5 * dy - 0.4 * font_size as f32 +        } else { +            0.5 * font_size as f32 +        }; + +        // Set the text bounds, first are fixed at "1 0 0 1" and then the calculated x,y +        content.operations.append(&mut vec![Operation::new( +            "Tm", +            vec![1.into(), 0.into(), 0.into(), 1.into(), x.into(), y.into()], +        )]); + +        // Set the text value and some finalizing operations +        content.operations.append(&mut vec![ +            Operation::new("Tj", vec![value]), +            Operation::new("ET", vec![]), +            Operation::new("Q", vec![]), +            Operation::new("EMC", vec![]), +        ]); + +        // Set the new content to the original stream and compress it +        if let Ok(encoded_content) = content.encode() { +            stream.set_plain_content(encoded_content); +            let _ = stream.compress(); +        } + +        Ok(()) +    } +      /// If the field at index `n` is a checkbox field, toggles the check box based on the value      /// `is_checked`.      /// If it is not a checkbox field, returns ValueError @@ -441,17 +586,6 @@ impl Form {      pub fn set_check_box(&mut self, n: usize, is_checked: bool) -> Result<(), ValueError> {          match self.get_state(n) {              FieldState::CheckBox { .. } => { -                let state = Object::Name( -                    { -                        if is_checked { -                            "Yes" -                        } else { -                            "Off" -                        } -                    } -                    .to_owned() -                    .into_bytes(), -                );                  let field = self                      .doc                      .objects @@ -460,6 +594,13 @@ impl Form {                      .as_dict_mut()                      .unwrap(); +                let on = get_on_value(field); +                let state = Object::Name( +                    if is_checked { on.as_str() } else { "Off" } +                        .to_owned() +                        .into_bytes(), +                ); +                  field.set("V", state.clone());                  field.set("AS", state); diff --git a/src/utils.rs b/src/utils.rs index 7ff4e41..0aa342a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,5 +1,7 @@  use lopdf::{Dictionary, Object}; +use crate::from_utf8; +  bitflags! {      pub struct FieldFlags: u32 {          const READONLY          = 0x1; @@ -47,3 +49,79 @@ pub fn get_field_flags(field: &Dictionary) -> u32 {          .as_i64()          .unwrap() as u32  } + +pub fn get_on_value(field: &Dictionary) -> String { +    let mut option = None; +    if let Ok(ap) = field.get(b"AP") { +        if let Ok(dict) = ap.as_dict() { +            if let Ok(values) = dict.get(b"N") { +                if let Ok(options) = values.as_dict() { +                    for (name, _) in options { +                        if let Ok(name) = from_utf8(name) { +                            if name != "Off" && option.is_none() { +                                option = Some(name.into()); +                            } +                        } +                    } +                } +            } +        } +    } + +    option.unwrap_or("Yes".into()) +} + +pub fn parse_font(font_string: Option<&str>) -> ((&str, i32), (&str, i32, i32, i32, i32)) { +    // The default font object (/Helv 12 Tf 0 g) +    let default_font = ("Helv", 12); +    let default_color = ("g", 0, 0, 0, 0); + +    // Build the font basing on the default appearance, if exists, if not, +    // assume a default font (surely to be improved!) +    match font_string { +        Some(font_string) => { +            let font = font_string +                .trim_start_matches('/') +                .split("Tf") +                .collect::<Vec<_>>(); + +            if font.len() < 2 { +                (default_font, default_color) +            } else { +                let font_family = font[0].trim().split(' ').collect::<Vec<_>>(); +                let font_color = font[1].trim().split(' ').collect::<Vec<_>>(); + +                let font = if font_family.len() >= 2 { +                    (font_family[0], font_family[1].parse::<i32>().unwrap_or(0)) +                } else { +                    default_font +                }; + +                let color = if font_color.len() == 2 { +                    ("g", font_color[0].parse::<i32>().unwrap_or(0), 0, 0, 0) +                } else if font_color.len() == 4 { +                    ( +                        "rg", +                        font_color[0].parse::<i32>().unwrap_or(0), +                        font_color[1].parse::<i32>().unwrap_or(0), +                        font_color[2].parse::<i32>().unwrap_or(0), +                        0, +                    ) +                } else if font_color.len() == 5 { +                    ( +                        "k", +                        font_color[0].parse::<i32>().unwrap_or(0), +                        font_color[1].parse::<i32>().unwrap_or(0), +                        font_color[2].parse::<i32>().unwrap_or(0), +                        font_color[3].parse::<i32>().unwrap_or(0), +                    ) +                } else { +                    default_color +                }; + +                (font, color) +            } +        } +        _ => (default_font, default_color), +    } +} | 
