diff options
| author | Teddy Wing | 2019-11-01 15:09:31 +0100 | 
|---|---|---|
| committer | Teddy Wing | 2019-11-01 15:09:31 +0100 | 
| commit | f19139b6c4fe212e36fcfae2575228b6575bda77 (patch) | |
| tree | 5105a5c5e8014e2985cfc9e96c37c5c5a453e46e /src | |
| parent | 612842e55541d112ae4dbf56d49a6026319d1165 (diff) | |
| download | pdf-urls-f19139b6c4fe212e36fcfae2575228b6575bda77.tar.bz2 | |
Find the PDF object that URLs are stored in
Thanks to plinth (https://stackoverflow.com/users/20481/plinth) on Stack
Overflow, I learned that URLs are stored in /A entries in a PDF:
> To get the link to go somewhere you'll need either a /Dest or an /A
> entry in the link annot (but not both). /Dest is an older artifact for
> page-level navigation - you won't use this. Instead, use the /A entry
> which is an action dictionary. So if you wanted to navigate to the url
> http://www.google.com, you would make your annotation look like this:
>
> << /Type /Annot /Subtype /Link /Rect [ x1 y1 x2 y2 ]
>    /A << /Type /Action /S /URI /URI (http://www.google.com) >>
> >>
https://stackoverflow.com/questions/19492229/add-a-hyperlink-into-a-pdf-document/19496996#19496996
To extract URLs, find the /A objects and get the text value of their
`URI` fields.
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 24 | 
1 file changed, 23 insertions, 1 deletion
diff --git a/src/main.rs b/src/main.rs index 95a6d1e..ac9a9c6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,29 @@ fn main() {                  // dbg!(d);                  for (k, v) in d.iter() { -                    dbg!(::std::str::from_utf8(&k).unwrap(), v); +                    let key = ::std::str::from_utf8(&k).unwrap(); + +                    if key == "A" { +                        dbg!(v); + +                        for (k, v) in v.as_dict().unwrap() { +                            let key = ::std::str::from_utf8(&k).unwrap(); + +                            // dbg!(key, v); +                            if key == "URI" { +                                dbg!(v); + +                                match v { +                                    Object::String(s, _) => { +                                        dbg!(::std::str::from_utf8(s).unwrap()); + +                                        () +                                    }, +                                    _ => (), +                                } +                            } +                        } +                    }                  }                  ()  | 
