diff options
author | Teddy Wing | 2019-11-01 15:09:31 +0100 |
---|---|---|
committer | Teddy Wing | 2019-11-01 15:09:31 +0100 |
commit | f19139b6c4fe212e36fcfae2575228b6575bda77 (patch) | |
tree | 5105a5c5e8014e2985cfc9e96c37c5c5a453e46e /src | |
parent | 612842e55541d112ae4dbf56d49a6026319d1165 (diff) | |
download | pdf-urls-f19139b6c4fe212e36fcfae2575228b6575bda77.tar.bz2 |
Find the PDF object that URLs are stored in
Thanks to plinth (https://stackoverflow.com/users/20481/plinth) on Stack
Overflow, I learned that URLs are stored in /A entries in a PDF:
> To get the link to go somewhere you'll need either a /Dest or an /A
> entry in the link annot (but not both). /Dest is an older artifact for
> page-level navigation - you won't use this. Instead, use the /A entry
> which is an action dictionary. So if you wanted to navigate to the url
> http://www.google.com, you would make your annotation look like this:
>
> << /Type /Annot /Subtype /Link /Rect [ x1 y1 x2 y2 ]
> /A << /Type /Action /S /URI /URI (http://www.google.com) >>
> >>
https://stackoverflow.com/questions/19492229/add-a-hyperlink-into-a-pdf-document/19496996#19496996
To extract URLs, find the /A objects and get the text value of their
`URI` fields.
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 24 |
1 files changed, 23 insertions, 1 deletions
```diff
diff --git a/src/main.rs b/src/main.rs
index 95a6d1e..ac9a9c6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -18,7 +18,29 @@ fn main() {
     // dbg!(d);

     for (k, v) in d.iter() {
-        dbg!(::std::str::from_utf8(&k).unwrap(), v);
+        let key = ::std::str::from_utf8(&k).unwrap();
+
+        if key == "A" {
+            dbg!(v);
+
+            for (k, v) in v.as_dict().unwrap() {
+                let key = ::std::str::from_utf8(&k).unwrap();
+
+                // dbg!(key, v);
+                if key == "URI" {
+                    dbg!(v);
+
+                    match v {
+                        Object::String(s, _) => {
+                            dbg!(::std::str::from_utf8(s).unwrap());
+
+                            ()
+                        },
+                        _ => (),
+                    }
+                }
+            }
+        }
     }

     ()
```