From f19139b6c4fe212e36fcfae2575228b6575bda77 Mon Sep 17 00:00:00 2001 From: Teddy Wing Date: Fri, 1 Nov 2019 15:09:31 +0100 Subject: Find the PDF object that URLs are stored in Thanks to plinth (https://stackoverflow.com/users/20481/plinth) on Stack Overflow, I learned that URLs are stored in /A entries in a PDF: > To get the link to go somewhere you'll need either a /Dest or an /A > entry in the link annot (but not both). /Dest is an older artifact for > page-level navigation - you won't use this. Instead, use the /A entry > which is an action dictionary. So if you wanted to navigate to the url > http://www.google.com, you would make your annotation look like this: > > << /Type /Annot /Subtype /Link /Rect [ x1 y1 x2 y2 ] > /A << /Type /Action /S /URI /URI (http://www.google.com) >> > >> https://stackoverflow.com/questions/19492229/add-a-hyperlink-into-a-pdf-document/19496996#19496996 To extract URLs, find the /A objects and get the text value of their `URI` fields. --- src/main.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index 95a6d1e..ac9a9c6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,29 @@ fn main() { // dbg!(d); for (k, v) in d.iter() { - dbg!(::std::str::from_utf8(&k).unwrap(), v); + let key = ::std::str::from_utf8(&k).unwrap(); + + if key == "A" { + dbg!(v); + + for (k, v) in v.as_dict().unwrap() { + let key = ::std::str::from_utf8(&k).unwrap(); + + // dbg!(key, v); + if key == "URI" { + dbg!(v); + + match v { + Object::String(s, _) => { + dbg!(::std::str::from_utf8(s).unwrap()); + + () + }, + _ => (), + } + } + } + } } () -- cgit v1.2.3