Find the PDF object that URLs are stored in

Thanks to plinth (https://stackoverflow.com/users/20481/plinth) on Stack Overflow, I learned that URLs are stored in /A entries in a PDF:

> To get the link to go somewhere you'll need either a /Dest or an /A
> entry in the link annot (but not both). /Dest is an older artifact for
> page-level navigation - you won't use this. Instead, use the /A entry
> which is an action dictionary. So if you wanted to navigate to the url
> http://www.google.com, you would make your annotation look like this:
>
> << /Type /Annot /Subtype /Link /Rect [ x1 y1 x2 y2 ]
> /A << /Type /Action /S /URI /URI (http://www.google.com) >>
> >>

https://stackoverflow.com/questions/19492229/add-a-hyperlink-into-a-pdf-document/19496996#19496996

To extract URLs, find the /A objects and get the text value of their `URI` fields.
author: Teddy Wing 2019-11-01 15:09:31 +0100
committer: Teddy Wing 2019-11-01 15:09:31 +0100
commit: f19139b6c4fe212e36fcfae2575228b6575bda77 (patch)
tree: 5105a5c5e8014e2985cfc9e96c37c5c5a453e46e /src
parent: 612842e55541d112ae4dbf56d49a6026319d1165 (diff)
download: pdf-urls-f19139b6c4fe212e36fcfae2575228b6575bda77.tar.bz2
1 files changed, 23 insertions, 1 deletions
diff --git a/src/main.rs b/src/main.rs
index 95a6d1e..ac9a9c6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -18,7 +18,29 @@ fn main() {
                 // dbg!(d);
 
                 for (k, v) in d.iter() {
-                    dbg!(::std::str::from_utf8(&k).unwrap(), v);
+                    let key = ::std::str::from_utf8(&k).unwrap();
+
+                    if key == "A" {
+                        dbg!(v);
+
+                        for (k, v) in v.as_dict().unwrap() {
+                            let key = ::std::str::from_utf8(&k).unwrap();
+
+                            // dbg!(key, v);
+                            if key == "URI" {
+                                dbg!(v);
+
+                                match v {
+                                    Object::String(s, _) => {
+                                        dbg!(::std::str::from_utf8(s).unwrap());
+
+                                        ()
+                                    },
+                                    _ => (),
+                                }
+                            }
+                        }
+                    }
                 }
 
                 ()
author	Teddy Wing	2019-11-01 15:09:31 +0100
committer	Teddy Wing	2019-11-01 15:09:31 +0100
commit	f19139b6c4fe212e36fcfae2575228b6575bda77 (patch)
tree	5105a5c5e8014e2985cfc9e96c37c5c5a453e46e /src
parent	612842e55541d112ae4dbf56d49a6026319d1165 (diff)
download	pdf-urls-f19139b6c4fe212e36fcfae2575228b6575bda77.tar.bz2