aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorTeddy Wing2019-11-01 15:09:31 +0100
committerTeddy Wing2019-11-01 15:09:31 +0100
commitf19139b6c4fe212e36fcfae2575228b6575bda77 (patch)
tree5105a5c5e8014e2985cfc9e96c37c5c5a453e46e /src
parent612842e55541d112ae4dbf56d49a6026319d1165 (diff)
downloadpdf-urls-f19139b6c4fe212e36fcfae2575228b6575bda77.tar.bz2
Find the PDF object that URLs are stored in
Thanks to plinth (https://stackoverflow.com/users/20481/plinth) on Stack Overflow, I learned that URLs are stored in /A entries in a PDF:

> To get the link to go somewhere you'll need either a /Dest or an /A
> entry in the link annot (but not both). /Dest is an older artifact for
> page-level navigation - you won't use this. Instead, use the /A entry
> which is an action dictionary. So if you wanted to navigate to the url
> http://www.google.com, you would make your annotation look like this:
>
> << /Type /Annot /Subtype /Link /Rect [ x1 y1 x2 y2 ]
>    /A << /Type /Action /S /URI /URI (http://www.google.com) >>
> >>

https://stackoverflow.com/questions/19492229/add-a-hyperlink-into-a-pdf-document/19496996#19496996

To extract URLs, find the /A objects and get the text value of their `URI` fields.
Diffstat (limited to 'src')
-rw-r--r--src/main.rs24
1 files changed, 23 insertions, 1 deletions
diff --git a/src/main.rs b/src/main.rs
index 95a6d1e..ac9a9c6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -18,7 +18,29 @@ fn main() {
// dbg!(d);
for (k, v) in d.iter() {
- dbg!(::std::str::from_utf8(&k).unwrap(), v);
+ let key = ::std::str::from_utf8(&k).unwrap();
+
+ if key == "A" {
+ dbg!(v);
+
+ for (k, v) in v.as_dict().unwrap() {
+ let key = ::std::str::from_utf8(&k).unwrap();
+
+ // dbg!(key, v);
+ if key == "URI" {
+ dbg!(v);
+
+ match v {
+ Object::String(s, _) => {
+ dbg!(::std::str::from_utf8(s).unwrap());
+
+ ()
+ },
+ _ => (),
+ }
+ }
+ }
+ }
}
()