From 67da99861f5ff0a65016cbc7904d37fb3aa4c013 Mon Sep 17 00:00:00 2001
From: Teddy Wing
Date: Sat, 2 Nov 2019 03:42:46 +0100
Subject: get_urls_from_pdf: Test extracted URLs

Add a test with a simple text-only PDF with three URLs.

Currently I'm getting the following failure, so evidently the extracted
order is not necessarily the same as the visible order, and a multi-line
hyperlink can be encoded as two link areas:

    ---- tests::get_urls_from_pdf_extracts_urls_from_pdf stdout ----
    thread 'tests::get_urls_from_pdf_extracts_urls_from_pdf' panicked at 'assertion failed: `(left == right)`
      left: `["http://www.gutenberg.org/ebooks/11", "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg", "https://science.nasa.gov/news-article/black-hole-image-makes-history"]`,
     right: `["http://www.gutenberg.org/ebooks/11", "https://science.nasa.gov/news-article/black-hole-image-makes-history", "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg", "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg"]`', src/lib.rs:65:9
---
 src/lib.rs | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'src')
diff --git a/src/lib.rs b/src/lib.rs
index 2a59906..c7f6e94 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,3 +46,22 @@ pub fn get_urls_from_pdf<P: AsRef<Path>>(path: P) -> Result<Vec<String>> {
 
     Ok(urls)
 }
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // Compares the URLs extracted from the fixture PDF against a hard-coded list.
+    #[test]
+    fn get_urls_from_pdf_extracts_urls_from_pdf() {
+        let expected = vec![
+            "http://www.gutenberg.org/ebooks/11",
+            "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg",
+            "https://science.nasa.gov/news-article/black-hole-image-makes-history",
+        ];
+        // The call returns a `Result`; it is unwrapped inside the assertion below.
+        let urls = get_urls_from_pdf("testdata/Alice's Adventures in Wonderland.pdf");
+        // Exact `Vec` equality, so both the order and any duplicate entries matter.
+        assert_eq!(expected, urls.unwrap());
+    }
+}
-- 
cgit v1.2.3