aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Teddy Wing, 2019-11-02 03:42:46 +0100
committer: Teddy Wing, 2019-11-02 03:42:46 +0100
commit: 67da99861f5ff0a65016cbc7904d37fb3aa4c013 (patch)
tree: 5f7897d2ab48bd70155b47888af3a44d25642a2f
parent: 950c7d1a93ae44da2584345e4a2624d64ef84816 (diff)
download: pdf-urls-67da99861f5ff0a65016cbc7904d37fb3aa4c013.tar.bz2
get_urls_from_pdf: Test extracted URLs
Add a test with a simple text-only PDF containing three URLs.

The test currently fails with the output below. Evidently, the extracted URLs are not necessarily returned in the order in which they appear visually in the document, and a hyperlink that wraps across multiple lines can be encoded as two separate link areas, so its URL is returned twice:

---- tests::get_urls_from_pdf_extracts_urls_from_pdf stdout ----
thread 'tests::get_urls_from_pdf_extracts_urls_from_pdf' panicked at 'assertion failed: `(left == right)`
  left: `["http://www.gutenberg.org/ebooks/11", "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg", "https://science.nasa.gov/news-article/black-hole-image-makes-history"]`,
 right: `["http://www.gutenberg.org/ebooks/11", "https://science.nasa.gov/news-article/black-hole-image-makes-history", "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg", "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg"]`', src/lib.rs:65:9
-rw-r--r-- src/lib.rs 19
-rw-r--r-- testdata/Alice's Adventures in Wonderland.odt bin 0 -> 18472 bytes
-rw-r--r-- testdata/Alice's Adventures in Wonderland.pdf bin 0 -> 23262 bytes
3 files changed, 19 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 2a59906..c7f6e94 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,3 +46,22 @@ pub fn get_urls_from_pdf<P: AsRef<Path>>(path: P) -> Result<Vec<String>> {
Ok(urls)
}
+
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn get_urls_from_pdf_extracts_urls_from_pdf() {
+ let expected = vec![
+ "http://www.gutenberg.org/ebooks/11",
+ "https://ia800908.us.archive.org/6/items/alicesadventures19033gut/19033-h/images/i002.jpg",
+ "https://science.nasa.gov/news-article/black-hole-image-makes-history",
+ ];
+
+ let urls = get_urls_from_pdf("testdata/Alice's Adventures in Wonderland.pdf");
+
+ assert_eq!(expected, urls.unwrap());
+ }
+}
diff --git a/testdata/Alice's Adventures in Wonderland.odt b/testdata/Alice's Adventures in Wonderland.odt
new file mode 100644
index 0000000..09d8469
--- /dev/null
+++ b/testdata/Alice's Adventures in Wonderland.odt
Binary files differ
diff --git a/testdata/Alice's Adventures in Wonderland.pdf b/testdata/Alice's Adventures in Wonderland.pdf
new file mode 100644
index 0000000..47c673c
--- /dev/null
+++ b/testdata/Alice's Adventures in Wonderland.pdf
Binary files differ