about | summary | refs | log | tree | commit | diff | stats
path: root/Cargo.lock
diff options
context:
space:
mode:
author: Teddy Wing, 2021-03-14 17:24:18 +0100
committer: Teddy Wing, 2021-03-14 17:24:18 +0100
commit: 62c083b5e3a164d596b49132c8c53248aa2daf42 (patch)
tree: 7d6ef1866c54e09a9a5b9716f779934cb6bd29b4 /Cargo.lock
parent: 7d46438c015e400ca6c035f5d99da040e6765740 (diff)
download: mutt-ottolangy-62c083b5e3a164d596b49132c8c53248aa2daf42.tar.bz2
Strip HTML tags from single-part HTML emails
When an HTML body is fed to 'whatlang', it is recognised as English. This is likely due to the English-based HTML syntax. Remove all HTML tags with a simple regex substitution so that language recognition works properly. This doesn't remove CSS, which could also confuse the language recogniser. In a limited test, recognition seemed to work without removing any CSS, so that is not being handled for now. This still needs to be made to work for multipart emails.
Diffstat (limited to 'Cargo.lock')
-rw-r--r-- Cargo.lock 33
1 file changed, 33 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index e17605c..1aab5af 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7,6 +7,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
[[package]]
+name = "aho-corasick"
+version = "0.7.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -86,11 +95,18 @@ dependencies = [
]
[[package]]
+name = "memchr"
+version = "2.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
+
+[[package]]
name = "ottolangy"
version = "0.0.1"
dependencies = [
"exitcode",
"mailparse",
+ "regex",
"thiserror",
"whatlang",
"xdg",
@@ -121,6 +137,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b080c5db639b292ac79cbd34be0cfc5d36694768d8341109634d90b86930e2"
[[package]]
+name = "regex"
+version = "1.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54fd1046a3107eb58f42de31d656fee6853e5d276c455fd943742dce89fc3dd3"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
+
+[[package]]
name = "syn"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"