From c60b9819991cd7a78bbc1f260484bf426d31827c Mon Sep 17 00:00:00 2001
From: Teddy Wing
Date: Sun, 14 Mar 2021 18:10:25 +0100
Subject: Extract email body from HTML part if no text part exists

We're already doing this for single-part emails. This change makes it
work additionally for multipart emails.
---
 src/main.rs | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/main.rs b/src/main.rs
index 15c9c78..5c28dd1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -148,21 +148,20 @@ fn get_email_body(email: &[u8]) -> Result {
     // TODO: If no plain text part, use html one
     // TODO: New predicate function for text/plain
     // TODO: Maybe split into functions
-    for part in email.subparts {
-        // if part.headers.get_app_values() is one of:
-        // "multipart/alternative"
-        // "text/plain"
-        println!("part ctype: {:?}", part.ctype);
+    extract_multipart_email_body(&email)
+}
+/// Get the body from a "multipart/alternative" or "multipart/relative" email.
+///
+/// Preferentially extract the body from the "text/plain" part. If none is
+/// present, try extracting it from the "text/html" part.
+fn extract_multipart_email_body(
+    email: &mailparse::ParsedMail,
+) -> Result {
+    for part in &email.subparts {
         if part.ctype.mimetype == "multipart/alternative" {
-            for alternative_part in &part.subparts {
-                println!("apart ctype: {:?}", alternative_part.ctype);
-
-                if alternative_part.ctype.mimetype == "text/plain" {
-                    return Ok(alternative_part.get_body()?);
-                }
-            }
+            return extract_multipart_email_body(&part);
         }
 
         if part.ctype.mimetype == "text/plain" {
@@ -170,6 +169,16 @@ fn get_email_body(email: &[u8]) -> Result {
         }
     }
 
+    for part in &email.subparts {
+        if email.ctype.mimetype == "text/html" {
+            let html_body = part.get_body()?;
+            let re = Regex::new("<[^>]*>").unwrap();
+
+            return Ok(re.replace_all(&html_body, "").into_owned());
+        }
+
+    }
+
     Err(WrapError::ParseMailUnknown)
 }
-- 
cgit v1.2.3