diff --git a/Cargo.lock b/Cargo.lock index 3866692..eb947b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,18 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -26,6 +38,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.15" @@ -239,6 +257,16 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -671,6 +699,16 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "heck" version = "0.5.0" @@ -834,7 +872,7 @@ dependencies = [ [[package]] name = "mail-sink" -version = "0.0.3" +version = "0.0.4" dependencies = [ "bincode", "bytes", @@ -847,6 +885,7 @@ dependencies = [ "num_cpus", "percent-encoding", "psutil", + "rfc2047-decoder", "rustls 
0.23.13", "rustls-pemfile", "serde", @@ -867,7 +906,7 @@ checksum = "8cae768a50835557749599277fc59f7c728118724eb34185e8feb633ef266a32" dependencies = [ "charset", "data-encoding", - "quoted_printable", + "quoted_printable 0.4.8", ] [[package]] @@ -1083,6 +1122,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + [[package]] name = "psutil" version = "3.3.0" @@ -1117,6 +1165,12 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3866219251662ec3b26fc217e3e05bf9c4f84325234dfb96bf0bf840889e49" +[[package]] +name = "quoted_printable" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "640c9bd8497b02465aeef5375144c26062e0dcd5939dfcbb0f5db76cb8c17c73" + [[package]] name = "rayon" version = "1.10.0" @@ -1184,6 +1238,20 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +[[package]] +name = "rfc2047-decoder" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e90a668c463c412c3118ae1883e18b53d812c349f5af7a06de3ba4bb0c17cc73" +dependencies = [ + "base64 0.21.7", + "charset", + "chumsky", + "memchr", + "quoted_printable 0.5.1", + "thiserror", +] + [[package]] name = "ring" version = "0.16.20" @@ -1436,6 +1504,19 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + 
"psm", + "windows-sys 0.52.0", +] + [[package]] name = "strict" version = "0.2.0" @@ -1668,6 +1749,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1985,6 +2072,26 @@ dependencies = [ "thiserror", ] +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "zeroize" version = "1.8.1" diff --git a/Cargo.toml b/Cargo.toml index 5587171..088a94b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ num_cpus = "1.16.0" percent-encoding = "2.3.1" mailparse = "0.13" lazy_static = "1.5.0" +rfc2047-decoder = "1.0.5" [profile.release] opt-level = "z" diff --git a/src/smtp/mail.rs b/src/smtp/mail.rs index c8df9a4..46f002c 100644 --- a/src/smtp/mail.rs +++ b/src/smtp/mail.rs @@ -1,6 +1,7 @@ use mailparse::parse_mail; use serde::{Deserialize, Serialize}; use std::collections::HashSet; +use rfc2047_decoder::decode; #[derive(Default, Serialize, Deserialize)] pub struct Mail { @@ -69,7 +70,8 @@ impl Mail { pub fn get_subject(data: &str) -> Option { for line in data.lines() { if line.to_lowercase().starts_with("subject:") { - return Some(line[8..].trim().to_string()); + let subject = line[8..].trim().to_string(); + return 
Some(decode(&subject).unwrap_or(subject)); } } diff --git a/src/tests/parsing_tester.rs b/src/tests/parsing_tester.rs index 42dae8f..2479f9b 100644 --- a/src/tests/parsing_tester.rs +++ b/src/tests/parsing_tester.rs @@ -6,6 +6,7 @@ mod parsing_tester { fn test_parse_body_multipart() { let body = std::fs::read_to_string("test/samples/discord_mail.body").unwrap(); let subject = get_subject(&body); + println!("subject: {:?}", subject); let mail = Mail { from: Default::default(), to: Default::default(), @@ -20,7 +21,8 @@ mod parsing_tester { let (from, _) = get_data_from_to(&mail.data); assert!(from.contains("noreply@discord.com")); - assert_eq!(mail.subject.unwrap(), "Verify Email Address for Discord"); + //should've decoded the subject with rfc2047 decoder + assert_eq!(mail.subject.unwrap(), "Vérifie ton adresse e-mail Discord"); } #[test] diff --git a/test/samples/discord_mail.body b/test/samples/discord_mail.body index 2e06ed6..942dd73 100644 --- a/test/samples/discord_mail.body +++ b/test/samples/discord_mail.body @@ -18,7 +18,7 @@ Date: Fri, 27 Sep 2024 21:02:51 +0000 (UTC) From: Discord Mime-Version: 1.0 Message-ID: -Subject: Verify Email Address for Discord +Subject: =?UTF-8?B?VsOpcmlmaWU=?= ton adresse e-mail Discord X-SG-EID: =?us-ascii?Q?u001=2Encph=2Fx3Jw0NdHDwq6twCdiOEjRqduaW+3S=2FWXBRXzv62NXoGt1LlW8ZOp?= =?us-ascii?Q?GajFHOcRQzla331F6ukAeYmLWbVWcEmwbstknFb?=