From 286021afae440252572c8e990c3137162ab3f64e Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Fri, 13 Jan 2023 17:11:15 +0100 Subject: [PATCH 01/26] Start over --- .github/workflows/rust.yml | 69 -- .gitignore | 9 +- Cargo.toml | 12 +- README.md | 53 -- email-parser/Cargo.toml | 45 - email-parser/benches/alternatives.html | 104 --- email-parser/benches/alternatives.rs | 29 - email-parser/benches/encodings.rs | 17 - email-parser/src/address.rs | 22 - email-parser/src/email.rs | 457 ---------- email-parser/src/error.rs | 24 - email-parser/src/lib.rs | 67 -- email-parser/src/mime.rs | 196 ---- email-parser/src/parsing/address.rs | 295 ------ email-parser/src/parsing/character_sets.rs | 145 --- email-parser/src/parsing/combinators.rs | 297 ------ email-parser/src/parsing/common.rs | 246 ----- email-parser/src/parsing/fields.rs | 849 ------------------ email-parser/src/parsing/message.rs | 165 ---- email-parser/src/parsing/mime/base64.rs | 250 ------ .../src/parsing/mime/encoded_headers.rs | 169 ---- email-parser/src/parsing/mime/entity.rs | 325 ------- email-parser/src/parsing/mime/mime_fields.rs | 618 ------------- email-parser/src/parsing/mime/mod.rs | 8 - email-parser/src/parsing/mime/multipart.rs | 164 ---- .../src/parsing/mime/percent_encoding.rs | 202 ----- .../src/parsing/mime/quoted_printables.rs | 187 ---- email-parser/src/parsing/mod.rs | 13 - email-parser/src/parsing/quoted_string.rs | 131 --- email-parser/src/parsing/time.rs | 383 -------- email-parser/src/parsing/whitespaces.rs | 151 ---- email-parser/src/prelude.rs | 21 - email-parser/src/string.rs | 70 -- email-parser/src/time.rs | 60 -- email-parser/tests/local_emails.rs | 86 -- src/lib.rs | 14 + 36 files changed, 23 insertions(+), 5930 deletions(-) delete mode 100644 .github/workflows/rust.yml delete mode 100644 README.md delete mode 100644 email-parser/Cargo.toml delete mode 100644 email-parser/benches/alternatives.html delete mode 100644 email-parser/benches/alternatives.rs delete mode 100644 email-parser/benches/encodings.rs delete mode 100644 email-parser/src/address.rs delete mode 100644 email-parser/src/email.rs delete mode 100644 email-parser/src/error.rs delete mode 100644 email-parser/src/lib.rs delete mode 100644 email-parser/src/mime.rs delete mode 100644 email-parser/src/parsing/address.rs delete mode 100644 email-parser/src/parsing/character_sets.rs delete mode 100644 email-parser/src/parsing/combinators.rs delete mode 100644 email-parser/src/parsing/common.rs delete mode 100644 email-parser/src/parsing/fields.rs delete mode 100644 email-parser/src/parsing/message.rs delete mode 100644 email-parser/src/parsing/mime/base64.rs delete mode 100644 email-parser/src/parsing/mime/encoded_headers.rs delete mode 100644 email-parser/src/parsing/mime/entity.rs delete mode 100644 email-parser/src/parsing/mime/mime_fields.rs delete mode 100644 email-parser/src/parsing/mime/mod.rs delete mode 100644 email-parser/src/parsing/mime/multipart.rs delete mode 100644 email-parser/src/parsing/mime/percent_encoding.rs delete mode 100644 email-parser/src/parsing/mime/quoted_printables.rs delete mode 100644 email-parser/src/parsing/mod.rs delete mode 100644 email-parser/src/parsing/quoted_string.rs delete mode 100644 email-parser/src/parsing/time.rs delete mode 100644 email-parser/src/parsing/whitespaces.rs delete mode 100644 email-parser/src/prelude.rs delete mode 100644 email-parser/src/string.rs delete mode 100644 email-parser/src/time.rs delete mode 100644 email-parser/tests/local_emails.rs create mode 100644 src/lib.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml deleted file mode 100644 index 5fece4d..0000000 --- a/.github/workflows/rust.yml +++ /dev/null @@ -1,69 +0,0 @@ -on: [push, pull_request] - -name: Continuous integration - -jobs: - check: - name: Check - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: check - use-cache: true - args: --all-features - - test: - name: Test Suite - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - uses: actions-rs/cargo@v1 - with: - command: test - use-cache: true - args: --all-features - - fmt: - name: Rustfmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - run: rustup component add rustfmt - - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --all -- --check - - clippy: - name: Clippy - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - run: rustup component add clippy - - uses: actions-rs/cargo@v1 - with: - command: clippy - args: -- -D warnings - use-cache: true diff --git a/.gitignore b/.gitignore index 2d73dcc..4fffb2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,2 @@ /target -Cargo.lock -benches/alternatives.html -mail.txt -.vscode/settings.json -mail3.txt -mail2.txt -# This is a file for testing 491179 emails. Too big for git -tests/local_emails.rs +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml index 0accd10..89bb1cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,8 @@ -[workspace] -members = [ - "email-parser", -] +[package] +name = "email-parser2" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/README.md b/README.md deleted file mode 100644 index a0ba394..0000000 --- a/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# email-parser - -The fastest and lightest email parsing Rust library!\ -This library has no dependency by default (and only a small optional one). - -## Goal - -The goal of this library is to be fully compliant with RFC 5322. However, this library does not intend to support the obsolete syntax because it has been obsolete for 12 years, and it would slow down everything.\ -This library supports MIME and will support PGP in the future. - -## Example - -```rust -let email = Email::parse( - b"\ - From: Mubelotix \r\n\ - Subject:Example Email\r\n\ - To: Someone \r\n\ - Message-id: <6546518945@mubelotix.dev>\r\n\ - Date: 5 May 2003 18:58:34 +0000\r\n\ - \r\n\ - Hey!\r\n", -) -.unwrap(); - -assert_eq!(email.subject.unwrap(), "Example Email"); -assert_eq!(email.sender.name.unwrap(), vec!["Mubelotix"]); -assert_eq!(email.sender.address.local_part, "mubelotix"); -assert_eq!(email.sender.address.domain, "mubelotix.dev"); -``` - -## Pay for what you use - -Mails can be elaborated. No matter what you are building, you are certainly not using all of its features.\ -So why would you pay the parsing cost of header fields you are not using? This library allows you to enable headers you need so that other header values will be parsed as an unstructured header, which is much faster.\ -By disabling all header value parsing, this library can parse an entire mail twice faster! But don't worry if you need everything enabled; this library is blazing fast anyway! - -## Zero-Copy (almost) - -This library tries to avoid usage of owned `String`s as much as possible and is using `Cow` instead.\ -Thanks to this method, around 90% of the strings are references. - -## Benchmarks - -This chart shows the time took to parse a single email. - -![Benchmark](https://cdn.discordapp.com/attachments/694923348844609597/789162705494868020/unknown.png) - -Run these benchmarks by yourself with `rustup run nightly cargo bench` and `rustup run nightly cargo bench --no-default-features`.\ -Tests require a `mail.txt` file containing a raw mail next to the `Cargo.toml`.\ -Some libraries suffer from huge performance variations depending on the content of the mail, so this library is not **always** the fastest. - -License: MIT diff --git a/email-parser/Cargo.toml b/email-parser/Cargo.toml deleted file mode 100644 index 5898928..0000000 --- a/email-parser/Cargo.toml +++ /dev/null @@ -1,45 +0,0 @@ -[package] -name = "email-parser" -version = "0.5.0" -authors = ["Mubelotix "] -edition = "2018" -license = "MIT" -description = "The fastest and lightest email parsing Rust library. Supports MIME." -readme = "README.md" -repository = "https://github.com/Mubelotix/email-parser" -categories = ["email", "parser-implementations"] -keywords = ["email", "mail", "mime", "parser"] - -[dependencies] -textcode = {version="0.2", optional=true} - -[features] -default = ["headers"] -headers = ["to", "cc", "bcc", "date", "from", "sender", "reply-to", "message-id", "in-reply-to", "references", "subject", "comments", "keywords", "trace"] -to = [] -cc = [] -bcc = [] -date = [] -from = [] -sender = [] -reply-to = [] -message-id = [] -in-reply-to = [] -references = [] -subject = [] -comments = [] -keywords = [] -trace = [] -benchmarking = ["mime"] -compatibility-fixes = [] -content-disposition = ["mime"] -unrecognized-headers = ["mime"] -mime = ["textcode"] - -[dev-dependencies] -email = "0.0.21" -email-format = "0.8" -mailparse = "0.13" - -[profile.release] -codegen-units = 1 diff --git a/email-parser/benches/alternatives.html b/email-parser/benches/alternatives.html deleted file mode 100644 index 48b8026..0000000 --- a/email-parser/benches/alternatives.html +++ /dev/null @@ -1,104 +0,0 @@ - - - - Performance comparison - - - - -
-
-
email_parser
(all features off)
-
-
-
7,568 ns
- -
-
-
email_parser
(default features)
-
-
-
12,140 ns
-
-
-
email_parser
(all features on)
-
-
-
13,583 ns
-
-
-
mailparse
-
-
16,034 ns
-
-
email_format
-
-
27,914 ns
-
-
email
-
-
37,564 ns
-
- - \ No newline at end of file diff --git a/email-parser/benches/alternatives.rs b/email-parser/benches/alternatives.rs deleted file mode 100644 index d10591c..0000000 --- a/email-parser/benches/alternatives.rs +++ /dev/null @@ -1,29 +0,0 @@ -#![feature(test)] - -extern crate test; -use test::Bencher; - -const MAIL: &[u8] = include_bytes!("../mail.txt"); -const MAIL2: &str = include_str!("../mail.txt"); - -#[bench] -fn email_parser(b: &mut Bencher) { - b.iter(|| email_parser::prelude::parse_message(MAIL)); -} - -#[bench] -fn email(b: &mut Bencher) { - b.iter(|| email::rfc5322::Rfc5322Parser::new(MAIL2).consume_message()); -} - -#[bench] -fn email_format(b: &mut Bencher) { - use email_format::rfc5322::Parsable; - use email_format::Email; - b.iter(|| Email::parse(MAIL)); -} - -#[bench] -fn mailparse(b: &mut Bencher) { - b.iter(|| mailparse::parse_mail(MAIL)); -} diff --git a/email-parser/benches/encodings.rs b/email-parser/benches/encodings.rs deleted file mode 100644 index 6c431ac..0000000 --- a/email-parser/benches/encodings.rs +++ /dev/null @@ -1,17 +0,0 @@ -#![feature(test)] - -extern crate test; -use test::Bencher; - -#[cfg(feature = "benchmarking")] -#[bench] -fn base64_decoding(b: &mut Bencher) { - b.iter(|| email_parser::prelude::decode_base64(b"TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnVyZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg==".to_vec())); -} - -#[cfg(feature = "benchmarking")] -#[bench] -fn quoted_printables_decoding(b: &mut Bencher) { - b.iter(|| email_parser::prelude::decode_qp(br#"\r\n\r\n\r\n\r\n - \r\n
\r\nqzdzq
\r\n\r\n"#.to_vec())); -} diff --git a/email-parser/src/address.rs b/email-parser/src/address.rs deleted file mode 100644 index 4f078b5..0000000 --- a/email-parser/src/address.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::borrow::Cow; - -#[derive(Debug, Clone)] -pub struct Mailbox<'a> { - /// The name associated with an email.\ - /// Each name is stored individually in the `Vec`. For example "Elton John" results in `Some(["Elton", "John"])`.\ - /// Be aware that might also get `Some(["Elton John"])` when the `mime` feature is enabled because whitespaces may appear after decoding encoded data. - pub name: Option>>, - pub address: EmailAddress<'a>, -} - -#[derive(Debug, Clone)] -pub struct EmailAddress<'a> { - pub local_part: Cow<'a, str>, - pub domain: Cow<'a, str>, -} - -#[derive(Debug, Clone)] -pub enum Address<'a> { - Mailbox(Mailbox<'a>), - Group((Vec>, Vec>)), -} diff --git a/email-parser/src/email.rs b/email-parser/src/email.rs deleted file mode 100644 index 164e667..0000000 --- a/email-parser/src/email.rs +++ /dev/null @@ -1,457 +0,0 @@ -use crate::address::*; -use crate::prelude::*; -use std::borrow::Cow; - -/// A struct representing a valid RFC 5322 message. -/// -/// # Example -/// -/// ``` -/// # use email_parser::prelude::*; -/// let email = Email::parse( -/// b"\ -/// From: Mubelotix \r\n\ -/// Subject:Example Email\r\n\ -/// To: Someone \r\n\ -/// Message-id: <6546518945@mubelotix.dev>\r\n\ -/// Date: 5 May 2003 18:58:34 +0000\r\n\ -/// \r\n\ -/// Hey!\r\n", -/// ) -/// .unwrap(); -/// -/// assert_eq!(email.subject.unwrap(), "Example Email"); -/// assert_eq!(email.sender.name.unwrap(), vec!["Mubelotix"]); -/// assert_eq!(email.sender.address.local_part, "mubelotix"); -/// assert_eq!(email.sender.address.domain, "mubelotix.dev"); -/// ``` -#[derive(Debug)] -pub struct Email<'a> { - /// The ASCII text of the body. - #[cfg(not(feature = "mime"))] - pub body: Option>, - - #[cfg(feature = "from")] - /// The list of authors of the message.\ - /// It's **not** the identity of the sender. See the [sender field](#structfield.sender). - pub from: Vec>, - - #[cfg(feature = "sender")] - /// The mailbox of the agent responsible for the actual transmission of the message.\ - /// Do not mix up with the [from field](#structfield.from) that contains the list of authors.\ - /// When there is only one author, this field can be omitted, and its value is inferred. Otherwise, an explicit value is required. - pub sender: Mailbox<'a>, - - #[cfg(feature = "subject")] - /// A short optional string identifying the topic of the message. - pub subject: Option>, - - #[cfg(feature = "date")] - /// The date and time at which the [sender](#structfield.sender) of the message indicated that the message was complete and ready to enter the mail delivery system. - /// For instance, this might be the time that a user pushes the "send" or "submit" button in an application program. - pub date: DateTime, - - #[cfg(feature = "to")] - pub to: Option>>, - - #[cfg(feature = "cc")] - pub cc: Option>>, - - #[cfg(feature = "bcc")] - pub bcc: Option>>, - - #[cfg(feature = "message-id")] - pub message_id: Option<(Cow<'a, str>, Cow<'a, str>)>, - - #[cfg(feature = "in-reply-to")] - pub in_reply_to: Option, Cow<'a, str>)>>, - - #[cfg(feature = "references")] - pub references: Option, Cow<'a, str>)>>, - - #[cfg(feature = "reply-to")] - pub reply_to: Option>>, - - #[cfg(feature = "comments")] - pub comments: Vec>, - - #[cfg(feature = "keywords")] - pub keywords: Vec>>, - - #[cfg(feature = "trace")] - pub trace: Vec<( - Option>>, - Vec<(Vec>, DateTime)>, - Vec>, - )>, - - #[cfg(feature = "mime")] - pub mime_entity: RawEntity<'a>, - - /// The list of unrecognized fields.\ - /// Each field is stored as a `(name, value)` tuple. - pub unknown_fields: Vec<(&'a str, Cow<'a, str>)>, -} - -impl<'a> Email<'a> { - /// Parse an email. - pub fn parse(data: &'a [u8]) -> Result, Error> { - let (fields, body) = crate::parse_message(data)?; - - #[cfg(feature = "from")] - let mut from = None; - #[cfg(feature = "sender")] - let mut sender = None; - #[cfg(feature = "subject")] - let mut subject = None; - #[cfg(feature = "date")] - let mut date = None; - #[cfg(feature = "to")] - let mut to = None; - #[cfg(feature = "cc")] - let mut cc = None; - #[cfg(feature = "bcc")] - let mut bcc = None; - #[cfg(feature = "message-id")] - let mut message_id = None; - #[cfg(feature = "in-reply-to")] - let mut in_reply_to = None; - #[cfg(feature = "references")] - let mut references = None; - #[cfg(feature = "reply-to")] - let mut reply_to = None; - #[cfg(feature = "comments")] - let mut comments = Vec::new(); - #[cfg(feature = "keywords")] - let mut keywords = Vec::new(); - #[cfg(feature = "trace")] - let mut trace = Vec::new(); - #[cfg(feature = "mime")] - let mut mime_version = None; - #[cfg(feature = "mime")] - let mut content_type = None; - #[cfg(feature = "mime")] - let mut content_transfer_encoding = None; - #[cfg(feature = "mime")] - let mut content_id = None; - #[cfg(feature = "mime")] - let mut content_description = None; - #[cfg(feature = "content-disposition")] - let mut content_disposition = None; - - let mut unknown_fields = Vec::new(); - - for field in fields { - match field { - #[cfg(feature = "from")] - Field::From(mailboxes) => { - if from.is_none() { - from = Some(mailboxes) - } else { - return Err(Error::DuplicateHeader("From")); - } - } - #[cfg(feature = "sender")] - Field::Sender(mailbox) => { - if sender.is_none() { - sender = Some(mailbox) - } else { - return Err(Error::DuplicateHeader("Sender")); - } - } - #[cfg(feature = "subject")] - Field::Subject(data) => { - if subject.is_none() { - subject = Some(data) - } else { - return Err(Error::DuplicateHeader("Subject")); - } - } - #[cfg(feature = "date")] - Field::Date(data) => { - if date.is_none() { - date = Some(data) - } else { - return Err(Error::DuplicateHeader("Date")); - } - } - #[cfg(feature = "to")] - Field::To(addresses) => { - if to.is_none() { - to = Some(addresses) - } else { - return Err(Error::DuplicateHeader("To")); - } - } - #[cfg(feature = "cc")] - Field::Cc(addresses) => { - if cc.is_none() { - cc = Some(addresses) - } else { - return Err(Error::DuplicateHeader("Cc")); - } - } - #[cfg(feature = "bcc")] - Field::Bcc(addresses) => { - if bcc.is_none() { - bcc = Some(addresses) - } else { - return Err(Error::DuplicateHeader("Bcc")); - } - } - #[cfg(feature = "message-id")] - Field::MessageId(id) => { - if message_id.is_none() { - message_id = Some(id) - } else { - return Err(Error::DuplicateHeader("Message-ID")); - } - } - #[cfg(feature = "in-reply-to")] - Field::InReplyTo(ids) => { - if in_reply_to.is_none() { - in_reply_to = Some(ids) - } else { - return Err(Error::DuplicateHeader("In-Reply-To")); - } - } - #[cfg(feature = "references")] - Field::References(ids) => { - if references.is_none() { - references = Some(ids) - } else { - return Err(Error::DuplicateHeader("References")); - } - } - #[cfg(feature = "reply-to")] - Field::ReplyTo(mailboxes) => { - if reply_to.is_none() { - reply_to = Some(mailboxes) - } else { - return Err(Error::DuplicateHeader("Reply-To")); - } - } - #[cfg(feature = "comments")] - Field::Comments(data) => comments.push(data), - #[cfg(feature = "keywords")] - Field::Keywords(mut data) => { - keywords.append(&mut data); - } - #[cfg(feature = "trace")] - Field::Trace { - return_path, - received, - fields, - } => { - trace.push((return_path, received, fields)); - } - #[cfg(feature = "mime")] - Field::MimeVersion(major, minor) => { - if mime_version.is_none() { - mime_version = Some((major, minor)) - } else { - return Err(Error::DuplicateHeader("Mime-Version")); - } - } - #[cfg(feature = "mime")] - Field::ContentType { - mime_type, - subtype, - parameters, - } => { - if content_type.is_none() { - content_type = Some((mime_type, subtype, parameters)) - } else { - return Err(Error::DuplicateHeader("Content-Type")); - } - } - #[cfg(feature = "mime")] - Field::ContentTransferEncoding(encoding) => { - if content_transfer_encoding.is_none() { - content_transfer_encoding = Some(encoding) - } else { - return Err(Error::DuplicateHeader("Content-Transfer-Encoding")); - } - } - #[cfg(feature = "mime")] - Field::ContentId(id) => { - if content_id.is_none() { - content_id = Some(id) - } else { - return Err(Error::DuplicateHeader("Content-Id")); - } - } - #[cfg(feature = "mime")] - Field::ContentDescription(description) => { - if content_description.is_none() { - content_description = Some(description) - } else { - return Err(Error::DuplicateHeader("Content-Description")); - } - } - #[cfg(feature = "content-disposition")] - Field::ContentDisposition(disposition) => { - if content_disposition.is_none() { - content_disposition = Some(disposition) - } else { - return Err(Error::DuplicateHeader("Content-Disposition")); - } - } - Field::Unknown { name, value } => { - unknown_fields.push((name, value)); - } - } - } - - #[cfg(feature = "from")] - let from = from.ok_or(Error::MissingHeader("From"))?; - #[cfg(feature = "date")] - let date = date.ok_or(Error::MissingHeader("Date"))?; - - #[cfg(feature = "sender")] - let sender = match sender { - Some(sender) => sender, - None => { - if from.len() == 1 { - from[0].clone() - } else { - return Err(Error::MissingHeader("Sender")); - } - } - }; - - #[cfg(feature = "mime")] - let (content_type, body) = ( - content_type.unwrap_or(( - ContentType::Text, - Cow::Borrowed("plain"), - vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))] - .into_iter() - .collect(), - )), - if let Some(body) = body { - Some(crate::parsing::mime::entity::decode_value( - Cow::Borrowed(body), - content_transfer_encoding.unwrap_or(ContentTransferEncoding::SevenBit), - )?) - } else { - None - }, - ); - - Ok(Email { - #[cfg(not(feature = "mime"))] - body, - #[cfg(feature = "from")] - from, - #[cfg(feature = "sender")] - sender, - #[cfg(feature = "subject")] - subject, - #[cfg(feature = "date")] - date, - #[cfg(feature = "to")] - to, - #[cfg(feature = "cc")] - cc, - #[cfg(feature = "bcc")] - bcc, - #[cfg(feature = "message-id")] - message_id, - #[cfg(feature = "in-reply-to")] - in_reply_to, - #[cfg(feature = "references")] - references, - #[cfg(feature = "reply-to")] - reply_to, - #[cfg(feature = "trace")] - trace, - #[cfg(feature = "comments")] - comments, - #[cfg(feature = "keywords")] - keywords, - #[cfg(feature = "mime")] - mime_entity: RawEntity { - mime_type: content_type.0, - subtype: content_type.1, - description: content_description, - id: content_id, - parameters: content_type.2, - #[cfg(feature = "content-disposition")] - disposition: content_disposition, - value: body.unwrap_or(Cow::Borrowed(b"")), - additional_headers: Vec::new(), - }, - unknown_fields, - }) - } -} - -impl<'a> std::convert::TryFrom<&'a [u8]> for Email<'a> { - type Error = crate::error::Error; - - fn try_from(value: &'a [u8]) -> Result { - Self::parse(value) - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_full_email() { - /*let multipart = Email::parse(include_bytes!("../mail.txt")).unwrap().mime_entity.parse().unwrap(); - println!("{:?}", multipart); - if let Entity::Multipart{content, subtype: _} = multipart { - for entity in content { - println!("{:?}", entity.parse().unwrap()) - } - } else { - panic!("Failed to parse multipart"); - }*/ - } - - #[test] - fn test_field_number() { - assert!(Email::parse( - // missing date - b"\ - From: Mubelotix \r\n\ - \r\n\ - Hey!\r\n", - ) - .is_err()); - - assert!(Email::parse( - // 2 date fields - b"\ - From: Mubelotix \r\n\ - Date: 5 May 2003 18:58:34 +0000\r\n\ - Date: 6 May 2003 18:58:34 +0000\r\n\ - \r\n\ - Hey!\r\n", - ) - .is_err()); - - assert!(Email::parse( - // missing from - b"\ - Date: 5 May 2003 18:58:34 +0000\r\n\ - \r\n\ - Hey!\r\n", - ) - .is_err()); - - assert!(Email::parse( - // 2 from fields - b"\ - From: Mubelotix \r\n\ - From: Someone \r\n\ - Date: 5 May 2003 18:58:34 +0000\r\n\ - \r\n\ - Hey!\r\n", - ) - .is_err()); - } -} diff --git a/email-parser/src/error.rs b/email-parser/src/error.rs deleted file mode 100644 index 4e74e5f..0000000 --- a/email-parser/src/error.rs +++ /dev/null @@ -1,24 +0,0 @@ -#[derive(Debug, PartialEq, Clone)] -pub enum Error { - Explicit(&'static str), - Unknown(&'static str), - DuplicateHeader(&'static str), - MissingHeader(&'static str), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::Unknown(message) => write!(f, "{}", message), - Error::Explicit(message) => write!(f, "{}", message), - Error::DuplicateHeader(name) => { - write!(f, "There are too many {} headers in this mail.", name) - } - Error::MissingHeader(name) => write!(f, "A valid {} header is required.", name), - } - } -} - -impl std::error::Error for Error {} - -pub type Res<'a, T> = Result<(&'a [u8], T), Error>; diff --git a/email-parser/src/lib.rs b/email-parser/src/lib.rs deleted file mode 100644 index e5ef2d0..0000000 --- a/email-parser/src/lib.rs +++ /dev/null @@ -1,67 +0,0 @@ -#![allow(clippy::type_complexity)] -#![allow(clippy::manual_range_contains)] -#![allow(clippy::inconsistent_struct_constructor)] - -//! The fastest and lightest email parsing Rust library!\ -//! This library has no dependency. -//! -//! # Goal -//! -//! The goal of this library is to be fully compliant with RFC 5322. However, this library does not intend to support the obsolete syntax because it has been obsolete for 12 years, and it would slow down everything.\ -//! I plan to add optional support to the Multipurpose Internet Mail Extensions and for PGP. -//! -//! # Example -//! -//! ``` -//! # use email_parser::prelude::*; -//! let email = Email::parse( -//! b"\ -//! From: Mubelotix \r\n\ -//! Subject:Example Email\r\n\ -//! To: Someone \r\n\ -//! Message-id: <6546518945@mubelotix.dev>\r\n\ -//! Date: 5 May 2003 18:58:34 +0000\r\n\ -//! \r\n\ -//! Hey!\r\n", -//! ) -//! .unwrap(); -//! -//! assert_eq!(email.subject.unwrap(), "Example Email"); -//! assert_eq!(email.sender.name.unwrap(), vec!["Mubelotix"]); -//! assert_eq!(email.sender.address.local_part, "mubelotix"); -//! assert_eq!(email.sender.address.domain, "mubelotix.dev"); -//! ``` -//! -//! # Pay for what you use -//! -//! Mails can be elaborated. No matter what you are building, you are certainly not using all of its features.\ -//! So why would you pay the parsing cost of header fields you are not using? This library allows you to enable headers you need so that other header values will be parsed as an unstructured header, which is much faster.\ -//! By disabling all header value parsing, this library can parse an entire mail twice faster! But don't worry if you need everything enabled; this library is blazing fast anyway! -//! -//! # Zero-Copy (almost) -//! -//! This library tries to avoid usage of owned `String`s as much as possible and is using `Cow` instead.\ -//! Thanks to this method, around 90% of the strings are references. -//! -//! # Benchmarks -//! -//! This chart shows the time took to parse a single email. -//! -//! ![Benchmark](https://cdn.discordapp.com/attachments/770283472988143616/774711170208104448/Screenshot_2020-11-07_Performance_comparison1.png) -//! -//! Run these benchmarks by yourself with `rustup run nightly cargo bench` and `rustup run nightly cargo bench --no-default-features`.\ -//! Tests require a `mail.txt` file containing a raw mail next to the `Cargo.toml`.\ -//! Some libraries suffer from huge performance variations depending on the content of the mail, so this library is not **always** the fastest. - -pub mod address; -pub mod email; -pub mod error; -#[cfg(feature = "mime")] -pub mod mime; -pub(crate) mod parsing; -pub mod prelude; -pub(crate) mod string; -pub mod time; - -pub use crate::parsing::fields::Field; -pub use crate::parsing::message::parse_message; diff --git a/email-parser/src/mime.rs b/email-parser/src/mime.rs deleted file mode 100644 index 3ec50ff..0000000 --- a/email-parser/src/mime.rs +++ /dev/null @@ -1,196 +0,0 @@ -use crate::prelude::*; -use std::borrow::Cow; -use std::collections::HashMap; - -/// A generic MIME Entity. -#[derive(Debug, PartialEq, Clone)] -pub struct RawEntity<'a> { - pub mime_type: ContentType<'a>, - /// The subtype (in lowercase). - pub subtype: Cow<'a, str>, - pub description: Option>, - pub id: Option<(Cow<'a, str>, Cow<'a, str>)>, - /// Parameters named in lowercase. - pub parameters: HashMap, Cow<'a, str>>, - #[cfg(feature = "content-disposition")] - pub disposition: Option>, - /// The raw value of this entity. - /// It has already been decoded. - pub value: Cow<'a, [u8]>, - pub additional_headers: Vec<(Cow<'a, str>, Cow<'a, str>)>, -} - -impl<'a> RawEntity<'a> { - /// Use this function to decode [text](Entity::Text) and [multipart](Entity::Multipart) values.\ - /// If this library is not able to provide a higher-level structure, the data will be returned [untouched]([Entity::Unknown]).\ - /// If this entity is supported but is wrongly formatted, an error will be returned. - pub fn parse(&'a self) -> Result, Error> { - crate::parsing::mime::entity::entity(self) - } -} - -/// A higher-level reprentation of entities.\ -/// Can be obtained with [RawEntity::parse]. -#[derive(Debug, PartialEq, Clone)] -pub enum Entity<'a> { - /// A multipart entity is an array of entities.\ - /// See the subtype for information about their relation. - Multipart { - subtype: &'a Cow<'a, str>, - content: Vec>, - }, - /// A decoded text entity.\ - /// Supported charsets are all ISO, US-ASCII and UTF-8. - Text { - subtype: &'a Cow<'a, str>, - value: Cow<'a, str>, - }, - /// All other entities that are not supported by this library. - Unknown, -} - -#[derive(Debug, PartialEq, Clone)] -pub enum ContentType<'a> { - Text, - Image, - Audio, - Video, - Application, - Message, - Multipart, - Unknown(Cow<'a, str>), -} - -impl<'a> ContentType<'a> { - /// Extends the lifetime from `'a` to `'static` by guaranteeing that we have ownership after calling this function. - /// It will call `to_owned` on references.\ - /// Since there are rarely references, this is almost always free. - pub fn into_owned(self) -> ContentType<'static> { - match self { - ContentType::Text => ContentType::Text, - ContentType::Image => ContentType::Image, - ContentType::Audio => ContentType::Audio, - ContentType::Video => ContentType::Video, - ContentType::Application => ContentType::Application, - ContentType::Message => ContentType::Message, - ContentType::Multipart => ContentType::Multipart, - ContentType::Unknown(Cow::Owned(value)) => ContentType::Unknown(Cow::Owned(value)), - ContentType::Unknown(Cow::Borrowed(value)) => { - ContentType::Unknown(Cow::Owned(value.to_owned())) - } - } - } -} - -/// Information about how a [RawEntity] must be displayed.\ -/// Is accessible from [Disposition::disposition_type]. -#[derive(Debug, PartialEq, Clone)] -pub enum DispositionType<'a> { - /// An inline entity\ - /// [Learn more](https://tools.ietf.org/html/rfc2183#section-2.1) - Inline, - /// An attachment\ - /// [Learn more](https://tools.ietf.org/html/rfc2183#section-2.2) - Attachment, - /// An unknown content-disposition. Should be treated as [DispositionType::Attachment].\ - /// [Learn more](https://tools.ietf.org/html/rfc2183#section-2.8). - Unknown(Cow<'a, str>), -} - -impl<'a> DispositionType<'a> { - /// Extends the lifetime from `'a` to `'static` by guaranteeing that we have ownership after calling this function. - /// It will call `to_owned` on references.\ - /// Since there are rarely references, this is almost always free. - pub fn into_owned(self) -> DispositionType<'static> { - match self { - DispositionType::Inline => DispositionType::Inline, - DispositionType::Attachment => DispositionType::Attachment, - DispositionType::Unknown(Cow::Owned(value)) => { - DispositionType::Unknown(Cow::Owned(value)) - } - DispositionType::Unknown(Cow::Borrowed(value)) => { - DispositionType::Unknown(Cow::Owned(value.to_owned())) - } - } - } -} - -/// Some information about how to display a [RawEntity] and some file metadata.\ -/// Is accessible from [RawEntity::disposition].\ -/// The size parameter is not directly supported as it is the "approximate size". You can get the exact size in bytes by calling `.len()` on the value of an [RawEntity::value]. -#[derive(Debug, PartialEq, Clone)] -pub struct Disposition<'a> { - pub disposition_type: DispositionType<'a>, - pub filename: Option>, - pub creation_date: Option, - pub modification_date: Option, - pub read_date: Option, - pub unstructured: HashMap, Cow<'a, str>>, -} - -impl<'a> Disposition<'a> { - /// Extends the lifetime from `'a` to `'static` by guaranteeing that we have ownership after calling this function. - /// It will call `to_owned` on references. - pub fn into_owned(self) -> Disposition<'static> { - Disposition { - disposition_type: self.disposition_type.into_owned(), - filename: self - .filename - .map(|filename| Cow::Owned(filename.into_owned())), - creation_date: self.creation_date, - modification_date: self.modification_date, - read_date: self.read_date, - unstructured: self - .unstructured - .into_iter() - .map(|(n, v)| (Cow::Owned(n.into_owned()), Cow::Owned(v.into_owned()))) - .collect(), - } - } -} - -impl<'a> ContentType<'a> { - pub fn is_composite_type(&self) -> bool { - match self { - ContentType::Message => true, - ContentType::Multipart => true, - ContentType::Text => false, - ContentType::Image => false, - ContentType::Audio => false, - ContentType::Video => false, - ContentType::Application => false, - ContentType::Unknown(_) => false, - } - } -} - -#[derive(Debug, PartialEq, Clone)] -pub enum ContentTransferEncoding<'a> { - SevenBit, - HeightBit, - Binary, - QuotedPrintable, - Base64, - Unknown(Cow<'a, str>), -} - -impl<'a> ContentTransferEncoding<'a> { - /// Extends the lifetime from `'a` to `'static` by guaranteeing that we have ownership after calling this function. - /// It will call `to_owned` on references.\ - /// Since there are rarely references, this is almost always free. - pub fn into_owned(self) -> ContentTransferEncoding<'static> { - match self { - ContentTransferEncoding::SevenBit => ContentTransferEncoding::SevenBit, - ContentTransferEncoding::HeightBit => ContentTransferEncoding::HeightBit, - ContentTransferEncoding::Binary => ContentTransferEncoding::Binary, - ContentTransferEncoding::QuotedPrintable => ContentTransferEncoding::QuotedPrintable, - ContentTransferEncoding::Base64 => ContentTransferEncoding::Base64, - ContentTransferEncoding::Unknown(Cow::Owned(value)) => { - ContentTransferEncoding::Unknown(Cow::Owned(value)) - } - ContentTransferEncoding::Unknown(Cow::Borrowed(value)) => { - ContentTransferEncoding::Unknown(Cow::Owned(value.to_owned())) - } - } - } -} diff --git a/email-parser/src/parsing/address.rs b/email-parser/src/parsing/address.rs deleted file mode 100644 index 79c6752..0000000 --- a/email-parser/src/parsing/address.rs +++ /dev/null @@ -1,295 +0,0 @@ -use crate::address::*; -use crate::prelude::*; -use std::borrow::Cow; - -pub fn message_id(input: &[u8]) -> Res<(Cow, Cow)> { - fn no_fold_litteral(input: &[u8]) -> Res> { - let (input, ()) = tag( - input, - b"[", - "TAG ERROR: In message_id, a no_fold_litteral domain must be preceded by a `[`.", - )?; - let (input, domain) = take_while(input, is_dtext)?; - let (input, ()) = tag( - input, - b"]", - "TAG ERROR: In message_id, a no_fold_litteral domain must be closed by a `]`.", - )?; - Ok((input, Cow::Borrowed(domain))) - } - - let (input, _cfws) = optional(input, cfws); - let (input, ()) = tag( - input, - b"<", - "TAG ERROR: A message ID must start with a `<`.", - )?; - let (input, id_left) = dot_atom_text(input)?; - let (input, ()) = tag( - input, - b"@", - "TAG ERROR: A message ID right part must be followed by a `@`.", - )?; - let (input, id_right) = match_parsers(input, &mut [dot_atom_text, no_fold_litteral][..])?; - let (input, ()) = tag( - input, - b">", - "TAG ERROR: A message ID left part must be followed by a `>`.", - )?; - let (input, _cfws) = optional(input, cfws); - - Ok((input, (id_left, id_right))) -} - -pub fn addr_spec(input: &[u8]) -> Res { - let (input, local_part) = local_part(input)?; - let (input, ()) = tag( - input, - b"@", - "TAG ERROR: An address local part must be followed by a `@`.", - )?; - let (input, domain) = domain(input)?; - Ok((input, EmailAddress { local_part, domain })) -} - -pub fn angle_addr(input: &[u8]) -> Res { - let (input, _cfws) = optional(input, cfws); - let (input, ()) = tag( - input, - b"<", - "TAG ERROR: A angle_addr must start with a `<`.", - )?; - let (input, addr_spec) = addr_spec(input)?; - let (input, ()) = tag(input, b">", "TAG ERROR: A angle_addr must end with a `>`.")?; - let (input, _cfws) = optional(input, cfws); - Ok((input, addr_spec)) -} - -pub fn name_addr(input: &[u8]) -> Res { - let (input, display_name) = optional(input, phrase); - let (input, angle_addr) = angle_addr(input)?; - - Ok(( - input, - Mailbox { - name: display_name, - address: angle_addr, - }, - )) -} - -pub fn local_part(input: &[u8]) -> Res> { - match_parsers(input, &mut [dot_atom, quoted_string][..]) -} - -pub fn domain(input: &[u8]) -> Res> { - match_parsers(input, &mut [dot_atom, domain_literal][..]) -} - -pub fn domain_literal<'a>(input: &'a [u8]) -> Res> { - let (input, _cfws) = optional(input, cfws); - let (mut input, ()) = tag( - input, - b"[", - "TAG ERROR: A domain litteral must be preceded by a `[`.", - )?; - let mut output = empty_string(); - loop { - let (new_input, _fws) = optional(input, fws); - if let Ok((new_input, text)) = take_while1(new_input, is_dtext) { - input = new_input; - //add_string(&mut output, fws); should it be added? - add_str(&mut output, text); - } else { - break; - } - } - let (input, _fws) = optional(input, fws); - let (input, ()) = tag( - input, - b"]", - "TAG ERROR: A domain litteral must be followed by a `]`.", - )?; - let (input, _cfws) = optional(input, cfws); - Ok((input, output)) -} - -pub fn mailbox(input: &[u8]) -> Res { - match_parsers( - input, - &mut [ - name_addr, - (|input| { - addr_spec(input).map(|(i, m)| { - ( - i, - Mailbox { - name: None, - address: m, - }, - ) - }) - }) as fn(input: &[u8]) -> Res, - ][..], - ) -} - -pub fn mailbox_list(input: &[u8]) -> Res> { - let mut mailboxes = Vec::new(); - let (mut input, first_mailbox) = mailbox(input)?; - mailboxes.push(first_mailbox); - - while let Ok((new_input, new_mailbox)) = prefixed(input, mailbox, ",") { - input = new_input; - mailboxes.push(new_mailbox); - } - - Ok((input, mailboxes)) -} - -pub fn group(input: &[u8]) -> Res<(Vec>, Vec)> { - let (input, display_name) = phrase(input)?; - let (mut input, ()) = tag( - input, - b":", - "TAG ERROR: A group display name must be followed by a `:`.", - )?; - - let group_list = if let Ok((new_input, mailbox_list)) = mailbox_list(input) { - input = new_input; - mailbox_list - } else if let Ok((new_input, _cfws)) = cfws(input) { - input = new_input; - Vec::new() - } else { - Vec::new() - }; - - let (input, ()) = tag( - input, - b";", - "TAG ERROR: A group mailbox list must be closed by a `;`.", - )?; - let (input, _cfws) = optional(input, cfws); - Ok((input, (display_name, group_list))) -} - -pub fn address(input: &[u8]) -> Res
{ - if let Ok((input, mailbox)) = mailbox(input) { - Ok((input, Address::Mailbox(mailbox))) - } else if let Ok((input, group)) = group(input) { - Ok((input, Address::Group(group))) - } else { - Err(Error::Unknown("Invalid address: not a mailbox nor a group")) - } -} - -pub fn address_list(input: &[u8]) -> Res> { - let mut addresses = Vec::new(); - let (mut input, first_address) = address(input)?; - addresses.push(first_address); - - while let Ok((new_input, new_address)) = prefixed(input, address, ",") { - input = new_input; - addresses.push(new_address); - } - - Ok((input, addresses)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_local_part() { - assert_eq!(local_part(b"mubelotix").unwrap().1, "mubelotix"); - assert_eq!( - local_part(b"\"mubelotix\\ the\\ admin\"").unwrap().1, - "mubelotix the admin" - ); - } - - #[test] - fn test_message_id() { - assert_eq!(message_id(b"").unwrap().1 .0, "idleft"); - assert_eq!(message_id(b"").unwrap().1 .1, "idright"); - assert_eq!(message_id(b"").unwrap().1 .1, "idright"); - } - - #[test] - fn test_domain() { - assert_eq!( - domain_literal(b"[mubelotix.dev]").unwrap().1, - "mubelotix.dev" - ); - assert_eq!( - domain_literal(b"[mubelotix\r\n .dev]").unwrap().1, - "mubelotix.dev" - ); - - assert_eq!(domain(b"[mubelotix\r\n .dev]").unwrap().1, "mubelotix.dev"); - assert_eq!(domain(b"mubelotix.dev").unwrap().1, "mubelotix.dev"); - } - - #[test] - fn test_addr() { - let address = addr_spec(b"mubelotix@mubelotix.dev").unwrap().1; - assert_eq!(address.local_part, "mubelotix"); - assert_eq!(address.domain, "mubelotix.dev"); - - let address = addr_spec(b"\"special\\ person\"@gmail.com").unwrap().1; - assert_eq!(address.local_part, "special person"); - assert_eq!(address.domain, "gmail.com"); - - let mlbx = name_addr(b"").unwrap().1; - assert!(mlbx.name.is_none()); - assert_eq!(mlbx.address.local_part, "mubelotix"); - assert_eq!(mlbx.address.domain, "gmail.com"); - - let mlbx = name_addr(b"Random Guy ").unwrap().1; - assert_eq!(mlbx.name.unwrap().len(), 2); - assert_eq!(mlbx.address.local_part, "someone"); - assert_eq!(mlbx.address.domain, "gmail.com"); - - let mlbx = mailbox(b"mubelotix@mubelotix.dev").unwrap().1; - assert!(mlbx.name.is_none()); - assert_eq!(mlbx.address.local_part, "mubelotix"); - assert_eq!(mlbx.address.domain, "mubelotix.dev"); - - let mlbx = mailbox(b"Random Guy ").unwrap().1; - assert_eq!(mlbx.name.unwrap(), vec!["Random", "Guy"]); - assert_eq!(mlbx.address.local_part, "someone"); - assert_eq!(mlbx.address.domain, "gmail.com"); - } - - #[test] - fn test_lists() { - assert_eq!( - mailbox_list(b"test@gmail.com,Michel,") - .unwrap() - .1 - .len(), - 3 - ); - - let (name, list) = - group(b"Developers: Mubelotix , Someone ;") - .unwrap() - .1; - assert_eq!(name[0], "Developers"); - assert_eq!(list[0].name.as_ref().unwrap(), &vec!["Mubelotix"]); - assert_eq!(list[0].address.local_part, "mubelotix"); - assert_eq!(list[0].address.domain, "mubelotix.dev"); - - assert_eq!( - address_list( - b"mubelotix@gmail.com,guy@gmail.com,Developers:mubelotix@gmail.com,guy@gmail.com;" - ) - .unwrap() - .1 - .len(), - 3 - ); - } -} diff --git a/email-parser/src/parsing/character_sets.rs b/email-parser/src/parsing/character_sets.rs deleted file mode 100644 index 0f64799..0000000 --- a/email-parser/src/parsing/character_sets.rs +++ /dev/null @@ -1,145 +0,0 @@ -use crate::prelude::*; - -#[inline] -pub fn is_wsp(character: u8) -> bool { - character == 9 || character == 32 -} - -#[inline] -pub fn is_ctext(character: u8) -> bool { - (character >= 33 && character <= 39) - || (character >= 42 && character <= 91) - || (character >= 93 && character <= 126) -} - -#[inline] -pub fn is_vchar(character: u8) -> bool { - character >= 0x21 && character <= 0x7e -} - -#[inline] -pub fn is_alpha(c: u8) -> bool { - (c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a) -} - -#[inline] -pub fn is_digit(c: u8) -> bool { - c >= 0x30 && c <= 0x39 -} - -pub fn digit(input: &[u8]) -> Result<(&[u8], u8), Error> { - match input.get(0) { - Some(b'0') => Ok((&input[1..], 0)), - Some(b'1') => Ok((&input[1..], 1)), - Some(b'2') => Ok((&input[1..], 2)), - Some(b'3') => Ok((&input[1..], 3)), - Some(b'4') => Ok((&input[1..], 4)), - Some(b'5') => Ok((&input[1..], 5)), - Some(b'6') => Ok((&input[1..], 6)), - Some(b'7') => Ok((&input[1..], 7)), - Some(b'8') => Ok((&input[1..], 8)), - Some(b'9') => Ok((&input[1..], 9)), - _ => Err(Error::Unknown("Invalid digit")), - } -} - -pub fn two_digits(input: &[u8]) -> Result<(&[u8], u8), Error> { - let (input, first) = digit(input)?; - let (input, second) = digit(input)?; - - Ok((input, first * 10 + second)) -} - -#[inline] -pub fn is_dtext(c: u8) -> bool { - (c >= 33 && c <= 90) || (c >= 94 && c <= 126) -} - -#[inline] -pub fn is_atext(c: u8) -> bool { - is_alpha(c) - || is_digit(c) - || c == b'!' - || c == b'#' - || c == b'$' - || c == b'%' - || c == b'&' - || c == b'\'' - || c == b'*' - || c == b'+' - || c == b'-' - || c == b'/' - || c == b'=' - || c == b'?' - || c == b'^' - || c == b'_' - || c == b'`' - || c == b'{' - || c == b'|' - || c == b'}' - || c == b'~' -} - -#[inline] -pub fn special(c: u8) -> bool { - c == b'(' - || c == b')' - || c == b'<' - || c == b'>' - || c == b'[' - || c == b']' - || c == b':' - || c == b';' - || c == b'@' - || c == b'\\' - || c == b',' - || c == b'.' - || c == b'"' -} - -#[inline] -pub fn tspecial(c: u8) -> bool { - c == b'(' - || c == b')' - || c == b'<' - || c == b'>' - || c == b'[' - || c == b']' - || c == b':' - || c == b';' - || c == b'@' - || c == b'\\' - || c == b',' - || c == b'/' - || c == b'?' - || c == b'=' - || c == b'"' -} - -#[inline] -pub fn is_qtext(c: u8) -> bool { - (c >= 35 && c <= 126 && c != 92) || c == 33 -} - -#[inline] -pub fn is_ftext(c: u8) -> bool { - (c >= 33 && c <= 57) || (c >= 59 && c <= 126) -} - -#[inline] -pub fn is_text(c: u8) -> bool { - c >= 1 && c <= 127 && c != 10 && c != 13 -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_wsp() { - assert!(is_wsp(b' ')); - assert!(is_wsp(b'\t')); - assert!(!is_wsp(b'a')); - assert!(!is_wsp(b'e')); - } -} diff --git a/email-parser/src/parsing/combinators.rs b/email-parser/src/parsing/combinators.rs deleted file mode 100644 index f2c2d0f..0000000 --- a/email-parser/src/parsing/combinators.rs +++ /dev/null @@ -1,297 +0,0 @@ -use crate::prelude::*; -use std::borrow::Cow; - -#[inline] -pub(crate) fn tag<'a>( - input: &'a [u8], - expected: &'static [u8], - error_message: &'static str, -) -> Res<'a, ()> { - debug_assert!(std::str::from_utf8(expected).is_ok()); - if input.starts_with(expected) { - Ok((unsafe { input.get_unchecked(expected.len()..) }, ())) - } else { - Err(Error::Explicit(error_message)) - } -} - -#[inline] -pub(crate) fn tag_no_case<'a>( - input: &'a [u8], - expected: &'static [u8], - expected2: &'static [u8], - error_message: &'static str, -) -> Res<'a, ()> { - debug_assert_eq!(expected.len(), expected2.len()); - debug_assert!(std::str::from_utf8(expected).is_ok()); - debug_assert!(std::str::from_utf8(expected2).is_ok()); - - #[cfg(debug_assertions)] - for i in 0..expected.len() { - if (expected[i].is_ascii_lowercase() && expected[i].to_ascii_uppercase() != expected2[i]) - || (expected[i].is_ascii_uppercase() - && expected[i].to_ascii_lowercase() != expected2[i]) - { - panic!("tag_no_case() is supposed to take opposite characters but it is not the case for {:?}", std::str::from_utf8(expected).unwrap()); - } - } - - if input.len() < expected.len() { - return Err(Error::Unknown( - "Tag error, input is smaller than expected string", - )); - } - - for idx in 0..expected.len() { - unsafe { - if input.get_unchecked(idx) != expected.get_unchecked(idx) - && input.get_unchecked(idx) != expected2.get_unchecked(idx) - { - return Err(Error::Explicit(error_message)); - } - } - } - - Ok((unsafe { input.get_unchecked(expected.len()..) }, ())) -} - -#[inline] -pub(crate) fn optional<'a, T, F>(input: &'a [u8], mut parser: F) -> (&'a [u8], Option) -where - F: FnMut(&'a [u8]) -> Res, -{ - if let Ok((input, parser)) = parser(input) { - (input, Some(parser)) - } else { - (input, None) - } -} - -#[inline] -pub(crate) fn match_parsers<'a, T, F>(input: &'a [u8], parsers: &mut [F]) -> Res<'a, T> -where - F: FnMut(&'a [u8]) -> Res, -{ - for parser in parsers { - let result = parser(input); - if result.is_ok() { - return result; - } - } - Err(Error::Unknown("No match arm is matching the data")) -} - -#[inline] -pub fn take_while(input: &[u8], mut condition: F) -> Res<&str> -where - F: FnMut(u8) -> bool, -{ - for i in 0..input.len() { - unsafe { - if !condition(*input.get_unchecked(i)) { - return Ok(( - input.get_unchecked(i..), - std::str::from_utf8_unchecked(input.get_unchecked(..i)), - )); - } - } - } - Ok((&[], unsafe { std::str::from_utf8_unchecked(input) })) -} - -#[inline] -pub fn take_while1(input: &[u8], mut condition: F) -> Res<&str> -where - F: FnMut(u8) -> bool, -{ - if let Some(character) = input.get(0) { - if !condition(*character) { - return Err(Error::Unknown("Expected at least one character matching")); - } - } else { - return Err(Error::Unknown( - "Expected at least one character matching, but there is no character", - )); - } - - for i in 1..input.len() { - unsafe { - if !condition(*input.get_unchecked(i)) { - return Ok(( - input.get_unchecked(i..), - std::str::from_utf8_unchecked(input.get_unchecked(..i)), - )); - } - } - } - Ok((&[], unsafe { std::str::from_utf8_unchecked(input) })) -} - -#[inline] -pub fn ignore_many<'a, T, F>(mut input: &'a [u8], mut parser: F) -> Res<()> -where - F: FnMut(&'a [u8]) -> Res, -{ - while let Ok((new_input, _result)) = parser(input) { - input = new_input; - } - - Ok((input, ())) -} - -#[inline] -pub fn many<'a, T, F>(mut input: &'a [u8], mut parser: F) -> Res> -where - F: FnMut(&'a [u8]) -> Res, -{ - let mut results = Vec::new(); - - while let Ok((new_input, new_result)) = parser(input) { - input = new_input; - results.push(new_result); - } - - Ok((input, results)) -} - -#[inline] -pub fn many1<'a, T, F>(input: &'a [u8], mut parser: F) -> Res> -where - F: FnMut(&'a [u8]) -> Res, -{ - let mut results = Vec::new(); - let (mut input, first_result) = parser(input)?; - results.push(first_result); - - while let Ok((new_input, new_result)) = parser(input) { - input = new_input; - results.push(new_result); - } - - Ok((input, results)) -} - -#[inline] -pub fn collect_many<'a, F>(mut input: &'a [u8], mut parser: F) -> Res> -where - F: FnMut(&'a [u8]) -> Res>, -{ - let mut result = empty_string(); - - while let Ok((new_input, new_result)) = parser(input) { - input = new_input; - add_string(&mut result, new_result); - } - - Ok((input, result)) -} - -#[inline] -pub fn pair<'a, T, U, F, G>(input: &'a [u8], mut parser1: F, mut parser2: G) -> Res<(U, T)> -where - F: FnMut(&'a [u8]) -> Res, - G: FnMut(&'a [u8]) -> Res, -{ - let (input, first) = parser1(input)?; - let (input, second) = parser2(input)?; - - Ok((input, (first, second))) -} - -#[inline] -pub fn triplet<'a, T, U, V, F, G, H>( - input: &'a [u8], - mut parser1: F, - mut parser2: G, - mut parser3: H, -) -> Res<(U, T, V)> -where - F: FnMut(&'a [u8]) -> Res, - G: FnMut(&'a [u8]) -> Res, - H: FnMut(&'a [u8]) -> Res, -{ - let (input, first) = parser1(input)?; - let (input, second) = parser2(input)?; - let (input, third) = parser3(input)?; - - Ok((input, (first, second, third))) -} - -#[inline] -pub fn collect_pair<'a, F, G>(input: &'a [u8], mut parser1: F, mut parser2: G) -> Res> -where - F: FnMut(&'a [u8]) -> Res>, - G: FnMut(&'a [u8]) -> Res>, -{ - let (input, mut first) = parser1(input)?; - let (input, second) = parser2(input)?; - add_string(&mut first, second); - - Ok((input, first)) -} - -#[inline] -pub fn prefixed<'a, 'b, T, F>( - mut input: &'a [u8], - mut parser: F, - prefix: &'b str, -) -> Result<(&'a [u8], T), Error> -where - F: FnMut(&'a [u8]) -> Result<(&'a [u8], T), Error>, -{ - if input.starts_with(prefix.as_bytes()) { - input = unsafe { input.get_unchecked(prefix.len()..) }; - } else { - return Err(Error::Unknown("Expected a prefix")); - } - parser(input) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn unsafe_add_test() { - let data = b"abcdef"; - let mut data1 = from_slice(&data[..3]); - let data2 = from_slice(&data[3..]); - add_string(&mut data1, data2); - - let mut data3 = from_slice(&data[..2]); - let data4 = from_slice(&data[3..]); - add_string(&mut data3, data4); - - assert!(matches!(data1, std::borrow::Cow::Borrowed(_))); - assert!(matches!(data3, std::borrow::Cow::Owned(_))); - } - - #[test] - fn test_optional() { - assert!( - optional(b"abcdef", |input| tag(input, b"efg", "TAG ERROR: Testing")) - .1 - .is_none() - ); - assert!( - optional(b"abcdef", |input| tag(input, b"abc", "TAG ERROR: Testing")) - .1 - .is_some() - ); - } - - #[test] - fn test_take_while() { - assert_eq!(take_while(b" abc", is_wsp).unwrap().1.len(), 5); - assert_eq!(take_while(b"abc", is_wsp).unwrap().1.len(), 0); - } - - #[test] - fn test_tag() { - assert!(tag(b"abc", b"def", "TAG ERROR: Testing").is_err()); - assert!(tag(b"abc", b"ab", "TAG ERROR: Testing").is_ok()); - assert_eq!(tag(b"abc", b"abc", "TAG ERROR: Testing").unwrap().0, b""); - assert!(tag(b"abc", b"Ab", "TAG ERROR: Testing").is_err()); - assert!(tag_no_case(b"abc", b"Ab", b"aB", "TAG ERROR: Testing no case").is_ok()); - } -} diff --git a/email-parser/src/parsing/common.rs b/email-parser/src/parsing/common.rs deleted file mode 100644 index cc2544b..0000000 --- a/email-parser/src/parsing/common.rs +++ /dev/null @@ -1,246 +0,0 @@ -use crate::prelude::*; -use std::borrow::Cow; - -pub fn lowercase(mut value: Cow) -> Cow { - let mut change_needed = false; - for c in value.chars() { - if c.is_uppercase() { - change_needed = true; - } - } - if change_needed { - value = Cow::Owned(value.to_ascii_lowercase()); - } - value -} - -pub fn atom(mut input: &[u8]) -> Res<&str> { - if let Ok((new_input, _)) = cfws(input) { - input = new_input - } - let (mut input, atom) = - take_while1(input, is_atext).map_err(|_| Error::Unknown("Atom required"))?; - if let Ok((new_input, _)) = cfws(input) { - input = new_input - } - Ok((input, atom)) -} - -pub fn dot_atom_text(input: &[u8]) -> Res> { - let (mut input, output) = take_while1(input, is_atext)?; - let mut output = Cow::Borrowed(output); - - loop { - if input.starts_with(b".") { - if let Ok((new_input, atom)) = if cfg!(feature = "compatibility-fixes") { - take_while(&input[1..], is_atext) - } else { - take_while1(&input[1..], is_atext) - } { - add_string(&mut output, from_slice(&input[..1])); - input = new_input; - add_str(&mut output, atom); - } else { - break; - } - } else { - break; - } - } - - Ok((input, output)) -} - -pub fn dot_atom(mut input: &[u8]) -> Result<(&[u8], Cow), Error> { - if let Ok((new_input, _)) = cfws(input) { - input = new_input - } - let (mut input, dot_atom) = dot_atom_text(input)?; - if let Ok((new_input, _)) = cfws(input) { - input = new_input - } - Ok((input, dot_atom)) -} - -pub fn word(input: &[u8]) -> Res> { - match_parsers( - input, - &mut [ - |input| { - let (input, value) = atom(input)?; - Ok((input, Cow::Borrowed(value))) - }, - |input| quoted_string(input), - ][..], - ) -} - -pub fn phrase(input: &[u8]) -> Result<(&[u8], Vec>), Error> { - #[cfg(feature = "mime")] - fn word(input: &[u8]) -> Res> { - match_parsers( - input, - &mut [ - (|i| { - let (i, _) = optional(i, fws); - crate::parsing::mime::encoded_headers::encoded_word(i) - }), - (|i| crate::parsing::common::word(i)), - ][..], - ) - } - - let mut words = Vec::new(); - let (mut input, first_word) = word(input)?; - words.push(first_word); - - while let Ok((new_input, word)) = word(input) { - input = new_input; - words.push(word) - } - - Ok((input, words)) -} - -pub fn unstructured(input: &[u8]) -> Result<(&[u8], Cow), Error> { - let (mut input, output) = collect_many(input, |i| { - collect_pair( - i, - |i| Ok(fws(i).unwrap_or((i, empty_string()))), - |i| { - let (input, value) = take_while1(i, is_vchar)?; - Ok((input, Cow::Borrowed(value))) - }, - ) - })?; - - while let Ok((new_input, _wsp)) = take_while1(input, is_wsp) { - input = new_input; - } - - Ok((input, output)) -} - -#[cfg(feature = "mime")] -pub fn mime_unstructured(input: &[u8]) -> Res> { - let mut previous_was_encoded = false; - let (mut input, output) = collect_many(input, |i| { - let (i, mut wsp) = fws(i).unwrap_or((i, empty_string())); - - if let Ok((i, text)) = crate::parsing::mime::encoded_headers::encoded_word(i) { - if previous_was_encoded { - return Ok((i, text)); - } else { - previous_was_encoded = true; - add_string(&mut wsp, text); - return Ok((i, wsp)); - } - } else if let Ok((i, text)) = take_while1(i, is_vchar) { - previous_was_encoded = false; - add_str(&mut wsp, text); - return Ok((i, wsp)); - } - Err(Error::Unknown("No match arm is matching the data")) - })?; - - while let Ok((new_input, _wsp)) = take_while1(input, is_wsp) { - input = new_input; - } - - Ok((input, output)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[cfg(feature = "mime")] - #[test] - fn test_encoded_unstructured() { - assert_eq!( - "the quick brown fox jumps over Chloé Helloco", - mime_unstructured( - b"the quick brown fox jumps\r\n over =?UTF-8?Q?Chlo=C3=A9_Helloco?= " - ) - .unwrap() - .1 - ); - - assert_eq!("a", mime_unstructured(b"=?ISO-8859-1?Q?a?=").unwrap().1); - assert_eq!("a b", mime_unstructured(b"=?ISO-8859-1?Q?a?= b").unwrap().1); - assert_eq!( - "ab", - mime_unstructured(b"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=") - .unwrap() - .1 - ); - assert_eq!( - "ab", - mime_unstructured(b"=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=") - .unwrap() - .1 - ); - assert_eq!( - "ab", - mime_unstructured(b"=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=") - .unwrap() - .1 - ); - assert_eq!("a b", mime_unstructured(b"=?ISO-8859-1?Q?a_b?=").unwrap().1); - assert_eq!( - "a b", - mime_unstructured(b"=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=") - .unwrap() - .1 - ); - } - - #[cfg(feature = "mime")] - #[test] - fn test_encoded_phrase() { - assert_eq!( - phrase(b"Lou =?UTF-8?Q?Dorl=C3=A9ans?=").unwrap().1, - vec!["Lou", "Dorléans"] - ); - - assert_eq!( - phrase(b"=?ISO-8859-1?Q?Andr=E9?= Pirard").unwrap().1, - vec!["André", "Pirard"] - ); - - assert_eq!( - phrase(b" =?US-ASCII?Q?Keith_Moore?=").unwrap().1, - vec!["Keith Moore"] - ); - } - - #[test] - fn test_word_and_phrase() { - assert_eq!(word(b" this is a \"rust\\ test\" ").unwrap().1, "this"); - assert_eq!( - phrase(b" this is a \"rust\\ test\" ").unwrap().1, - vec!["this", "is", "a", "rust test"] - ); - } - - #[test] - fn test_unstructured() { - assert_eq!( - unstructured(b"the quick brown fox jumps\r\n over the lazy dog ") - .unwrap() - .1, - "the quick brown fox jumps over the lazy dog" - ); - } - - #[test] - fn test_atom() { - assert_eq!(atom(b"this is a test").unwrap().1, "this"); - assert_eq!(atom(b" averylongatom ").unwrap().1, "averylongatom"); - assert_eq!( - dot_atom_text(b"this.is.a.test").unwrap().1, - "this.is.a.test" - ); - assert_eq!(dot_atom(b" this.is.a.test ").unwrap().1, "this.is.a.test"); - } -} diff --git a/email-parser/src/parsing/fields.rs b/email-parser/src/parsing/fields.rs deleted file mode 100644 index 8a47b9d..0000000 --- a/email-parser/src/parsing/fields.rs +++ /dev/null @@ -1,849 +0,0 @@ -use crate::address::*; -use crate::parsing::time::*; -use crate::prelude::*; -use std::borrow::Cow; -#[cfg(feature = "mime")] -use std::collections::HashMap; - -#[derive(Debug)] -pub enum TraceField<'a> { - Date(DateTime), - From(Vec>), - Sender(Mailbox<'a>), - To(Vec>), - Cc(Vec>), - Bcc(Vec>), - MessageId((Cow<'a, str>, Cow<'a, str>)), -} - -#[derive(Debug)] -pub enum Field<'a> { - #[cfg(feature = "date")] - Date(DateTime), - #[cfg(feature = "from")] - From(Vec>), - #[cfg(feature = "sender")] - Sender(Mailbox<'a>), - #[cfg(feature = "reply-to")] - ReplyTo(Vec>), - #[cfg(feature = "to")] - To(Vec>), - #[cfg(feature = "cc")] - Cc(Vec>), - #[cfg(feature = "bcc")] - Bcc(Vec>), - #[cfg(feature = "message-id")] - MessageId((Cow<'a, str>, Cow<'a, str>)), - #[cfg(feature = "in-reply-to")] - InReplyTo(Vec<(Cow<'a, str>, Cow<'a, str>)>), - #[cfg(feature = "references")] - References(Vec<(Cow<'a, str>, Cow<'a, str>)>), - #[cfg(feature = "subject")] - Subject(Cow<'a, str>), - #[cfg(feature = "comments")] - Comments(Cow<'a, str>), - #[cfg(feature = "keywords")] - Keywords(Vec>>), - #[cfg(feature = "mime")] - MimeVersion(u8, u8), - #[cfg(feature = "mime")] - ContentType { - mime_type: ContentType<'a>, - subtype: Cow<'a, str>, - parameters: HashMap, Cow<'a, str>>, - }, - #[cfg(feature = "mime")] - ContentTransferEncoding(ContentTransferEncoding<'a>), - #[cfg(feature = "mime")] - ContentId((Cow<'a, str>, Cow<'a, str>)), - #[cfg(feature = "mime")] - ContentDescription(Cow<'a, str>), - #[cfg(feature = "content-disposition")] - ContentDisposition(Disposition<'a>), - #[cfg(feature = "trace")] - Trace { - return_path: Option>>, - received: Vec<(Vec>, DateTime)>, - fields: Vec>, - }, - Unknown { - name: &'a str, - value: Cow<'a, str>, - }, -} - -pub fn fields(mut input: &[u8]) -> Res> { - let mut fields: Vec = Vec::new(); - - #[cfg(feature = "trace")] - while let Ok((new_input, trace)) = trace(input) { - input = new_input; - let mut trace_fields = Vec::new(); - - while let Ok((new_input, new_result)) = match_parsers( - input, - &mut [ - |i| resent_date(i).map(|(i, v)| (i, TraceField::Date(v))), - |i| resent_from(i).map(|(i, v)| (i, TraceField::From(v))), - |i| resent_sender(i).map(|(i, v)| (i, TraceField::Sender(v))), - |i| resent_to(i).map(|(i, v)| (i, TraceField::To(v))), - |i| resent_cc(i).map(|(i, v)| (i, TraceField::Cc(v))), - |i| resent_bcc(i).map(|(i, v)| (i, TraceField::Bcc(v))), - |i| resent_message_id(i).map(|(i, v)| (i, TraceField::MessageId(v))), - ][..], - ) { - input = new_input; - trace_fields.push(new_result); - } - - // TODO optional fields - - fields.push(Field::Trace { - return_path: trace.0, - received: trace.1, - fields: trace_fields, - }); - } - - while let Ok((new_input, field)) = match_parsers( - input, - &mut [ - #[cfg(feature = "date")] - |i| date(i).map(|(i, v)| (i, Field::Date(v))), - #[cfg(feature = "from")] - |i| from(i).map(|(i, v)| (i, Field::From(v))), - #[cfg(feature = "sender")] - |i| sender(i).map(|(i, v)| (i, Field::Sender(v))), - #[cfg(feature = "reply-to")] - |i| reply_to(i).map(|(i, v)| (i, Field::ReplyTo(v))), - #[cfg(feature = "to")] - |i| to(i).map(|(i, v)| (i, Field::To(v))), - #[cfg(feature = "cc")] - |i| cc(i).map(|(i, v)| (i, Field::Cc(v))), - #[cfg(feature = "bcc")] - |i| bcc(i).map(|(i, v)| (i, Field::Bcc(v))), - #[cfg(feature = "message-id")] - |i| message_id(i).map(|(i, v)| (i, Field::MessageId(v))), - #[cfg(feature = "in-reply-to")] - |i| in_reply_to(i).map(|(i, v)| (i, Field::InReplyTo(v))), - #[cfg(feature = "references")] - |i| references(i).map(|(i, v)| (i, Field::References(v))), - #[cfg(feature = "subject")] - |i| subject(i).map(|(i, v)| (i, Field::Subject(v))), - #[cfg(feature = "comments")] - |i| comments(i).map(|(i, v)| (i, Field::Comments(v))), - #[cfg(feature = "mime")] - |i| mime_version(i).map(|(i, (mj, mn))| (i, Field::MimeVersion(mj, mn))), - #[cfg(feature = "mime")] - |i| { - content_type(i).map(|(i, (t, st, p))| { - ( - i, - Field::ContentType { - mime_type: t, - subtype: st, - parameters: p, - }, - ) - }) - }, - #[cfg(feature = "mime")] - |i| content_transfer_encoding(i).map(|(i, e)| (i, Field::ContentTransferEncoding(e))), - #[cfg(feature = "mime")] - |i| content_id(i).map(|(i, v)| (i, Field::ContentId(v))), - #[cfg(feature = "mime")] - |i| content_description(i).map(|(i, d)| (i, Field::ContentDescription(d))), - #[cfg(feature = "content-disposition")] - |i| content_disposition(i).map(|(i, d)| (i, Field::ContentDisposition(d))), - #[cfg(feature = "keywords")] - |i| keywords(i).map(|(i, v)| (i, Field::Keywords(v))), - |i| unknown(i).map(|(i, (name, value))| (i, Field::Unknown { name, value })), - ][..], - ) { - input = new_input; - fields.push(field); - } - - Ok((input, fields)) -} - -pub fn date(input: &[u8]) -> Res { - let (input, ()) = tag_no_case( - input, - b"Date:", - b"dATE:", - "TAG NO CASE ERROR: Header name (Date) does not match.", - )?; - let (input, date_time) = date_time(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Date` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, date_time)) -} - -pub fn from(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"From:", - b"fROM:", - "TAG NO CASE ERROR: Header name (From) does not match.", - )?; - let (input, mailbox_list) = mailbox_list(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`From` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, mailbox_list)) -} - -pub fn sender(input: &[u8]) -> Res { - let (input, ()) = tag_no_case( - input, - b"Sender:", - b"sENDER:", - "TAG NO CASE ERROR: Header name (Sender) does not match.", - )?; - let (input, mailbox) = mailbox(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Sender` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, mailbox)) -} - -pub fn reply_to(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Reply-To:", - b"rEPLY-tO:", - "TAG NO CASE ERROR: Header name (Reply-To) does not match.", - )?; - let (input, mailbox) = address_list(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Reply-To` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, mailbox)) -} - -pub fn to(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"To:", - b"tO:", - "TAG NO CASE ERROR: Header name (To) does not match.", - )?; - let (input, mailbox) = address_list(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`To` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, mailbox)) -} - -pub fn cc(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Cc:", - b"cC:", - "TAG NO CASE ERROR: Header name (Cc) does not match.", - )?; - let (input, mailbox) = address_list(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Cc` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, mailbox)) -} - -pub fn bcc(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Bcc:", - b"bCC:", - "TAG NO CASE ERROR: Header name (Bcc) does not match.", - )?; - let (input, mailbox) = if let Ok((input, list)) = address_list(input) { - (input, list) - } else if let Ok((input, _cfws)) = cfws(input) { - (input, Vec::new()) - } else { - return Err(Error::Unknown("Invalid bcc field")); - }; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Bcc` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, mailbox)) -} - -pub fn message_id(input: &[u8]) -> Res<(Cow, Cow)> { - let (input, ()) = tag_no_case( - input, - b"Message-ID:", - b"mESSAGE-id:", - "TAG NO CASE ERROR: Header name (Message-ID) does not match.", - )?; - let (input, id) = crate::parsing::address::message_id(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Message-ID` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, id)) -} - -pub fn in_reply_to(input: &[u8]) -> Res, Cow)>> { - let (input, ()) = tag_no_case( - input, - b"In-Reply-To:", - b"iN-rEPLY-tO:", - "TAG NO CASE ERROR: Header name (Reply-To) does not match.", - )?; - let (input, ids) = many1(input, crate::parsing::address::message_id)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`In-Reply-To` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, ids)) -} - -pub fn references(input: &[u8]) -> Res, Cow)>> { - let (input, ()) = tag_no_case( - input, - b"References:", - b"rEFERENCES:", - "TAG NO CASE ERROR: Header name (References) does not match.", - )?; - let (input, ids) = many1(input, crate::parsing::address::message_id)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`References` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, ids)) -} - -pub fn subject(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Subject:", - b"sUBJECT:", - "TAG NO CASE ERROR: Header name (Subject) does not match.", - )?; - #[cfg(not(feature = "mime"))] - let (input, subject) = unstructured(input)?; - #[cfg(feature = "mime")] - let (input, subject) = mime_unstructured(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Subject` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, subject)) -} - -pub fn comments(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Comments:", - b"cOMMENTS:", - "TAG NO CASE ERROR: Header name (Comments) does not match.", - )?; - #[cfg(not(feature = "mime"))] - let (input, comments) = unstructured(input)?; - #[cfg(feature = "mime")] - let (input, comments) = mime_unstructured(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Comments` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, comments)) -} - -pub fn keywords(input: &[u8]) -> Res>>> { - let (input, ()) = tag_no_case( - input, - b"Keywords:", - b"kEYWORDS:", - "TAG NO CASE ERROR: Header name (Keywords) does not match.", - )?; - - let mut keywords = Vec::new(); - let (mut input, first_keyword) = phrase(input)?; - keywords.push(first_keyword); - - while let Ok((new_input, new_keyword)) = prefixed(input, phrase, ",") { - input = new_input; - keywords.push(new_keyword); - } - - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Keywords` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, keywords)) -} - -pub fn resent_date(input: &[u8]) -> Res { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-Date) does not match.", - )?; - let (input, date) = date(input)?; - - Ok((input, date)) -} - -pub fn resent_from(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-From) does not match.", - )?; - let (input, from) = from(input)?; - - Ok((input, from)) -} - -pub fn resent_sender(input: &[u8]) -> Res { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-Sender) does not match.", - )?; - let (input, sender) = sender(input)?; - - Ok((input, sender)) -} - -pub fn resent_to(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-To) does not match.", - )?; - let (input, to) = to(input)?; - - Ok((input, to)) -} - -pub fn resent_cc(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-Cc) does not match.", - )?; - let (input, cc) = cc(input)?; - - Ok((input, cc)) -} - -pub fn resent_bcc(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-Bcc) does not match.", - )?; - let (input, bcc) = bcc(input)?; - - Ok((input, bcc)) -} - -pub fn resent_message_id(input: &[u8]) -> Res<(Cow, Cow)> { - let (input, ()) = tag_no_case( - input, - b"Resent-", - b"rESENT-", - "TAG NO CASE ERROR: Header name (Resent-Message-ID) does not match.", - )?; - let (input, id) = message_id(input)?; - - Ok((input, id)) -} - -pub fn return_path(input: &[u8]) -> Res> { - fn empty_path(input: &[u8]) -> Res<()> { - let (input, _cfws) = optional(input, cfws); - let (input, ()) = tag(input, b"<", "TAG ERROR: An empty path must start with `<`.")?; - let (input, _cfws) = optional(input, cfws); - let (input, ()) = tag(input, b">", "TAG ERROR: An empty path must end with `>`.")?; - let (input, _cfws) = optional(input, cfws); - Ok((input, ())) - } - - let (input, ()) = tag_no_case( - input, - b"Return-Path:", - b"rETURN-pATH:", - "TAG NO CASE ERROR: Header name (Return-Path) does not match.", - )?; - let (input, addr) = match_parsers( - input, - &mut [ - (|i| angle_addr(i).map(|(i, v)| (i, Some(v)))) - as fn(input: &[u8]) -> Res>, - (|i| empty_path(i).map(|(i, _)| (i, None))) - as fn(input: &[u8]) -> Res>, - ][..], - )?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Return-Path` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, addr)) -} - -#[derive(Debug)] -pub enum ReceivedToken<'a> { - Word(Cow<'a, str>), - Addr(EmailAddress<'a>), - Domain(Cow<'a, str>), -} - -pub fn received(input: &[u8]) -> Res<(Vec, DateTime)> { - let (input, ()) = tag_no_case( - input, - b"Received:", - b"rECEIVED:", - "TAG NO CASE ERROR: Header name (Received) does not match.", - )?; - let (input, received_tokens) = many(input, |input| { - if let Ok((word_input, word)) = word(input) { - if let Ok((domain_input, domain)) = domain(input) { - if domain.len() > word.len() { - return Ok((domain_input, ReceivedToken::Domain(domain))); - } - } - Ok((word_input, ReceivedToken::Word(word))) - } else if let Ok((input, addr)) = angle_addr(input) { - Ok((input, ReceivedToken::Addr(addr))) - } else if let Ok((input, addr)) = addr_spec(input) { - Ok((input, ReceivedToken::Addr(addr))) - } else if let Ok((input, domain)) = domain(input) { - Ok((input, ReceivedToken::Domain(domain))) - } else { - Err(Error::Unknown("match error")) - } - })?; - let (input, ()) = tag( - input, - b";", - "TAG ERROR: Received tokens must be followed by a `;`.", - )?; - let (input, date_time) = date_time(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Received` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, (received_tokens, date_time))) -} - -pub fn trace( - input: &[u8], -) -> Res<( - Option>, - Vec<(Vec, DateTime)>, -)> { - let (input, return_path) = optional(input, return_path); - let (input, received) = many1(input, received)?; - - Ok((input, (return_path, received))) -} - -pub fn unknown(input: &[u8]) -> Res<(&str, Cow)> { - let (input, name) = take_while1(input, is_ftext)?; - let (input, ()) = tag( - input, - b":", - "TAG ERROR: A header name must be followed by a `:`.", - )?; - #[cfg(not(feature = "unrecognized-headers"))] - let (input, value) = unstructured(input)?; - #[cfg(feature = "unrecognized-headers")] - let (input, value) = mime_unstructured(input)?; - - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header must end with a CRLF sequence.", - )?; - - Ok((input, (name, value))) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_fields() { - assert!(fields( - b"To: Mubelotix \r\nFrOm: Mubelotix \r\n" - ) - .unwrap() - .0 - .is_empty()); - //println!("{:#?}", fields(include_bytes!("../../mail.txt")).unwrap().1); - } - - #[test] - fn test_unknown_field() { - assert_eq!( - unknown(b"hidden-field:hidden message\r\n").unwrap().1 .1, - "hidden message" - ); - assert_eq!( - unknown(b"hidden-field:hidden message\r\n").unwrap().1 .0, - "hidden-field" - ); - } - - #[test] - fn test_trace() { - assert!(return_path(b"Return-Path:<>\r\n").unwrap().1.is_none()); - assert_eq!( - return_path(b"Return-Path:\r\n") - .unwrap() - .1 - .unwrap() - .local_part, - "mubelotix" - ); - - assert!(matches!( - received(b"Received:test;5 May 2003 18:59:03 +0000\r\n") - .unwrap() - .1 - .0[0], - ReceivedToken::Word(_) - )); - assert!(matches!( - received(b"Received:test;5 May 2003 18:59:03 +0000\r\n") - .unwrap() - .1 - .0[1], - ReceivedToken::Addr(_) - )); - assert!(matches!( - received(b"Received:mubelotix.dev;5 May 2003 18:59:03 +0000\r\n") - .unwrap() - .1 - .0[0], - ReceivedToken::Domain(_) - )); - - assert!(trace(b"Return-Path:<>\r\nReceived:akala miam miam;5 May 2003 18:59:03 +0000\r\nReceived:mubelotix.dev;5 May 2003 18:59:03 +0000\r\n").unwrap().0.is_empty()); - } - - #[test] - fn test_resent() { - assert!(resent_date(b"Resent-Date:5 May 2003 18:59:03 +0000\r\n").is_ok()); - assert_eq!( - resent_from(b"Resent-FrOm: Mubelotix \r\n") - .unwrap() - .1[0] - .address - .local_part, - "mubelotix" - ); - assert_eq!( - resent_sender(b"Resent-sender: Mubelotix \r\n") - .unwrap() - .1 - .address - .domain, - "gmail.com" - ); - assert!( - !resent_to(b"Resent-To: Mubelotix \r\n") - .unwrap() - .1 - .is_empty() - ); - assert!( - !resent_cc(b"Resent-Cc: Mubelotix \r\n") - .unwrap() - .1 - .is_empty() - ); - assert!( - !resent_bcc(b"Resent-Bcc: Mubelotix \r\n") - .unwrap() - .1 - .is_empty() - ); - } - - #[test] - fn test_date() { - assert!(date(b"Date:5 May 2003 18:59:03 +0000\r\n").is_ok()); - } - - #[test] - fn test_originators() { - assert_eq!( - from(b"FrOm: Mubelotix \r\n") - .unwrap() - .1[0] - .address - .local_part, - "mubelotix" - ); - assert_eq!( - sender(b"sender: Mubelotix \r\n") - .unwrap() - .1 - .address - .domain, - "gmail.com" - ); - assert_eq!( - reply_to(b"Reply-to: Mubelotix \r\n") - .unwrap() - .1 - .len(), - 1 - ); - } - - #[test] - fn test_destination() { - assert!(!to(b"To: Mubelotix \r\n") - .unwrap() - .1 - .is_empty()); - assert!(!cc(b"Cc: Mubelotix \r\n") - .unwrap() - .1 - .is_empty()); - assert!(!bcc(b"Bcc: Mubelotix \r\n") - .unwrap() - .1 - .is_empty()); - assert!(bcc(b"Bcc: \r\n \r\n").unwrap().1.is_empty()); - } - - #[test] - fn test_ids() { - assert_eq!( - message_id(b"Message-ID:<556100154@gmail.com>\r\n") - .unwrap() - .1 - .0, - "556100154" - ); - assert_eq!( - message_id(b"Message-ID:<556100154@gmail.com>\r\n") - .unwrap() - .1 - .1, - "gmail.com" - ); - - assert_eq!( - references(b"References:\r\n") - .unwrap() - .1 - .len(), - 2 - ); - - assert_eq!( - in_reply_to(b"In-Reply-To:<52@s.dz>\r\n") - .unwrap() - .1 - .len(), - 3 - ); - } - - #[test] - fn test_informational() { - assert_eq!( - subject(b"Subject:French school is boring\r\n").unwrap().1, - "French school is boring" - ); - assert_eq!( - subject(b"Subject:Folding\r\n is slow\r\n").unwrap().1, - "Folding is slow" - ); - - assert_eq!( - comments(b"Comments:Rust is great\r\n").unwrap().1, - "Rust is great" - ); - - assert_eq!( - keywords(b"Keywords:rust parser fast zero copy,email rfc5322\r\n") - .unwrap() - .1 - .len(), - 2 - ); - } - - #[test] - #[cfg(all(feature = "mime", feature = "unrecognized-headers"))] - fn test_mime_encoding() { - assert_eq!( - subject(b"Subject: =?UTF-8?B?8J+OiEJpcnRoZGF5IEdpdmVhd2F58J+OiA==?= Win free stickers\r\n from daily.dev =?UTF-8?B?8J+MiA==?=\r\n").unwrap().1, - " 🎈Birthday Giveaway🎈 Win free stickers from daily.dev 🌈" - ); - - assert_eq!( - comments(b"Comments: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=\r\n").unwrap().1, - " If you can read this you understand the example." - ); - - assert_eq!( - from(b"From: =?US-ASCII?Q?Keith_Moore?= \r\n") - .unwrap() - .1[0] - .name - .as_ref() - .unwrap()[0], - "Keith Moore" - ); - - assert_eq!( - unknown(b"X-SG-EID:\r\n =?us-ascii?Q?t3vk5cTFE=2FYEGeQ8h3SwrnzIAGc=2F+ADymlys=2FfRFW4Zjpt=2F3MuaO9JNHS2enYQ?=\r\n =?us-ascii?Q?Jsv0=2FpYrPem+YssHetKlrE5nJnOfr=2FYdJOyJFf8?=\r\n =?us-ascii?Q?3mRuMRE9KGu=2F5O75=2FwwN6dG14nuP4SyMIZwbMdG?=\r\n =?us-ascii?Q?vXmM2kgcM=2FOalKeT03BMp4YCg9h1LhkV6PZEoHB?=\r\n =?us-ascii?Q?d4tcAvNZQqLaA4ykI1EpNxKVVyZXVWqTp2uisdf?=\r\n =?us-ascii?Q?HB=2F6BKcIs+XSDNeakQqmn=2FwAqOk78AvtRB5LnNL?=\r\n =?us-ascii?Q?lz3oRXlMZbdFgRH+KAyLQ=3D=3D?=\r\n").unwrap().1.1, - " t3vk5cTFE/YEGeQ8h3SwrnzIAGc/+ADymlys/fRFW4Zjpt/3MuaO9JNHS2enYQJsv0/pYrPem+YssHetKlrE5nJnOfr/YdJOyJFf83mRuMRE9KGu/5O75/wwN6dG14nuP4SyMIZwbMdGvXmM2kgcM/OalKeT03BMp4YCg9h1LhkV6PZEoHBd4tcAvNZQqLaA4ykI1EpNxKVVyZXVWqTp2uisdfHB/6BKcIs+XSDNeakQqmn/wAqOk78AvtRB5LnNLlz3oRXlMZbdFgRH+KAyLQ==" - ); - } -} diff --git a/email-parser/src/parsing/message.rs b/email-parser/src/parsing/message.rs deleted file mode 100644 index c06a7ee..0000000 --- a/email-parser/src/parsing/message.rs +++ /dev/null @@ -1,165 +0,0 @@ -use crate::parsing::fields::{fields, Field}; -use crate::prelude::*; -use std::borrow::Cow; - -pub fn line(input: &[u8]) -> Res> { - let max_idx = std::cmp::min(input.len(), 998); - - // index cannot be out of range so no need to check - unsafe { - for i in 0..max_idx { - if !is_text(*input.get_unchecked(i)) { - return Ok(( - input.get_unchecked(i..), - from_slice(input.get_unchecked(..i)), - )); - } - } - - Ok(( - input.get_unchecked(max_idx..), - from_slice(input.get_unchecked(..max_idx)), - )) - } -} - -pub fn check_line(input: &[u8]) -> Res<()> { - let max_idx = std::cmp::min(input.len(), 998); - - // index cannot be out of range so no need to check - unsafe { - for i in 0..max_idx { - if !is_text(*input.get_unchecked(i)) { - return Ok((input.get_unchecked(i..), ())); - } - } - - Ok((input.get_unchecked(max_idx..), ())) - } -} - -pub fn body_lines(input: &[u8]) -> Result>, Error> { - if input.is_empty() { - return Ok(Vec::new()); - } - let (mut input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: Headers must be followed by a CRLF sequence.", - )?; - - let mut lines = Vec::new(); - loop { - let (new_input, new_line) = line(input)?; - match tag( - new_input, - b"\r\n", - "TAG ERROR: In a body, a line must end with a CRLF sequence.", - ) { - Ok((new_input, ())) => input = new_input, - Err(e) => { - if new_input.is_empty() { - lines.push(new_line); - break; - } else { - return Err(e); - } - } - } - lines.push(new_line); - } - - Ok(lines) -} - -pub fn body(input: &[u8]) -> Result>, Error> { - if input.is_empty() { - return Ok(None); - } - - let (mut new_input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: Headers must be followed by a CRLF sequence.", - )?; - - loop { - let (new_input2, ()) = check_line(new_input)?; - match tag( - new_input2, - b"\r\n", - "TAG ERROR: In a body, a line must end with a CRLF sequence.", - ) { - Ok((new_input2, ())) => new_input = new_input2, - Err(e) => { - if new_input2.is_empty() { - break; - } else { - return Err(e); - } - } - } - } - - Ok(Some(unsafe { - // there is a least 2 characters - from_slice(input.get_unchecked(2..)) - })) -} - -#[cfg(not(feature = "mime"))] -pub fn parse_message(input: &[u8]) -> Result<(Vec, Option>), Error> { - let (input, fields) = fields(input)?; - let body = body(input)?; - - Ok((fields, body)) -} - -#[cfg(feature = "mime")] -pub fn parse_message(input: &[u8]) -> Result<(Vec, Option<&[u8]>), Error> { - let (input, fields) = fields(input)?; - - if input.is_empty() { - return Ok((fields, None)); - } - - let (new_input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: Headers must be followed by a CRLF sequence.", - )?; - - Ok((fields, Some(new_input))) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_body() { - assert_eq!( - line(b"This is a line\r\nAnd this is a second line") - .unwrap() - .1, - "This is a line" - ); - assert_eq!( - body_lines(b"\r\nThis is a line\r\nAnd this is a second line") - .unwrap() - .len(), - 2 - ); - assert_eq!( - body(b"\r\nThis is a line\r\nAnd this is a second line") - .unwrap() - .unwrap(), - "This is a line\r\nAnd this is a second line" - ); - } - - #[test] - fn test_full_message() { - //println!("{:#?}", parse_message(include_bytes!("../../mail.txt")).unwrap()); - } -} diff --git a/email-parser/src/parsing/mime/base64.rs b/email-parser/src/parsing/mime/base64.rs deleted file mode 100644 index 1fc1082..0000000 --- a/email-parser/src/parsing/mime/base64.rs +++ /dev/null @@ -1,250 +0,0 @@ -use crate::prelude::*; - -const BASE64_MAP: [u8; 64] = [ - b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', - b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f', - b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', - b'w', b'x', b'y', b'z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'+', b'/', -]; - -pub fn encode_base64(data: Vec) -> Vec { - let mut encoded_data = Vec::new(); - let mut bytes = data.iter(); - let mut line_lenght = 0; - - while let Some(byte1) = bytes.next() { - if line_lenght >= 72 { - // 76 - 4 = 72 - encoded_data.push(b'\r'); - encoded_data.push(b'\n'); - line_lenght = 0; - } - - // JUSTIFICATION - // Benefit - // Gain performance by avoiding index checks on a fixed size array. - // Correctness - // Generated indexes cannot be greater than 63, which is the lenght of the array. - unsafe { - match (bytes.next(), bytes.next()) { - (Some(byte2), Some(byte3)) => { - let output_byte1 = (0b11111100 & byte1) >> 2; - let output_byte2 = ((0b00000011 & byte1) << 4) + ((0b11110000 & byte2) >> 4); - let output_byte3 = ((0b00001111 & byte2) << 2) + ((0b11000000 & byte3) >> 6); - let output_byte4 = 0b00111111 & byte3; - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte1 as usize)); - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte2 as usize)); - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte3 as usize)); - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte4 as usize)); - } - (Some(byte2), None) => { - let output_byte1 = (0b11111100 & byte1) >> 2; - let output_byte2 = ((0b00000011 & byte1) << 4) + ((0b11110000 & byte2) >> 4); - let output_byte3 = (0b00001111 & byte2) << 2; - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte1 as usize)); - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte2 as usize)); - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte3 as usize)); - encoded_data.push(b'='); - } - (None, None) => { - let output_byte1 = (0b11111100 & byte1) >> 2; - let output_byte2 = (0b00000011 & byte1) << 4; - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte1 as usize)); - encoded_data.push(*BASE64_MAP.get_unchecked(output_byte2 as usize)); - encoded_data.push(b'='); - encoded_data.push(b'='); - } - _ => unreachable!(), - } - } - - line_lenght += 4; - } - - encoded_data -} - -fn get_value_encoded(c: u8) -> Option { - match c { - b'A'..=b'Z' => Some(c - b'A'), - b'a'..=b'z' => Some(26 + (c - b'a')), - b'0'..=b'9' => Some(26 * 2 + (c - b'0')), - b'+' => Some(62), - b'/' => Some(63), - _ => None, - } -} - -pub fn decode_base64(mut data: Vec) -> Result, Error> { - let mut i = 0; - let mut offset = 0; - - 'main: loop { - let b1 = 'inner1: loop { - match data.get(i) { - Some(b) => { - if let Some(b) = get_value_encoded(*b) { - break 'inner1 b; - } else { - i += 1; - offset += 1; - } - } - None => break 'main, - } - }; - - let b2 = 'inner2: loop { - match data.get(i + 1) { - Some(b) => { - if let Some(b) = get_value_encoded(*b) { - break 'inner2 b; - } else { - i += 1; - offset += 1; - } - } - None => return Err(Error::Unknown("Missing at least 3 bytes")), - } - }; - - let b3 = 'inner3: loop { - match data.get(i + 2) { - Some(b) if *b == b'=' => break 'inner3 None, - Some(b) => { - if let Some(b) = get_value_encoded(*b) { - break 'inner3 Some(b); - } else { - i += 1; - offset += 1; - } - } - None => return Err(Error::Unknown("Missing at least 2 bytes")), - } - }; - - let b4 = 'inner4: loop { - match data.get(i + 3) { - Some(b) if *b == b'=' => break 'inner4 None, - Some(b) if b3.is_none() && get_value_encoded(*b).is_some() => { - return Err(Error::Unknown("Data after end of data")) - } - Some(b) => { - if let Some(b) = get_value_encoded(*b) { - break 'inner4 Some(b); - } else { - i += 1; - offset += 1; - } - } - None => return Err(Error::Unknown("Missing at least 1 byte")), - } - }; - - // JUSTIFICATION - // Benefit - // Gain performance by avoiding index checks on the vector. - // Correctness - // i < len (see checks above) - // i >= offset (so index cannot underflow) - unsafe { - *data.get_unchecked_mut(i - offset) = (b1 << 2) + ((b2 & 0b00110000) >> 4); - if let Some(b3) = b3 { - *data.get_unchecked_mut((i + 1) - offset) = - ((b2 & 0b00001111) << 4) + ((b3 & 0b00111100) >> 2); - - if let Some(b4) = b4 { - *data.get_unchecked_mut((i + 2) - offset) = ((b3 & 0b00000011) << 6) + b4; - } else { - offset += 1; - } - } else { - offset += 2; - } - } - i += 4; - offset += 1; - } - - data.truncate(i - offset); - - Ok(data) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn encode() { - assert_eq!( - "VGhhdCdzIGEgdGVzdCE=", - String::from_utf8(encode_base64(b"That's a test!".to_vec())).unwrap() - ); - assert_eq!( - "UnVzdCBpcyB0aGUgYmVzdCBsYW5ndWFnZQ==", - String::from_utf8(encode_base64(b"Rust is the best language".to_vec())).unwrap() - ); - assert_eq!( - "SSBhbSBmcmVuY2gh", - String::from_utf8(encode_base64(b"I am french!".to_vec())).unwrap() - ); - assert_eq!( - "YWJjZGVmZ2hp", - String::from_utf8(encode_base64(b"abcdefghi".to_vec())).unwrap() - ); - assert_eq!( - "YWJjZGVmZ2hpag==", - String::from_utf8(encode_base64(b"abcdefghij".to_vec())).unwrap() - ); - assert_eq!( - "YWJjZGVmZ2hpams=", - String::from_utf8(encode_base64(b"abcdefghijk".to_vec())).unwrap() - ); - assert_eq!( - "YWJjZGVmZ2hpamts", - String::from_utf8(encode_base64(b"abcdefghijkl".to_vec())).unwrap() - ); - } - - #[test] - fn decode() { - assert_eq!(get_value_encoded(BASE64_MAP[5]).unwrap(), 5); - assert_eq!(get_value_encoded(BASE64_MAP[15]).unwrap(), 15); - assert_eq!(get_value_encoded(BASE64_MAP[25]).unwrap(), 25); - assert_eq!(get_value_encoded(BASE64_MAP[53]).unwrap(), 53); - assert_eq!(get_value_encoded(BASE64_MAP[62]).unwrap(), 62); - assert_eq!(get_value_encoded(BASE64_MAP[63]).unwrap(), 63); - assert_eq!( - "abcdefghijkl", - String::from_utf8(decode_base64(b"YWJjZGVmZ2hpamts".to_vec()).unwrap()).unwrap() - ); - assert_eq!( - "abcdefghij", - String::from_utf8(decode_base64(b"YWJjZGVmZ2hpag==".to_vec()).unwrap()).unwrap() - ); - assert_eq!( - "abcdefghij", - String::from_utf8(decode_base64(b"YWJjZGV*mZ2hpag==".to_vec()).unwrap()).unwrap() - ); - assert_eq!( - "abcdefghij", - String::from_utf8(decode_base64(b"YWJjZGV*******mZ2hpag==".to_vec()).unwrap()).unwrap() - ); - assert_eq!( - "abcdefghij", - String::from_utf8( - decode_base64(b"***Y*WJ*jZ*GV*******mZ2**hp*ag=*=**".to_vec()).unwrap() - ) - .unwrap() - ); - assert_eq!( - "
Hey émoji 😍
\r\n", - String::from_utf8( - decode_base64(b"PGRpdiBkaXI9Imx0ciI+SGV5IMOpbW9qaSDwn5iNPC9kaXY+DQo=".to_vec()) - .unwrap() - ) - .unwrap() - ); - } -} diff --git a/email-parser/src/parsing/mime/encoded_headers.rs b/email-parser/src/parsing/mime/encoded_headers.rs deleted file mode 100644 index e8d3fcf..0000000 --- a/email-parser/src/parsing/mime/encoded_headers.rs +++ /dev/null @@ -1,169 +0,0 @@ -use crate::prelude::Res; - -use crate::prelude::*; -use std::borrow::Cow; - -use super::{base64, quoted_printables}; - -#[inline] -fn especials(c: u8) -> bool { - c == b'(' - || c == b')' - || c == b'<' - || c == b'>' - || c == b'@' - || c == b',' - || c == b';' - || c == b':' - || c == b'\\' - || c == b'"' - || c == b'/' - || c == b'[' - || c == b']' - || c == b'?' - || c == b'.' - || c == b'=' -} - -fn charset(input: &[u8]) -> Res> { - let (input, charset) = take_while1(input, |c| { - c > 0x20 && c < 0x7F && !especials(c) && c != b'*' - })?; - let charset = lowercase(Cow::Borrowed(charset)); - Ok((input, charset)) -} - -fn encoding(input: &[u8]) -> Res> { - let (input, encoding) = take_while1(input, |c| c > 0x20 && c < 0x7F && !especials(c))?; - let encoding = lowercase(Cow::Borrowed(encoding)); - Ok((input, encoding)) -} - -fn encoded_text(input: &[u8]) -> Res<&str> { - take_while1(input, |c| c > 0x20 && c <= 0x7E && c != b'?') -} - -pub fn encoded_word(input: &[u8]) -> Res> { - let (input, _) = tag( - input, - b"=?", - "TAG ERROR: An encoded word must start with `=?`.", - )?; - let (input, charset) = charset(input)?; - let (input, _language) = match optional(input, |input| { - pair( - input, - |input| { - tag(input, b"*", "TAG ERROR: In an encoded word with encoding, a charset must be followed by a `*`.") - }, - encoding, - ) - }) { - (input, Some(((), language))) => (input, Some(language)), - (input, None) => (input, None), - }; - let (input, _) = tag( - input, - b"?", - "TAG ERROR: In an encoded word, a language must be followed by a `?`.", - )?; - let (input, encoding) = encoding(input)?; - let (input, _) = tag( - input, - b"?", - "TAG ERROR: In an encoded word, a encoding must be followed by a `?`.", - )?; - let (input, data) = encoded_text(input)?; - let (input, _) = tag( - input, - b"?=", - "TAG ERROR: An encoded word must end with `?=`.", - )?; - - let value = match encoding.as_ref() { - "b" => base64::decode_base64(data.to_owned().into_bytes())?, - "q" => quoted_printables::decode_header_qp(data.to_owned().into_bytes()), - _ => return Err(Error::Unknown("Unknown encoding")), - }; - - use textcode::*; - let text: Cow = match charset.as_ref() { - "utf-8" | "us-ascii" => Cow::Owned( - String::from_utf8(value).map_err(|_| Error::Unknown("Invalid text encoding"))?, - ), - "iso-8859-1" => Cow::Owned(iso8859_1::decode_to_string(&value)), - "iso-8859-2" => Cow::Owned(iso8859_2::decode_to_string(&value)), - "iso-8859-3" => Cow::Owned(iso8859_3::decode_to_string(&value)), - "iso-8859-4" => Cow::Owned(iso8859_4::decode_to_string(&value)), - "iso-8859-5" => Cow::Owned(iso8859_5::decode_to_string(&value)), - "iso-8859-6" => Cow::Owned(iso8859_6::decode_to_string(&value)), - "iso-8859-7" => Cow::Owned(iso8859_7::decode_to_string(&value)), - "iso-8859-8" => Cow::Owned(iso8859_8::decode_to_string(&value)), - "iso-8859-9" => Cow::Owned(iso8859_9::decode_to_string(&value)), - "iso-8859-10" => Cow::Owned(iso8859_10::decode_to_string(&value)), - "iso-8859-11" => Cow::Owned(iso8859_11::decode_to_string(&value)), - "iso-8859-13" => Cow::Owned(iso8859_13::decode_to_string(&value)), - "iso-8859-14" => Cow::Owned(iso8859_14::decode_to_string(&value)), - "iso-8859-15" => Cow::Owned(iso8859_15::decode_to_string(&value)), - "iso-8859-16" => Cow::Owned(iso8859_16::decode_to_string(&value)), - "iso-6937" => Cow::Owned(iso6937::decode_to_string(&value)), - "gb2312" => Cow::Owned(gb2312::decode_to_string(&value)), - _ => return Err(Error::Unknown("Unknown charset")), - }; - - Ok((input, text)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn encoded_with_language() { - assert_eq!( - "this is some text", - encoded_word(b"=?iso-8859-1*en?q?this=20is=20some=20text?=") - .unwrap() - .1 - ); - assert_eq!( - "voici un peu de texte", - encoded_word(b"=?utf-8*en?q?voici=20un=20peu=20de=20texte?=") - .unwrap() - .1 - ); - } - - #[test] - fn encoded_word_test() { - assert_eq!( - "this is some text", - encoded_word(b"=?iso-8859-1?q?this=20is=20some=20text?=") - .unwrap() - .1 - ); - assert_eq!( - "Don't forget! Claim your $5 today 💸", - encoded_word(b"=?utf-8?q?Don=27t_forget!_Claim_your_=245_today_=F0=9F=92=B8?=") - .unwrap() - .1 - ); - assert_eq!( - "Chloé Helloco", - encoded_word(b"=?UTF-8?Q?Chlo=C3=A9_Helloco?=").unwrap().1 - ); - - assert_eq!( - "If you can read this yo", - encoded_word(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=") - .unwrap() - .1 - ); - assert_eq!( - "u understand the example.", - encoded_word(b"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=") - .unwrap() - .1 - ); - } -} diff --git a/email-parser/src/parsing/mime/entity.rs b/email-parser/src/parsing/mime/entity.rs deleted file mode 100644 index 86ab886..0000000 --- a/email-parser/src/parsing/mime/entity.rs +++ /dev/null @@ -1,325 +0,0 @@ -use crate::{parsing::fields::unknown, prelude::*}; -use std::borrow::Cow; -use std::collections::HashMap; - -use super::multipart; - -pub fn raw_entity(mut input: Cow<[u8]>) -> Result { - let ( - encoding, - mime_type, - subtype, - parameters, - additional_headers, - id, - description, - disposition, - ) = match input { - Cow::Borrowed(ref mut input) => { - let (len, r) = header_part(input)?; - *input = &input[input.len() - len..]; - r - } - Cow::Owned(ref mut input) => { - let (len, r) = header_part_owned(input)?; - input.drain(..input.len() - len); - r - } - }; - let value = decode_value(input, encoding)?; - - Ok(RawEntity { - mime_type, - subtype, - description, - id, - parameters, - disposition, - value, - additional_headers, - }) -} - -pub fn entity<'a>(raw_entity: &'a RawEntity<'a>) -> Result, Error> { - if raw_entity.mime_type == ContentType::Multipart { - return Ok(Entity::Multipart { - subtype: &raw_entity.subtype, - content: multipart::parse_multipart(raw_entity.value.as_ref(), &raw_entity.parameters)?, - }); - } - - if raw_entity.mime_type == ContentType::Text { - use textcode::*; - - // TODO: auto charset for plain text - - if let Some(charset) = raw_entity.parameters.get("charset") { - let charset = charset.to_lowercase(); - - let value: Cow = match charset.as_str() { - "utf-8" | "us-ascii" => Cow::Borrowed( - std::str::from_utf8(raw_entity.value.as_ref()) - .map_err(|_| Error::Unknown("Invalid text encoding"))?, - ), - "iso-8859-1" => Cow::Owned(iso8859_1::decode_to_string(&raw_entity.value)), - "iso-8859-2" => Cow::Owned(iso8859_2::decode_to_string(&raw_entity.value)), - "iso-8859-3" => Cow::Owned(iso8859_3::decode_to_string(&raw_entity.value)), - "iso-8859-4" => Cow::Owned(iso8859_4::decode_to_string(&raw_entity.value)), - "iso-8859-5" => Cow::Owned(iso8859_5::decode_to_string(&raw_entity.value)), - "iso-8859-6" => Cow::Owned(iso8859_6::decode_to_string(&raw_entity.value)), - "iso-8859-7" => Cow::Owned(iso8859_7::decode_to_string(&raw_entity.value)), - "iso-8859-8" => Cow::Owned(iso8859_8::decode_to_string(&raw_entity.value)), - "iso-8859-9" => Cow::Owned(iso8859_9::decode_to_string(&raw_entity.value)), - "iso-8859-10" => Cow::Owned(iso8859_10::decode_to_string(&raw_entity.value)), - "iso-8859-11" => Cow::Owned(iso8859_11::decode_to_string(&raw_entity.value)), - "iso-8859-13" => Cow::Owned(iso8859_13::decode_to_string(&raw_entity.value)), - "iso-8859-14" => Cow::Owned(iso8859_14::decode_to_string(&raw_entity.value)), - "iso-8859-15" => Cow::Owned(iso8859_15::decode_to_string(&raw_entity.value)), - "iso-8859-16" => Cow::Owned(iso8859_16::decode_to_string(&raw_entity.value)), - "iso-6937" => Cow::Owned(iso6937::decode_to_string(&raw_entity.value)), - "gb2312" => Cow::Owned(gb2312::decode_to_string(&raw_entity.value)), - _ => return Ok(Entity::Unknown), - }; - - return Ok(Entity::Text { - subtype: &raw_entity.subtype, - value, - }); - } - } - - Ok(Entity::Unknown) -} - -pub fn header_part_owned( - input: &[u8], -) -> Result< - ( - usize, - ( - ContentTransferEncoding<'static>, - ContentType<'static>, - Cow<'static, str>, - HashMap, Cow<'static, str>>, - Vec<(Cow<'static, str>, Cow<'static, str>)>, - Option<(Cow<'static, str>, Cow<'static, str>)>, - Option>, - Option>, - ), - ), - Error, -> { - let (r, (ct, mt, st, p, ah, id, desc, disp)) = header_part(&input)?; - - Ok(( - r, - ( - ct.into_owned(), - mt.into_owned(), - Cow::Owned(st.into_owned()), - p.into_iter() - .map(|(n, v)| (Cow::Owned(n.into_owned()), Cow::Owned(v.into_owned()))) - .collect(), - ah.into_iter() - .map(|(n, v)| (Cow::Owned(n.into_owned()), Cow::Owned(v.into_owned()))) - .collect(), - id.map(|(f, l)| (Cow::Owned(f.into_owned()), Cow::Owned(l.into_owned()))), - desc.map(|desc| (Cow::Owned(desc.into_owned()))), - disp.map(|disp| disp.into_owned()), - ), - )) -} - -pub fn header_part( - mut input: &[u8], -) -> Result< - ( - usize, - ( - ContentTransferEncoding, - ContentType, - Cow, - HashMap, Cow>, - Vec<(Cow, Cow)>, - Option<(Cow, Cow)>, - Option>, - Option, - ), - ), - Error, -> { - let mut encoding = None; - let mut mime_type = None; - let mut id = None; - let mut description = None; - let mut disposition = None; - let mut additional_headers = Vec::new(); - - loop { - if let Ok((new_input, content_transfer_encoding)) = content_transfer_encoding(input) { - input = new_input; - encoding = Some(content_transfer_encoding); - } else if let Ok((new_input, content_type)) = content_type(input) { - input = new_input; - mime_type = Some(content_type); - } else if let Ok((new_input, cid)) = content_id(input) { - input = new_input; - id = Some(cid); - } else if let Ok((new_input, cdescription)) = content_description(input) { - input = new_input; - description = Some(cdescription); - } else { - if cfg!(feature = "content-disposition") { - if let Ok((new_input, cdisposition)) = content_disposition(input) { - input = new_input; - disposition = Some(cdisposition); - continue; - } - } - - if let Ok((new_input, (name, value))) = unknown(input) { - input = new_input; - additional_headers.push((Cow::Borrowed(name), value)); - continue; - } - - break; - } - } - - let encoding = encoding.unwrap_or(ContentTransferEncoding::SevenBit); - let (mime_type, subtype, parameters) = mime_type.unwrap_or(( - ContentType::Text, - Cow::Borrowed("plain"), - vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))] - .into_iter() - .collect(), - )); - - if input.is_empty() { - return Ok(( - input.len(), - ( - encoding, - mime_type, - subtype, - parameters, - additional_headers, - id, - description, - disposition, - ), - )); - } - - let (input, _) = tag( - &input, - b"\r\n", - "TAG ERROR: A MIME entity header part must be followed by a CRLF sequence.", - )?; - - Ok(( - input.len(), - ( - encoding, - mime_type, - subtype, - parameters, - additional_headers, - id, - description, - disposition, - ), - )) -} - -pub fn decode_value<'a>( - value: Cow<'a, [u8]>, - encoding: ContentTransferEncoding, -) -> Result, Error> { - Ok(match encoding { - ContentTransferEncoding::Base64 => { - Cow::Owned(super::base64::decode_base64(value.into_owned())?) - } - ContentTransferEncoding::SevenBit => value, // No need to check, we have to be tolerant - ContentTransferEncoding::HeightBit => value, - ContentTransferEncoding::QuotedPrintable => { - Cow::Owned(super::quoted_printables::decode_qp(value.into_owned())) - } - ContentTransferEncoding::Unknown(_) => { - return Err(Error::Unknown("Unknown format")); // FIXME: Allow user to get this data - } - ContentTransferEncoding::Binary => value, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn raw_entity_test() { - assert_eq!( - RawEntity { - mime_type: ContentType::Text, - subtype: "plain".into(), - description: None, - id: None, - parameters: vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))] - .into_iter() - .collect(), - value: Cow::Borrowed(&[84, 101, 120, 116]), - #[cfg(feature = "content-disposition")] - disposition: None, - additional_headers: vec![], - }, - raw_entity(Cow::Borrowed(b"\r\nText")).unwrap() - ); - assert_eq!( - RawEntity { - mime_type: ContentType::Text, - subtype: "plain".into(), - description: None, - id: None, - parameters: vec![(Cow::Borrowed("charset"), Cow::Borrowed("us-ascii"))] - .into_iter() - .collect(), - value: Cow::Borrowed(&[84, 101, 120, 116]), - #[cfg(feature = "content-disposition")] - disposition: None, - additional_headers: vec![], - }, - raw_entity(Cow::Owned(b"\r\nText".to_vec())).unwrap() - ); - assert_eq!( - RawEntity { - mime_type: ContentType::Text, - subtype: "html".into(), - description: None, - id: None, - parameters: vec![(Cow::Borrowed("charset"), Cow::Borrowed("utf-8"))] - .into_iter() - .collect(), - value: Cow::Borrowed(&[60, 112, 62, 84, 101, 120, 116, 60, 47, 112, 62]), - #[cfg(feature = "content-disposition")] - disposition: None, - additional_headers: vec![("Unknown".into(), " Test".into())], - }, - raw_entity(Cow::Owned( - b"Content-type: text/html; charset=utf-8\r\nUnknown: Test\r\n\r\n

Text

" - .to_vec() - )) - .unwrap() - ); - - println!("{:?}", raw_entity(Cow::Borrowed(b"Content-type: multipart/alternative; boundary=\"simple boundary\"\r\n\r\nThis is the preamble. It is to be ignored, though it\r\nis a handy place for composition agents to include an\r\nexplanatory note to non-MIME conformant readers.\r\n\r\n--simple boundary\r\n\r\nThis is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.\r\n--simple boundary\r\nContent-type: text/plain; charset=us-ascii\r\n\r\nThis is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n\r\n--simple boundary--\r\n\r\nThis is the epilogue. It is also to be ignored.")).unwrap()); - } - - #[test] - fn entity_test() { - assert_eq!(Entity::Text { - subtype: &"html".into(), - value: "

Testé

".into() - }, raw_entity(Cow::Owned(b"Content-type: text/html; charset=utf-8\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n

Test=C3=A9

".to_vec())).unwrap().parse().unwrap()); - } -} diff --git a/email-parser/src/parsing/mime/mime_fields.rs b/email-parser/src/parsing/mime/mime_fields.rs deleted file mode 100644 index 06660c4..0000000 --- a/email-parser/src/parsing/mime/mime_fields.rs +++ /dev/null @@ -1,618 +0,0 @@ -use crate::mime::*; -use crate::prelude::*; -use std::borrow::Cow; -use std::collections::HashMap; - -#[inline] -fn ignore_inline_cfws(input: &[u8]) -> Res<()> { - triplet( - input, - |input| take_while(input, is_wsp), - |input| Ok(optional(input, comment)), - |input| take_while(input, is_wsp), - ) - .map(|(i, _)| (i, ())) -} - -#[inline] -fn token(input: &[u8]) -> Res<&str> { - take_while1(input, |c| { - c > 0x1F && c < 0x7F && !is_wsp(c) && !tspecial(c) - }) -} - -pub fn mime_version(input: &[u8]) -> Res<(u8, u8)> { - let (input, ()) = tag_no_case( - input, - b"MIME-Version:", - b"mime-vERSION:", - "TAG NO CASE ERROR: Header name (Mime-Version) does not match.", - )?; - let (input, _) = optional(input, cfws); - - fn u8_number(input: &[u8]) -> Res { - let (mut input, mut number) = digit(input)?; - - while let Ok((new_input, new_digit)) = digit(input) { - input = new_input; - number = number - .checked_mul(10) - .ok_or(Error::Unknown("Overflow while reading u8."))?; - number = number - .checked_add(new_digit) - .ok_or(Error::Unknown("Overflow while reading u8."))?; - } - - Ok((input, number)) - } - - let (input, d1) = u8_number(input)?; - let (input, ()) = tag( - input, - b".", - "TAG ERROR: A MIME version's major version number must be followed by a `.`.", - )?; - let (input, d2) = u8_number(input)?; - - let (input, _cwfs) = ignore_inline_cfws(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`MIME-Version` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, (d1, d2))) -} - -fn parameter(input: &[u8]) -> Res<(Cow, Option, bool, Cow)> { - let (input, _) = optional(input, cfws); - let (input, ()) = tag( - input, - b";", - "TAG ERROR: A MIME parameter must start with `;`.", - )?; - let (input, _) = optional(input, cfws); - let (input, (mut name, index, encoded)) = match_parsers( - input, - &mut [ - |input| { - let (input, name) = take_while1(input, |c| { - c > 0x1F && c < 0x7F && !is_wsp(c) && !tspecial(c) && c != b'*' - })?; - - let (mut input, index) = optional(input, |input| { - pair( - input, - |input| { - tag( - input, - b"*", - "TAG ERROR: An indexed MIME parameter name must contain a `*`.", - ) - }, - |input| take_while1(input, is_digit), - ) - }); - let index = if let Some(((), index)) = index { - Some( - index - .parse::() - .map_err(|_| Error::Unknown("Invalid index"))?, - ) - } else { - None - }; - - let encoded = if input.get(0) == Some(&b'*') { - input = &input[1..]; - true - } else { - false - }; - - if input.get(0) == Some(&b'=') { - Ok((input, (Cow::Borrowed(name), index, encoded))) - } else { - Err(Error::Unknown("It wont work with this method")) - } - }, - |input| { - let (input, name) = token(input)?; - Ok((input, (Cow::Borrowed(name), None, false))) - }, - ][..], - )?; - - name = lowercase(name); - - let (input, ()) = tag( - input, - b"=", - "TAG ERROR: A MIME parameter name must be followed by a `=`.", - )?; - let (input, value) = match_parsers( - input, - &mut [ - |input| { - let (input, value) = token(input)?; - Ok((input, Cow::Borrowed(value))) - }, - |input| quoted_string(input), - ][..], - )?; - - Ok((input, (name, index, encoded, value))) -} - -pub fn content_type(input: &[u8]) -> Res<(ContentType, Cow, HashMap, Cow>)> { - let (input, ()) = tag_no_case( - input, - b"Content-Type:", - b"cONTENT-tYPE:", - "TAG NO CASE ERROR: Header name (Content-Type) does not match.", - )?; - let (input, _) = optional(input, cfws); - - let (input, mime_type) = match_parsers( - input, - &mut [ - |input| { - tag_no_case( - input, - b"text", - b"TEXT", - "TAG NO CASE ERROR: In a content type header, `text` type does not match.", - ) - .map(|(i, ())| (i, ContentType::Text)) - }, - |input| { - tag_no_case( - input, - b"multipart", - b"MULTIPART", - "TAG NO CASE ERROR: In a content type header, `multipart` type does not match.", - ) - .map(|(i, ())| (i, ContentType::Multipart)) - }, - |input| { - tag_no_case(input, b"application", b"APPLICATION", "TAG NO CASE ERROR: In a content type header, `application` type does not match.") - .map(|(i, ())| (i, ContentType::Application)) - }, - |input| { - tag_no_case( - input, - b"image", - b"IMAGE", - "TAG NO CASE ERROR: In a content type header, `image` type does not match.", - ) - .map(|(i, ())| (i, ContentType::Image)) - }, - |input| { - tag_no_case( - input, - b"video", - b"VIDEO", - "TAG NO CASE ERROR: In a content type header, `video` type does not match.", - ) - .map(|(i, ())| (i, ContentType::Video)) - }, - |input| { - tag_no_case( - input, - b"audio", - b"AUDIO", - "TAG NO CASE ERROR: In a content type header, `audio` type does not match.", - ) - .map(|(i, ())| (i, ContentType::Audio)) - }, - |input| { - tag_no_case( - input, - b"message", - b"MESSAGE", - "TAG NO CASE ERROR: In a content type header, `message` type does not match.", - ) - .map(|(i, ())| (i, ContentType::Message)) - }, - |input| { - // TODO ietf token - let (input, name) = token(input)?; - let name = lowercase(Cow::Borrowed(name)); - - Ok((input, ContentType::Unknown(name))) - }, - ][..], - )?; - let (input, ()) = tag( - input, - b"/", - "TAG ERROR: A MIME content type must have a `/` separating the type and the subtype.", - )?; - let (input, subtype) = token(input)?; - let subtype = lowercase(Cow::Borrowed(subtype)); - - let (input, parameters_vec) = many(input, parameter)?; - let parameters = super::percent_encoding::collect_parameters(parameters_vec)?; - - let (input, ()) = ignore_inline_cfws(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Content-Type` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, (mime_type, subtype, parameters))) -} - -pub fn content_disposition(input: &[u8]) -> Res { - use crate::parsing::time::date_time; - - let (input, ()) = tag_no_case( - input, - b"Content-Disposition:", - b"cONTENT-dISPOSITION:", - "TAG NO CASE ERROR: Header name (Content-Disposition) does not match.", - )?; - let (input, _) = optional(input, cfws); - - let (mut input, disposition_type) = match_parsers( - input, - &mut [ - |input| { - tag_no_case(input, b"inline", b"INLINE", "TAG NO CASE ERROR: In a content disposition header, `inline` disposition does not match.").map(|(i, ())| (i, DispositionType::Inline)) - }, - |input| { - tag_no_case(input, b"attachment", b"ATTACHMENT", "TAG NO CASE ERROR: In a content disposition header, `attachment` disposition does not match.") - .map(|(i, ())| (i, DispositionType::Attachment)) - }, - |input| { - // TODO ietf token - let (input, name) = token(input)?; - let name = lowercase(Cow::Borrowed(name)); - - Ok((input, DispositionType::Unknown(name))) - }, - ][..], - )?; - - let mut disposition = Disposition { - disposition_type, - unstructured: HashMap::new(), - creation_date: None, - modification_date: None, - read_date: None, - filename: None, - }; - let mut parameters_vec = Vec::new(); - loop { - fn filename_parameter(input: &[u8]) -> Res> { - let (input, _) = optional(input, cfws); - let (input, ()) = tag(input, b";", "TAG ERROR: In a Content-Disposition header, a filename parameter must start with a `;`.")?; - let (input, _) = optional(input, cfws); - let (input, ()) = tag_no_case(input, b"filename", b"FILENAME", "TAG NO CASE ERROR: In a Content-Disposition header, the name of the parameter does not match a filename parameter.")?; - - let (input, ()) = tag(input, b"=", "TAG ERROR: In a Content-Disposition header, a filename parameter value must be preceded by a `=`.")?; - let (input, value) = match_parsers( - input, - &mut [ - |input| { - let (input, value) = token(input)?; - Ok((input, Cow::Borrowed(value))) - }, - |input| quoted_string(input), - ][..], - )?; - - Ok((input, value)) - } - - fn date_parameter<'a>( - input: &'a [u8], - name: &'static [u8], - name_uppercase: &'static [u8], - ) -> Res<'a, DateTime> { - let (input, _) = optional(input, cfws); - let (input, ()) = tag(input, b";", "TAG ERROR: In a Content-Disposition header, a date parameter must start with a `;`.")?; - let (input, _) = optional(input, cfws); - let (input, ()) = tag_no_case(input, name, name_uppercase, "TAG NO CASE ERROR: In a Content-Disposition header, the name of the parameter does not match a date parameter.")?; - - let (input, ()) = tag(input, b"=\"", "TAG ERROR: In a Content-Disposition header, a date parameter value must be preceded by `=\"`.")?; - let (input, value) = date_time(input)?; - let (input, ()) = tag(input, b"\"", "TAG ERROR: In a Content-Disposition header, a date parameter value must be closed by a `\"`.")?; - - Ok((input, value)) - } - - if let Ok((new_input, value)) = filename_parameter(input) { - disposition.filename = Some(value); - input = new_input; - } else if let Ok((new_input, value)) = - date_parameter(input, b"creation-date", b"CREATION-DATE") - { - disposition.creation_date = Some(value); - input = new_input; - } else if let Ok((new_input, value)) = - date_parameter(input, b"modification-date", b"MODIFICATION-DATE") - { - disposition.modification_date = Some(value); - input = new_input; - } else if let Ok((new_input, value)) = date_parameter(input, b"read-date", b"READ-DATE") { - disposition.read_date = Some(value); - input = new_input; - } else if let Ok((new_input, (name, index, encoded, value))) = parameter(input) { - parameters_vec.push((name, index, encoded, value)); - input = new_input; - } else { - break; - } - } - disposition.unstructured = super::percent_encoding::collect_parameters(parameters_vec)?; - - let (input, ()) = ignore_inline_cfws(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Content-Disposition` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, disposition)) -} - -pub fn content_transfer_encoding(input: &[u8]) -> Res { - let (input, ()) = tag_no_case( - input, - b"Content-Transfer-Encoding:", - b"cONTENT-tRANSFER-eNCODING:", - "TAG NO CASE ERROR: Header name (Content-Transfer-Encoding) does not match.", - )?; - let (input, _) = optional(input, cfws); - - let (input, encoding) = match_parsers( - input, - &mut [ - |input| { - tag_no_case(input, b"7bit", b"7BIT", "TAG NO CASE ERROR: In a content transfer encoding header, `7bit` encoding does not match.") - .map(|(i, ())| (i, ContentTransferEncoding::SevenBit)) - }, - |input| { - tag_no_case(input, b"quoted-printable", b"QUOTED-PRINTABLE", "TAG NO CASE ERROR: In a content transfer encoding header, `quoted-printable` encoding does not match.") - .map(|(i, ())| (i, ContentTransferEncoding::QuotedPrintable)) - }, - |input| { - tag_no_case(input, b"base64", b"BASE64", "TAG NO CASE ERROR: In a content transfer encoding header, `base64` encoding does not match.") - .map(|(i, ())| (i, ContentTransferEncoding::Base64)) - }, - |input| { - tag_no_case(input, b"8bit", b"8BIT", "TAG NO CASE ERROR: In a content transfer encoding header, `8bit` encoding does not match.") - .map(|(i, ())| (i, ContentTransferEncoding::HeightBit)) - }, - |input| { - tag_no_case(input, b"binary", b"BINARY", "TAG NO CASE ERROR: In a content transfer encoding header, `binary` encoding does not match.") - .map(|(i, ())| (i, ContentTransferEncoding::Binary)) - }, - |input| { - let (input, encoding) = token(input)?; - let encoding = lowercase(Cow::Borrowed(encoding)); - - Ok((input, ContentTransferEncoding::Unknown(encoding))) - }, - ][..], - )?; - - let (input, _cwfs) = ignore_inline_cfws(input)?; - let (input, ()) = tag(input, b"\r\n", "TAG ERROR: A header (`Content-Transfer-Encoding` in this case) must end with a CRLF sequence.")?; - - Ok((input, encoding)) -} - -pub fn content_id(input: &[u8]) -> Res<(Cow, Cow)> { - let (input, ()) = tag_no_case( - input, - b"Content-ID:", - b"cONTENT-id:", - "TAG NO CASE ERROR: Header name (Content-ID) does not match.", - )?; - let (input, id) = crate::parsing::address::message_id(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Content-ID` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, id)) -} - -pub fn content_description(input: &[u8]) -> Res> { - let (input, ()) = tag_no_case( - input, - b"Content-Description:", - b"cONTENT-dESCRIPTION:", - "TAG NO CASE ERROR: Header name (Content-Description) does not match.", - )?; - let (input, description) = mime_unstructured(input)?; - let (input, ()) = tag( - input, - b"\r\n", - "TAG ERROR: A header (`Content-Description` in this case) must end with a CRLF sequence.", - )?; - - Ok((input, description)) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_content_disposition() { - assert_eq!( - Disposition { - disposition_type: DispositionType::Inline, - filename: None, - creation_date: None, - modification_date: None, - read_date: None, - unstructured: HashMap::new() - }, - content_disposition(b"Content-Disposition: inline\r\n") - .unwrap() - .1 - ); - assert_eq!(Disposition { disposition_type: DispositionType::Attachment, filename: Some("genome.jpeg".into()), creation_date: None, modification_date: Some(DateTime { day_name: Some(Day::Wednesday), date: Date { day: 12, month: Month::February, year: 1997 }, time: TimeWithZone { time: Time { hour: 16, minute: 29, second: 51 }, zone: Zone { sign: false, hour_offset: 5, minute_offset: 0 } } }), read_date: None, unstructured: HashMap::new() }, content_disposition(b"Content-Disposition: attachment; filename=genome.jpeg;\r\n modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"\r\n").unwrap().1); - assert_eq!( - Disposition { - disposition_type: DispositionType::Attachment, - filename: None, - creation_date: None, - modification_date: None, - read_date: None, - unstructured: vec![("param".into(), "foobar".into())] - .into_iter() - .collect() - }, - content_disposition( - b"Content-Disposition: attachment; param*0=foo;\r\n param*1=bar\r\n" - ) - .unwrap() - .1 - ); - } - - #[test] - fn test_content_id() { - assert_eq!( - content_id(b"Content-ID: <123456@mubelotix.dev>\r\n") - .unwrap() - .1 - .0, - "123456" - ); - assert_eq!( - content_id(b"cOntent-id: \r\n") - .unwrap() - .1 - .1, - "gmail.com" - ); - } - - #[test] - fn test_content_description() { - assert_eq!( - content_description( - b"Content-Description:a picture of the Space Shuttle Endeavor.\r\n" - ) - .unwrap() - .1, - "a picture of the Space Shuttle Endeavor." - ); - assert_eq!( - content_description(b"Content-DeScription:Ferris the crab\r\n") - .unwrap() - .1, - "Ferris the crab" - ); - } - - #[test] - fn test_mime_version() { - assert_eq!(mime_version(b"MIME-Version: 1.0\r\n").unwrap().1, (1, 0)); - assert_eq!(mime_version(b"MIME-VersIon: 1.2\r\n").unwrap().1, (1, 2)); - assert_eq!( - mime_version(b"MIME-VersIon: (produced by MetaSend Vx.x) 2.0\r\n") - .unwrap() - .1, - (2, 0) - ); - assert_eq!( - mime_version(b"MIME-VersIon: 214.25 (produced by MetaSend Vx.x)\r\n") - .unwrap() - .1, - (214, 25) - ); - } - - #[test] - fn test_content_type() { - assert_eq!( - content_type(b"Content-type: tExt/plain\r\n").unwrap().1 .0, - ContentType::Text - ); - assert_eq!( - (ContentType::Message, "external-body".into(), vec![("access-type".into(), "URL".into()), ("url".into(), "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar".into())].into_iter().collect()), content_type(b"Content-Type: message/external-body; access-type=URL;\r\n URL*0=\"ftp://\";\r\n URL*1=\"cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar\"\r\n").unwrap().1, - ); - assert_eq!( - (ContentType::Application, "x-stuff".into(), vec![("title".into(), "This is even more ***fun*** isn\'t it!".into())].into_iter().collect()), content_type(b"Content-Type: application/x-stuff;\r\n title*0*=us-ascii'en'This%20is%20even%20more%20;\r\n title*1*=%2A%2A%2Afun%2A%2A%2A%20;\r\n title*2=\"isn't it!\"\r\n").unwrap().1, - ); - assert_eq!( - content_type(b"Content-type: text/plain\r\n").unwrap().1 .1, - "plain" - ); - assert_eq!( - content_type(b"Content-type: multIpart/unknown\r\n") - .unwrap() - .1 - .0, - ContentType::Multipart - ); - assert_eq!( - content_type(b"Content-Type: text/plain; chaRSet=\"iso-8859-1\"\r\n") - .unwrap() - .1 - .2 - .get("charset") - .unwrap(), - "iso-8859-1" - ); - assert_eq!( - content_type(b"Content-Type: text/plain; charset=\"iso-8859-1\"\r\n") - .unwrap() - .1 - .2 - .get("charset") - .unwrap(), - "iso-8859-1" - ); - assert_eq!( - content_type(b"Content-type: text/plain; charset=us-ascii (Plain text)\r\n") - .unwrap() - .1 - .2 - .get("charset") - .unwrap(), - "us-ascii" - ); - assert_eq!( - content_type(b"Content-type: text/plain; charset=\"us-ascii\"\r\n") - .unwrap() - .1 - .2 - .get("charset") - .unwrap(), - "us-ascii" - ); - assert_eq!(content_type(b"Content-Type: multipart/alternative; \r\n\tboundary=\"_000_DB6P193MB0021E64E5870F10170A32CB8EB920DB6P193MB0021EURP_\"\r\n").unwrap().1.2.get("boundary").unwrap(), "_000_DB6P193MB0021E64E5870F10170A32CB8EB920DB6P193MB0021EURP_"); - } - - #[test] - fn test_content_transfer_encoding() { - assert_eq!( - content_transfer_encoding(b"Content-Transfer-Encoding: 7bit\r\n") - .unwrap() - .1, - ContentTransferEncoding::SevenBit - ); - assert_eq!( - content_transfer_encoding(b"Content-Transfer-Encoding: binary (invalid) \r\n") - .unwrap() - .1, - ContentTransferEncoding::Binary - ); - assert_eq!( - content_transfer_encoding(b"Content-Transfer-Encoding: (not readable) base64 \r\n") - .unwrap() - .1, - ContentTransferEncoding::Base64 - ); - } -} diff --git a/email-parser/src/parsing/mime/mod.rs b/email-parser/src/parsing/mime/mod.rs deleted file mode 100644 index 695a5d5..0000000 --- a/email-parser/src/parsing/mime/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub(crate) mod base64; -/// RFC 2047 -pub(crate) mod encoded_headers; -pub(crate) mod entity; -pub(crate) mod mime_fields; -pub(crate) mod multipart; -pub(crate) mod percent_encoding; -pub(crate) mod quoted_printables; diff --git a/email-parser/src/parsing/mime/multipart.rs b/email-parser/src/parsing/mime/multipart.rs deleted file mode 100644 index 82c6123..0000000 --- a/email-parser/src/parsing/mime/multipart.rs +++ /dev/null @@ -1,164 +0,0 @@ -use crate::prelude::*; -use std::borrow::Cow; -use std::collections::HashMap; - -fn before_boundary_idx(input: &[u8], boundary: &[u8]) -> Result<(usize, usize), Error> { - let full_boundary_len = 2 + 2 + boundary.len() + 2; - - // FIXME: ignore whitespaces after the boundary - if full_boundary_len - 2 <= input.len() - && input.get(..2) == Some(b"--") - && input.get(2..2 + boundary.len()) == Some(boundary) - && input.get(2 + boundary.len()..full_boundary_len - 2) == Some(b"\r\n") - { - return Ok((0, full_boundary_len - 2)); - } - - for idx in 0..input.len() { - // FIXME: ignore whitespaces after the boundary - if input.get(idx..idx + 4) == Some(b"\r\n--") - && input.get(idx + 4..idx + 4 + boundary.len()) == Some(boundary) - && input.get(idx + 4 + boundary.len()..idx + full_boundary_len) == Some(b"\r\n") - { - return Ok((idx, full_boundary_len)); - } - } - - Err(Error::Unknown("boundary not found")) -} - -fn before_boundary<'a, 'b>(input: &'a [u8], boundary: &'b [u8]) -> Res<'a, &'a [u8]> { - let (before, len) = before_boundary_idx(input, boundary)?; - - // JUSTIFICATION - // Benefit - // Improve performances by avoiding checking the range. - // Correctness - // The function cannot return out of range indexes. - unsafe { - Ok(( - input.get_unchecked(before + len..), - input.get_unchecked(..before), - )) - } -} - -fn before_closing_boundary_idx(input: &[u8], boundary: &[u8]) -> Result<(usize, usize), Error> { - let full_boundary_len = 2 + 2 + boundary.len() + 2 + 2; - for idx in 0..input.len() { - // FIXME: ignore whitespaces after the boundary - if input.get(idx..idx + 4) == Some(b"\r\n--") - && input.get(idx + 4..idx + 4 + boundary.len()) == Some(boundary) - && input.get(idx + 4 + boundary.len()..idx + full_boundary_len) == Some(b"--\r\n") - { - return Ok((idx, full_boundary_len)); - } - } - - Err(Error::Unknown("closing boundary not found")) -} - -fn before_closing_boundary<'a, 'b>(input: &'a [u8], boundary: &'b [u8]) -> Res<'a, &'a [u8]> { - let (before, len) = before_closing_boundary_idx(input, boundary)?; - - // JUSTIFICATION - // Benefit - // Improve performances by avoiding checking the range. - // Correctness - // The function returning indexes cannot return out of range. - unsafe { - Ok(( - input.get_unchecked(before + len..), - input.get_unchecked(..before), - )) - } -} - -pub fn parse_multipart<'a>( - input: &'a [u8], - parameters: &HashMap, Cow>, -) -> Result>, Error> { - let boundary = parameters - .get("boundary") - .ok_or(Error::Unknown("Missing boundary parameter"))?; - let (input, mut parts) = many(&input, |i| before_boundary(i, boundary.as_bytes()))?; - let (_epilogue, last_part) = before_closing_boundary(input, boundary.as_bytes())?; - parts.push(last_part); - parts.remove(0); // the prelude - - let mut entities = Vec::new(); - for part in parts { - entities.push(super::entity::raw_entity(Cow::Borrowed(part))?); - } - - Ok(entities) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_multipart() { - parse_multipart( - b"This is the preamble. It is to be ignored, though it\r\nis a handy place for composition agents to include an\r\nexplanatory note to non-MIME conformant readers.\r\n\r\n--simple boundary\r\n\r\nThis is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.\r\n--simple boundary\r\nContent-type: text/plain; charset=us-ascii\r\n\r\nThis is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n\r\n--simple boundary--\r\n\r\nThis is the epilogue. It is also to be ignored.", - &vec![(Cow::Borrowed("boundary"), Cow::Borrowed("simple boundary"))].into_iter().collect(), - ) - .unwrap(); - } - - #[test] - fn test_boundary() { - assert_eq!( - b"", - before_boundary( - b"--boundary\r\nI am making a not here: huge success", - b"boundary" - ) - .unwrap() - .1 - ); - assert_eq!( - b"aeiouy", - before_boundary(b"aeiouy\r\n--boundary\r\n", b"boundary") - .unwrap() - .1 - ); - assert_eq!( - b"This was a triumph", - before_boundary( - b"This was a triumph\r\n--boundary\r\nI am making a not here: huge success", - b"boundary" - ) - .unwrap() - .1 - ); - assert_eq!( - b"", - before_boundary( - b"\r\n--boundary\r\nI am making a not here: huge success", - b"boundary" - ) - .unwrap() - .1 - ); - assert_eq!( - b"I am making a not here: huge success", - before_boundary( - b"This was a triumph\r\n--boundary\r\nI am making a not here: huge success", - b"boundary" - ) - .unwrap() - .0 - ); - assert_eq!( - b"I am making a not here: huge success", - before_closing_boundary( - b"This was a triumph\r\n--boundary--\r\nI am making a not here: huge success", - b"boundary" - ) - .unwrap() - .0 - ); - } -} diff --git a/email-parser/src/parsing/mime/percent_encoding.rs b/email-parser/src/parsing/mime/percent_encoding.rs deleted file mode 100644 index 643fb9f..0000000 --- a/email-parser/src/parsing/mime/percent_encoding.rs +++ /dev/null @@ -1,202 +0,0 @@ -use crate::prelude::*; -use std::{borrow::Cow, collections::HashMap}; - -pub fn decode_parameter(mut input: Vec, charset: Cow) -> Result { - if ![ - "utf-8", - "us-ascii", - "iso-8859-1", - "iso-8859-2", - "iso-8859-3", - "iso-8859-4", - "iso-8859-5", - "iso-8859-6", - "iso-8859-7", - "iso-8859-8", - "iso-8859-9", - "iso-8859-10", - "iso-8859-11", - "iso-8859-13", - "iso-8859-14", - "iso-8859-15", - "iso-8859-16", - "iso-6937", - "gb2312", - ] - .contains(&charset.as_ref()) - { - // JUSTIFICATION - // Benefit - // Gain performances by avoiding the utf8 string check. - // Correctness - // It's valid ASCII so it cannot be invalid utf8. - return Ok(unsafe { String::from_utf8_unchecked(input.to_vec()) }); - } - - let mut percents = Vec::new(); - for (idx, byte) in input.iter().enumerate() { - if *byte == b'%' { - percents.push(idx); - } - } - - for percent in percents.iter().rev() { - fn from_hex(n: &u8) -> Option { - match n { - b'0'..=b'9' => Some(n - b'0'), - b'A'..=b'F' => Some(10 + n - b'A'), - b'a'..=b'f' => Some(10 + n - b'a'), - _ => None, - } - } - - if let (Some(first), Some(second)) = (input.get(percent + 1), input.get(percent + 2)) { - if let (Some(first), Some(second)) = (from_hex(first), from_hex(second)) { - let n = first * 16 + second; - // JUSTIFICATION - // Benefit - // Improve performances by avoiding useless index checks. - // Correctness: - // We never delete items before `percent`. Since `percent` is decreasing, there is always an item at this index. - unsafe { - *input.get_unchecked_mut(*percent) = n; - } - input.remove(percent + 2); - input.remove(percent + 1); - } - } - } - - use textcode::*; - let text = match charset.as_ref() { - "utf-8" | "us-ascii" => { - String::from_utf8(input).map_err(|_| Error::Unknown("Invalid text encoding"))? - } - "iso-8859-1" => iso8859_1::decode_to_string(&input), - "iso-8859-2" => iso8859_2::decode_to_string(&input), - "iso-8859-3" => iso8859_3::decode_to_string(&input), - "iso-8859-4" => iso8859_4::decode_to_string(&input), - "iso-8859-5" => iso8859_5::decode_to_string(&input), - "iso-8859-6" => iso8859_6::decode_to_string(&input), - "iso-8859-7" => iso8859_7::decode_to_string(&input), - "iso-8859-8" => iso8859_8::decode_to_string(&input), - "iso-8859-9" => iso8859_9::decode_to_string(&input), - "iso-8859-10" => iso8859_10::decode_to_string(&input), - "iso-8859-11" => iso8859_11::decode_to_string(&input), - "iso-8859-13" => iso8859_13::decode_to_string(&input), - "iso-8859-14" => iso8859_14::decode_to_string(&input), - "iso-8859-15" => iso8859_15::decode_to_string(&input), - "iso-8859-16" => iso8859_16::decode_to_string(&input), - "iso-6937" => iso6937::decode_to_string(&input), - "gb2312" => gb2312::decode_to_string(&input), - _ => return Err(Error::Unknown("Unknown charset")), - }; - - Ok(text) -} - -pub fn collect_parameters<'a>( - parameters_vec: Vec<(Cow<'a, str>, Option, bool, Cow<'a, str>)>, -) -> Result, Cow<'a, str>>, Error> { - let mut parameters: HashMap<_, _> = HashMap::new(); - let mut complex_parameters: HashMap<_, HashMap<_, _>> = HashMap::new(); - for (name, index, encoded, value) in parameters_vec { - if let Some(index) = index { - if !complex_parameters.contains_key(&name) { - complex_parameters.insert(name.clone(), HashMap::new()); - } - complex_parameters - .get_mut(&name) - .unwrap() - .insert(index, (encoded, value)); - } else { - parameters.insert(name, value); - } - } - for (name, values) in complex_parameters.iter_mut() { - if let Some((encoded, value)) = values.remove(&0) { - let (mut value, charset, _language) = if encoded { - match value { - Cow::Borrowed(value) => { - let (value, charset) = take_while(value.as_bytes(), |c| c != b'\'')?; - let charset = lowercase(Cow::Borrowed(charset)); - let (value, _) = tag( - value, - b"'", - "TAG ERROR: In a parameter list, a charset must be followed by a `'`.", - )?; - let (value, language) = take_while(value, |c| c != b'\'')?; - let (value, _) = tag( - value, - b"'", - "TAG ERROR: In a parameter list, a language must be followed by a `'`.", - )?; - ( - Cow::Owned(decode_parameter(value.to_vec(), charset.clone())?), - Some(charset), - Some(Cow::Borrowed(language)), - ) - } - Cow::Owned(value) => { - let (value, charset) = take_while(value.as_bytes(), |c| c != b'\'')?; - let charset = lowercase(Cow::Borrowed(charset)); - let (value, _) = tag( - value, - b"'", - "TAG ERROR: In a parameter list, a charset must be followed by a `'`.", - )?; - let (value, language) = take_while(value, |c| c != b'\'')?; - let (value, _) = tag( - value, - b"'", - "TAG ERROR: In a parameter list, a language must be followed by a `'`.", - )?; - ( - Cow::Owned(decode_parameter(value.to_vec(), charset.clone())?), - Some(Cow::Owned(charset.into_owned())), - Some(Cow::Owned(language.to_owned())), - ) - } - } - } else { - (value, None, None) - }; - - let mut idx = 1; - while let Some((encoded, new_value)) = values.remove(&idx) { - if encoded && charset.is_some() { - add_string( - &mut value, - Cow::Owned(decode_parameter( - new_value.into_owned().into_bytes(), - charset.clone().unwrap(), - )?), - ); - } else { - add_string(&mut value, new_value); - } - idx += 1; - } - - parameters.insert(Cow::Owned(name.clone().into_owned()), value); - } - } - Ok(parameters) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_percent_encoding() { - assert_eq!( - "This is even more ", - decode_parameter("This%20is%20even%20more%20".into(), "us-ascii".into()).unwrap() - ); - assert_eq!( - "***fun*** ", - decode_parameter("%2A%2A%2Afun%2A%2A%2A%20".into(), "us-ascii".into()).unwrap() - ); - } -} diff --git a/email-parser/src/parsing/mime/quoted_printables.rs b/email-parser/src/parsing/mime/quoted_printables.rs deleted file mode 100644 index 6511252..0000000 --- a/email-parser/src/parsing/mime/quoted_printables.rs +++ /dev/null @@ -1,187 +0,0 @@ -// Second rule of the encoding -fn litteral_repr_possible(c: &u8) -> bool { - (c >= &33 && c <= &60) || (c >= &62 && c <= &126) -} - -#[allow(clippy::if_same_then_else)] -pub fn encode_qp(mut data: Vec) -> Vec { - // Fixme: Make it usable by binary formats - let mut line_lenght = 0; - let mut idx = 0; - - while let Some(byte) = data.get(idx).copied() { - if line_lenght >= 72 { - // 72 because in the worst case we add 3 chars + 1 equal sign -> 76 - // Fifth rule - data.insert(idx, b'\n'); - data.insert(idx, b'\r'); - data.insert(idx, b'='); - idx += 3; - line_lenght = 0; - } - - if litteral_repr_possible(&byte) { - // Second rule - idx += 1; - line_lenght += 1; - } else if (byte == 9 || byte == 32) - && data.get(idx + 1).map(|c| c != &b'\r').unwrap_or(false) - { - // Third rule - idx += 1; - line_lenght += 1; - } else if byte == b'\r' && data.get(idx + 1) == Some(&b'\n') { - // Fourth rule - idx += 2; - line_lenght = 0; - } else { - // First rule - - fn to_hex(n: u8) -> u8 { - match n { - 0..=9 => b'0' + n, - 10..=15 => b'A' + n - 10, - _ => unreachable!(), - } - } - - data.remove(idx); - data.insert(idx, to_hex(byte % 16)); - data.insert(idx, to_hex((byte - byte % 16) / 16)); - data.insert(idx, b'='); - - idx += 3; - line_lenght += 3; - }; - } - - data -} - -pub fn decode_qp(mut data: Vec) -> Vec { - let mut idx = 0; - - while let Some(byte) = data.get(idx) { - if litteral_repr_possible(byte) || byte == &b' ' || byte == &b'\t' { - idx += 1; - } else if byte == &b'=' { - if data.get(idx + 1) == Some(&b'\r') && data.get(idx + 2) == Some(&b'\n') { - data.drain(idx..idx + 3); - } else if data.len() > idx + 2 { - let first = data.remove(idx + 1); - let second = data.remove(idx + 1); - - fn from_hex(n: u8) -> Option { - match n { - b'0'..=b'9' => Some(n - b'0'), - b'A'..=b'F' => Some(10 + n - b'A'), - b'a'..=b'f' => Some(10 + n - b'a'), - _ => None, - } - } - - if let (Some(first), Some(second)) = (from_hex(first), from_hex(second)) { - data[idx] = first * 16 + second; - idx += 1; - } else { - data[idx] = 189; - data.insert(idx, 191); - data.insert(idx, 239); - idx += 3; - } - } else { - idx += 1; - } - } else if byte == &b'\r' && data.get(idx + 1) == Some(&b'\n') { - idx += 2; - } else { - data[idx] = 189; - data.insert(idx, 191); - data.insert(idx, 239); - idx += 3; - } - } - - data -} - -pub fn decode_header_qp(mut data: Vec) -> Vec { - let mut idx = 0; - - while let Some(byte) = data.get(idx).copied() { - if byte == b'_' { - data[idx] = 0x20; - idx += 1; - } else if byte == b'=' { - if data.get(idx + 1) == Some(&b'\r') && data.get(idx + 2) == Some(&b'\n') { - data.drain(idx..idx + 3); - } else if data.len() > idx + 2 { - let first = data.remove(idx + 1); - let second = data.remove(idx + 1); - - fn from_hex(n: u8) -> Option { - match n { - b'0'..=b'9' => Some(n - b'0'), - b'A'..=b'F' => Some(10 + n - b'A'), - b'a'..=b'f' => Some(10 + n - b'a'), - _ => None, - } - } - - if let (Some(first), Some(second)) = (from_hex(first), from_hex(second)) { - data[idx] = first * 16 + second; - idx += 1; - } else { - data[idx] = 189; - data.insert(idx, 191); - data.insert(idx, 239); - idx += 3; - } - } else { - idx += 1; - } - } else if byte >= 0x20 && byte <= 0x7E { - idx += 1; - } else { - data[idx] = 189; - data.insert(idx, 191); - data.insert(idx, 239); - idx += 3; - } - } - - data -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn encode() { - assert_eq!( - b"This was a triumph. Il =C3=A9tait une fois...\r\nAnd voil=C3=A0 !=20\r\nSimon" - .to_vec(), - encode_qp( - "This was a triumph. Il était une fois...\r\nAnd voilà ! \r\nSimon" - .to_string() - .into_bytes() - ) - ); - assert_eq!(b"Now\'s the time for all folk to come to the aid of their country. Wtf thi=\r\ns sentence is not long enough to test line-lenght limit.".to_vec(), encode_qp("Now's the time for all folk to come to the aid of their country. Wtf this sentence is not long enough to test line-lenght limit.".to_string().into_bytes())); - } - - #[test] - fn decode() { - assert_eq!( - b"This was a triumph. Il \xC3\xA9tait une fois...\r\nAnd voil\xC3\xA0 ! \r\nSimon" - .to_vec(), - decode_qp( - "This was a triumph. Il =C3=A9tait une fois...\r\nAnd voil=C3=A0 !=20\r\nSimon" - .to_string() - .into_bytes() - ) - ); - assert_eq!(b"Now's the time for all folk to come to the aid of their country. Wtf this sentence is not long enough to test line-lenght limit.".to_vec(), decode_qp("Now\'s the time for all folk to come to the aid of their country. Wtf thi=\r\ns sentence is not long enough to test line-lenght limit.".to_string().into_bytes())); - } -} diff --git a/email-parser/src/parsing/mod.rs b/email-parser/src/parsing/mod.rs deleted file mode 100644 index a043d7f..0000000 --- a/email-parser/src/parsing/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -#![allow(dead_code)] - -pub(crate) mod address; -pub(crate) mod character_sets; -pub(crate) mod combinators; -pub(crate) mod common; -pub(crate) mod fields; -pub(crate) mod message; -#[cfg(feature = "mime")] -pub(crate) mod mime; -pub(crate) mod quoted_string; -pub(crate) mod time; -pub(crate) mod whitespaces; diff --git a/email-parser/src/parsing/quoted_string.rs b/email-parser/src/parsing/quoted_string.rs deleted file mode 100644 index ac6cf2f..0000000 --- a/email-parser/src/parsing/quoted_string.rs +++ /dev/null @@ -1,131 +0,0 @@ -use crate::prelude::*; -use std::borrow::Cow; - -pub fn quoted_pair(input: &[u8]) -> Result<(&[u8], Cow), Error> { - let (input, ()) = tag( - input, - b"\\", - "TAG ERROR: A quoted pair must start with a `\\`.", - )?; - - if let Some(character) = input.get(0) { - if is_vchar(*character) || is_wsp(*character) { - // index are already checked - unsafe { - Ok(( - input.get_unchecked(1..), - from_slice(input.get_unchecked(..1)), - )) - } - } else { - Err(Error::Unknown( - "The quoted-pair character is no a vchar or a wsp.", - )) - } - } else { - Err(Error::Unknown("The quoted-pair has no second character.")) - } -} - -pub fn quoted_string(input: &[u8]) -> Result<(&[u8], Cow), Error> { - let input = if let Ok((input, _cfws)) = cfws(input) { - input - } else { - input - }; - - let mut input = if input.starts_with(b"\"") { - &input[1..] - } else { - return Err(Error::Unknown("Quoted string must begin with a dquote")); - }; - let mut output = empty_string(); - - loop { - let mut additionnal_output = empty_string(); - - let new_input = if let Ok((new_input, fws)) = fws(input) { - add_string(&mut additionnal_output, fws); - new_input - } else { - input - }; - - let new_input = if let Ok((new_input, str)) = take_while1(new_input, is_qtext) { - add_str(&mut additionnal_output, str); - new_input - } else if let Ok((new_input, str)) = quoted_pair(new_input) { - add_string(&mut additionnal_output, str); - new_input - } else { - break; - }; - - add_string(&mut output, additionnal_output); - input = new_input; - } - - let input = if let Ok((input, fws)) = fws(input) { - add_string(&mut output, fws); - input - } else { - input - }; - - let input = if input.starts_with(b"\"") { - &input[1..] - } else { - return Err(Error::Unknown("Quoted string must end with a dquote")); - }; - - let input = if let Ok((input, _cfws)) = cfws(input) { - input - } else { - input - }; - - Ok((input, output)) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_quoted_pair() { - assert!(quoted_pair(b"\\rtest").is_ok()); - assert!(quoted_pair(b"\\ test").is_ok()); - - assert_eq!(quoted_pair(b"\\rtest").unwrap().1, "r"); - assert_eq!(quoted_pair(b"\\ test").unwrap().1, " "); - - assert!(quoted_pair(b"\\").is_err()); - assert!(quoted_pair(b"\\\0").is_err()); - assert!(quoted_pair(b"test").is_err()); - } - - #[test] - fn test_quoted_string() { - assert_eq!( - quoted_string(b" \"This\\ is\\ a\\ test\"").unwrap().1, - "This is a test" - ); - assert_eq!( - quoted_string(b"\r\n \"This\\ is\\ a\\ test\" ") - .unwrap() - .1, - "This is a test" - ); - - assert!(matches!( - quoted_string(b"\r\n \"This\\ is\\ a\\ test\" ") - .unwrap() - .1, - Cow::Owned(_) - )); - assert!(matches!( - quoted_string(b"\r\n \"hey\" ").unwrap().1, - Cow::Borrowed(_) - )); - } -} diff --git a/email-parser/src/parsing/time.rs b/email-parser/src/parsing/time.rs deleted file mode 100644 index e90fca3..0000000 --- a/email-parser/src/parsing/time.rs +++ /dev/null @@ -1,383 +0,0 @@ -use crate::prelude::*; - -pub fn day_name(input: &[u8]) -> Res { - if let (Some(input), Some(letters)) = (input.get(3..), input.get(..3)) { - let letters = letters.to_ascii_lowercase(); - match letters.as_slice() { - b"mon" => Ok((input, Day::Monday)), - b"tue" => Ok((input, Day::Tuesday)), - b"wed" => Ok((input, Day::Wednesday)), - b"thu" => Ok((input, Day::Thursday)), - b"fri" => Ok((input, Day::Friday)), - b"sat" => Ok((input, Day::Saturday)), - b"sun" => Ok((input, Day::Sunday)), - _ => Err(Error::Unknown("Not a valid day_name")), - } - } else { - Err(Error::Unknown( - "Expected day_name, but characters are missing (at least 3).", - )) - } -} - -pub fn month(input: &[u8]) -> Res { - if let (Some(input), Some(letters)) = (input.get(3..), input.get(..3)) { - let letters = letters.to_ascii_lowercase(); - match letters.as_slice() { - b"jan" => Ok((input, Month::January)), - b"feb" => Ok((input, Month::February)), - b"mar" => Ok((input, Month::March)), - b"apr" => Ok((input, Month::April)), - b"may" => Ok((input, Month::May)), - b"jun" => Ok((input, Month::June)), - b"jul" => Ok((input, Month::July)), - b"aug" => Ok((input, Month::August)), - b"sep" => Ok((input, Month::September)), - b"oct" => Ok((input, Month::October)), - b"nov" => Ok((input, Month::November)), - b"dec" => Ok((input, Month::December)), - _ => Err(Error::Unknown("Not a valid month")), - } - } else { - Err(Error::Unknown( - "Expected month, but characters are missing (at least 3).", - )) - } -} - -pub fn day_of_week(input: &[u8]) -> Res { - let (input, _fws) = optional(input, fws); - let (input, day) = day_name(input)?; - let (input, ()) = tag( - input, - b",", - "TAG ERROR: In a day_of_week, a day name must be followed by a comma.", - )?; - Ok((input, day)) -} - -pub fn year(input: &[u8]) -> Res { - let (input, _) = fws(input)?; - - let (input, year) = - take_while1(input, is_digit).map_err(|_e| Error::Unknown("no digit in year"))?; - if year.len() < 4 { - return Err(Error::Unknown("year is expected to have 4 digits or more")); - } - let year: usize = year - .parse() - .map_err(|_e| Error::Unknown("Failed to parse year"))?; - - if year < 1990 { - return Err(Error::Unknown("year must be after 1990")); - } - - let (input, _) = fws(input)?; - - Ok((input, year)) -} - -pub fn day(input: &[u8]) -> Res { - let (input, _fws) = optional(input, fws); - let (mut input, mut day) = digit(input)?; - if let Ok((new_input, digit)) = digit(input) { - day *= 10; - day += digit; - input = new_input; - } - if day > 31 { - return Err(Error::Unknown("day must be less than 31")); - } - let (input, _) = fws(input)?; - Ok((input, day)) -} - -pub fn time_of_day(input: &[u8]) -> Res