Skip to content

Commit

Permalink
feat: Add suggestions for syntax errors (#1192)
Browse files Browse the repository at this point in the history
* Add suggestions for syntax errors

* Fix lint

* Add suggested PR feedback

* Apply pr request feedback
  • Loading branch information
Agreon authored Oct 6, 2022
1 parent 49c8f5c commit 2d385f3
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 8 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ The output will be a JSON with the below format, the fields should be self-expla
"syntax": {
"domain": "gmail.com",
"is_valid_syntax": true,
"username": "someone"
"username": "someone",
"suggestion": null
}
}
```
Expand Down
4 changes: 2 additions & 2 deletions backend/tests/check_email.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ use serde_json;
use warp::http::StatusCode;
use warp::test::request;

const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#;
const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo"}}"#;
const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","suggestion":null}}"#;
const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo","suggestion":null}}"#;

#[tokio::test]
async fn test_input_foo_bar() {
Expand Down
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.85"
trust-dns-proto = "0.21.2"
md5 = "0.7.0"
levenshtein = "1.0.5"

[dev-dependencies]
tokio = { version = "1.21.2" }
Expand Down
11 changes: 9 additions & 2 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ mod util;
use misc::{check_misc, MiscDetails};
use mx::check_mx;
use smtp::{check_smtp, SmtpDetails, SmtpError};
use syntax::check_syntax;
use syntax::{check_syntax, get_similar_mail_provider};
pub use util::constants::LOG_TARGET;
pub use util::input_output::*;

Expand Down Expand Up @@ -116,7 +116,7 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
to_email,
to_email
);
let my_syntax = check_syntax(to_email.as_ref());
let mut my_syntax = check_syntax(to_email.as_ref());
if !my_syntax.is_valid_syntax {
return CheckEmailOutput {
input: to_email.to_string(),
Expand All @@ -136,6 +136,8 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
let my_mx = match check_mx(&my_syntax).await {
Ok(m) => m,
e => {
get_similar_mail_provider(&mut my_syntax);

// This happens when there's an internal error while checking MX
// records. Should happen fairly rarely.
return CheckEmailOutput {
Expand All @@ -150,6 +152,8 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {

// Return if we didn't find any MX records.
if my_mx.lookup.is_err() {
get_similar_mail_provider(&mut my_syntax);

return CheckEmailOutput {
input: to_email.to_string(),
is_reachable: Reachable::Invalid,
Expand Down Expand Up @@ -213,6 +217,9 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
let my_smtp = my_smtp.expect(
"As long as lookup has at least 1 element (which we checked), my_smtp will be a Some. qed.",
);
if my_smtp.is_err() {
get_similar_mail_provider(&mut my_syntax);
}

CheckEmailOutput {
input: to_email.to_string(),
Expand Down
49 changes: 49 additions & 0 deletions core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use async_smtp::EmailAddress;
use levenshtein::levenshtein;
use serde::{Deserialize, Serialize};
use std::str::FromStr;

Expand All @@ -32,6 +33,7 @@ pub struct SyntaxDetails {
/// The username, before "@". It will be the empty string if the email
/// address if ill-formed.
pub username: String,
pub suggestion: Option<String>,
}

impl Default for SyntaxDetails {
Expand All @@ -41,6 +43,7 @@ impl Default for SyntaxDetails {
domain: "".into(),
is_valid_syntax: false,
username: "".into(),
suggestion: None,
}
}
}
Expand All @@ -58,6 +61,7 @@ pub fn check_syntax(email_address: &str) -> SyntaxDetails {
domain: "".into(),
is_valid_syntax: false,
username: "".into(),
suggestion: None,
};
}
}
Expand All @@ -67,6 +71,7 @@ pub fn check_syntax(email_address: &str) -> SyntaxDetails {
domain: "".into(),
is_valid_syntax: false,
username: "".into(),
suggestion: None,
}
}
};
Expand All @@ -87,6 +92,34 @@ pub fn check_syntax(email_address: &str) -> SyntaxDetails {
domain,
is_valid_syntax: true,
username,
suggestion: None,
}
}

const MAIL_PROVIDERS: &[&str] = &[
"gmail.com",
"yahoo.com",
"outlook.com",
"hotmail.com",
"protonmail.com",
"icloud.com",
"yandex.com",
];
// Supplies the syntax parameter with a suggestion that matches the mail domain best by levenshtein
// distance.
pub fn get_similar_mail_provider(syntax: &mut SyntaxDetails) {
for possible_provider in MAIL_PROVIDERS {
let distance = levenshtein(&syntax.domain, possible_provider);

if distance < 3 {
// Return full address
syntax.suggestion = Some(format!(
"{}@{}",
syntax.username,
String::from(*possible_provider),
));
break;
}
}
}

Expand All @@ -103,6 +136,7 @@ mod tests {
domain: "".into(),
is_valid_syntax: false,
username: "".into(),
suggestion: None,
}
);
}
Expand All @@ -116,6 +150,7 @@ mod tests {
domain: "".into(),
is_valid_syntax: false,
username: "".into(),
suggestion: None,
}
);
}
Expand All @@ -129,7 +164,21 @@ mod tests {
domain: "bar.com".into(),
is_valid_syntax: true,
username: "foo".into(),
suggestion: None,
}
);
}

#[test]
fn should_suggest_a_correct_mail_if_similar() {
let mut syntax = SyntaxDetails {
address: Some(EmailAddress::new("test@gmali.com".into()).unwrap()),
domain: "gmali.com".into(),
is_valid_syntax: true,
username: "test".into(),
suggestion: None,
};
get_similar_mail_provider(&mut syntax);
assert_eq!(syntax.suggestion, Some("test@gmail.com".to_string()))
}
}
6 changes: 3 additions & 3 deletions core/src/util/input_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,20 +405,20 @@ mod tests {
let res = dummy_response_with_message("blacklist");
let actual = serde_json::to_string(&res).unwrap();
// Make sure the `description` is present with IpBlacklisted.
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#;
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","suggestion":null}}"#;
assert_eq!(expected, actual);

let res =
dummy_response_with_message("Client host rejected: cannot find your reverse hostname");
let actual = serde_json::to_string(&res).unwrap();
// Make sure the `description` is present with NeedsRDNs.
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#;
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","suggestion":null}}"#;
assert_eq!(expected, actual);

let res = dummy_response_with_message("foobar");
let actual = serde_json::to_string(&res).unwrap();
// Make sure the `description` is NOT present.
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#;
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","suggestion":null}}"#;
assert_eq!(expected, actual);
}
}

0 comments on commit 2d385f3

Please sign in to comment.