From 3402b7a3abcb27e15584ef2d774f43f0fd09d8ca Mon Sep 17 00:00:00 2001 From: Georges Toth Date: Sat, 27 Apr 2024 23:36:30 +0200 Subject: [PATCH] linter fixes --- tests/test_decode.py | 4 +-- tests/test_emlparser.py | 62 ++++++++++++++++++++--------------------- tests/test_regexes.py | 7 +++-- tests/test_routing.py | 12 ++++---- 4 files changed, 43 insertions(+), 42 deletions(-) diff --git a/tests/test_decode.py b/tests/test_decode.py index 2d38ab7..882a326 100644 --- a/tests/test_decode.py +++ b/tests/test_decode.py @@ -11,7 +11,7 @@ class TestDecode: - def test_decode_field(self): + def test_decode_field(self) -> None: test_subjects = { 'Die Bezahlung mit Ihrer Kreditkarte wurde erfolgreich durchgeführt.': '=?utf-8?B?RGllIEJlemFobHVuZyBtaXQgSWhyZXIgS3JlZGl0a2FydGUgd3VyZGUgZXJmb2xncmVpY2ggZHVyY2hnZWbDvGhydC4=?=', 'Abmahnung Ihrer offenen Rechnung über 236,00 Euro': '=?utf-8?q?Abmahnung Ihrer offenen Rechnung =C3=BCber 236,00 Euro?=', @@ -33,7 +33,7 @@ def test_decode_field(self): for clear, encoded in test_subjects.items(): assert eml_parser.decode.decode_field(encoded) == clear - def test_robust_string2date(self): + def test_robust_string2date(self) -> None: """Test the converter function, it should never return the default date on the provided input """ diff --git a/tests/test_emlparser.py b/tests/test_emlparser.py index 804796a..2943366 100644 --- a/tests/test_emlparser.py +++ b/tests/test_emlparser.py @@ -80,7 +80,7 @@ def json_serial(obj: typing.Any) -> typing.Optional[str]: class TestEMLParser: - def test_get_file_hash(self): + def test_get_file_hash(self) -> None: with pathlib.Path(samples_dir, 'sample.eml').open('rb') as fhdl: raw_email = fhdl.read() @@ -93,10 +93,10 @@ def test_get_file_hash(self): assert eml_parser.EmlParser.get_file_hash(raw_email) == pre_computed_hashes - def test_wrap_hash_sha256(self): + def test_wrap_hash_sha256(self) -> None: assert eml_parser.EmlParser.get_hash('www.example.com', 'sha256') == '80fc0fb9266db7b83f85850fa0e6548b6d70ee68c8b5b412f1deea6ebdef0404' - def test_get_uri_ondata(self): + def test_get_uri_ondata(self) -> None: test_urls = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris consectetur mi tortor, http://www.example.com consectetur iaculis orci ultricies sit amet. Mauris "http://www.example.com/test1?bla" ornare lobortis ex nec dictum. Aliquam blandit arcu ac lorem iaculis aliquet. @@ -108,7 +108,7 @@ def test_get_uri_ondata(self): assert eml_parser.EmlParser(include_href=False).get_uri_ondata(test_urls) == expected_result - def test_get_uri_href_ondata(self): + def test_get_uri_href_ondata(self) -> None: test_urls = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris consectetur mi tortor, consectetur iaculis orci ultricies sit amet.
Play a cool game! @@ -129,9 +129,9 @@ def test_get_uri_href_ondata(self): assert eml_parser.EmlParser(include_href=True, email_force_tld=True).get_uri_ondata(test_urls) == expected_result - def test_get_uri_href_commas_ondata(self): + def test_get_uri_href_commas_ondata(self) -> None: test_urls = """ - http://www.example.com?t1=v1&t2=v2,https://www.example.com, http://www1.example.com?t1=v1&t2=v2, https://www1.example.com, + http://www.example.com?t1=v1&t2=v2,https://www.example.com, http://www1.example.com?t1=v1&t2=v2, https://www1.example.com, http://www2.example.com,https://www3.example.com """ @@ -146,7 +146,7 @@ def test_get_uri_href_commas_ondata(self): assert eml_parser.EmlParser(include_www=True).get_uri_ondata(test_urls) == expected_result - def test_get_valid_tld_uri_href_ondata(self): + def test_get_valid_tld_uri_href_ondata(self) -> None: test_urls = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris consectetur mi tortor, consectetur iaculis orci ultricies sit amet. Mauris ex nec dictum. Aliquam blandit arcu ac lorem iaculis aliquet. @@ -159,7 +159,7 @@ def test_get_valid_tld_uri_href_ondata(self): assert eml_parser.EmlParser(include_href=True, domain_force_tld=True).get_uri_ondata(test_urls) == expected_result - def test_get_uri_re_backtracking(self): + def test_get_uri_re_backtracking(self) -> None: """Ensure url_regex_simple does not cause catastrophic backtracking (Issue 63), test with re instead of re2 or regex""" test_urls = """ Lorem ipsum dolor sit amet, http://xxxxxxxxxx.example.com������������������������������������������������������������������������������������������������������������������������������������������������ consectetur adipiscing elit. @@ -171,7 +171,7 @@ def test_get_uri_re_backtracking(self): assert eml_parser.EmlParser(domain_force_tld=False).get_uri_ondata(test_urls) == expected_result - def test_get_uri_unicode_ondata(self): + def test_get_uri_unicode_ondata(self) -> None: """Ensure url_regex includes Unicode in domains and paths""" test_urls = """ Lorem ipsum dolor sit amet http://💌.example.คอม , http://💌.example.คอม/📮/📧/📬.png consectetur https://💩.la adipiscing elit. @@ -181,7 +181,7 @@ def test_get_uri_unicode_ondata(self): assert eml_parser.EmlParser(include_www=False, domain_force_tld=True).get_uri_ondata(test_urls) == expected_result - def test_get_uri_ipv6_ondata(self): + def test_get_uri_ipv6_ondata(self) -> None: """Ensure url_regex includes URLs with IPv6 hosts, including zone Indexes""" test_urls = """ Lorem ipsum dolor sit amet http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334] @@ -192,11 +192,11 @@ def test_get_uri_ipv6_ondata(self): assert eml_parser.EmlParser(ip_force_routable=False).get_uri_ondata(test_urls) == expected_result - def test_get_uri_ipv6_routable_ondata(self): + def test_get_uri_ipv6_routable_ondata(self) -> None: """Ensure url_regex can exclude private and other unallocated IPv6 hosts in URLs.""" test_urls = """ - Curabitur vel neque lacinia, consequat erat id http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334], - venenatis sem. Etiam dignissim ullamcorper http://[2606:2800:220:1:248:1893:25c8:1946] risus non pulvinar. + Curabitur vel neque lacinia, consequat erat id http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334], + venenatis sem. Etiam dignissim ullamcorper http://[2606:2800:220:1:248:1893:25c8:1946] risus non pulvinar. Etiam dui tortor http://[fe80::1ff:fe23:4567:890a%25eth0]/6️⃣, posuere et iaculis sed, accumsan a erat. """ @@ -204,13 +204,13 @@ def test_get_uri_ipv6_routable_ondata(self): assert eml_parser.EmlParser(ip_force_routable=True).get_uri_ondata(test_urls) == expected_result - def test_get_uri_www_ondata(self): + def test_get_uri_www_ondata(self) -> None: test_urls = """ - www91.example.com@www92.example.com www93.example.com@example.com + www91.example.com@www92.example.com www93.example.com@example.com www94......example.com/path not.www95.example.com:443/path www2.example.com:443/path 'www3.example.com/path' ‘www4.example.com#abc’ www5.example.com:443?def \nwww6.example.com.../path www7.example.com/?# www8.example.com?/# www9.example.com#?/ www10.example.com/? - https://www01.example.com/path https://www02.example.com..../path https://www03.example.com/ + https://www01.example.com/path https://www02.example.com..../path https://www03.example.com/ http://www04.example.com/?# http://www05.example.com?/# http://www06.example.com#?/ http://www07.example.com/? """ @@ -235,7 +235,7 @@ def test_get_uri_www_ondata(self): assert eml_parser.EmlParser(include_www=True).get_uri_ondata(test_urls) == expected_result - def test_headeremail2list_1(self): + def test_headeremail2list_1(self) -> None: msg = EmailMessage() msg['Subject'] = 'Test subject éèàöüä${}' msg['From'] = Address('John Doe', 'john.doe', 'example.com') @@ -248,7 +248,7 @@ def test_headeremail2list_1(self): assert sorted(ep.headeremail2list(header='to')) == ['james.doe@example.com', 'jane.doe@example.com'] - def test_headeremail2list_2(self): + def test_headeremail2list_2(self) -> None: """Here we test the headeremail2list function using an input which should trigger a email library bug 27257 """ @@ -268,7 +268,7 @@ def test_headeremail2list_2(self): # using a workaround assert ep.headeremail2list(header='to') == ['test@example.com'] - def test_parse_email_1(self): + def test_parse_email_1(self) -> None: """Parses a generated sample e-mail and tests it against a known good result""" msg = EmailMessage() msg['Subject'] = 'Test subject éèàöüä${}' @@ -289,7 +289,7 @@ def test_parse_email_1(self): recursive_compare(good_output, test_output) - def test_parse_email_2(self): + def test_parse_email_2(self) -> None: """Parses the e-mails from the samples folder""" ep = eml_parser.EmlParser() @@ -303,7 +303,7 @@ def test_parse_email_2(self): raw_email = fhdl.read() _ = ep.decode_email_bytes(raw_email) - def test_parse_email_3(self): + def test_parse_email_3(self) -> None: """Parses the e-mails from the samples folder while keeping raw data""" ep = eml_parser.EmlParser(include_raw_body=True, include_attachment_data=True) @@ -317,7 +317,7 @@ def test_parse_email_3(self): raw_email = fhdl.read() _ = ep.decode_email_bytes(raw_email) - def test_parse_email_4(self): + def test_parse_email_4(self) -> None: """Parses the e-mails from the samples folder while keeping raw data and passing in a filtering config 'pconf'""" pconf = {'whiteip': ['192.168.1.1'], 'whitefor': ['a@example.com'], 'byhostentry': ['example.com']} @@ -333,7 +333,7 @@ def test_parse_email_4(self): raw_email = fhdl.read() _ = ep.decode_email_bytes(raw_email) - def test_parse_email_5(self): + def test_parse_email_5(self) -> None: """Parses a generated sample e-mail and tests it against a known good result. In this test we want to specifically ignore e-mail addresses without TLD.""" msg = EmailMessage() @@ -356,7 +356,7 @@ def test_parse_email_5(self): recursive_compare(good_output, test_output) - def test_parse_email_6(self): + def test_parse_email_6(self) -> None: with pathlib.Path(samples_dir, 'sample_attachments.eml').open('rb') as fhdl: raw_email = fhdl.read() @@ -372,7 +372,7 @@ def test_parse_email_6(self): filename = attachment.get('filename', '') assert filename in attachment_filenames - def test_parse_email_7(self): + def test_parse_email_7(self) -> None: """Parse the sample file and make sure the currently unparsable date is returned as is. See https://bugs.python.org/issue30681 for details. @@ -385,7 +385,7 @@ def test_parse_email_7(self): assert test['header']['header']['orig-date'][0] == 'Wed Jul 2020 23:11:43 +0100' - def test_parse_email_8(self): + def test_parse_email_8(self) -> None: """Parse the sample file and make sure the currently unparsable date is returned as is. See https://github.com/GOVCERT-LU/eml_parser/issues/48 for details. @@ -398,7 +398,7 @@ def test_parse_email_8(self): assert test['body'][0]['hash'] == '4c8b6a63156885b0ca0855b1d36816c54984e1eb6f68277b46b55b4777cfac89' - def test_parse_email_9(self): + def test_parse_email_9(self) -> None: """Parses an email and verifies that www URLs with no scheme are extracted, and that URLs at the end of a message body are extracted""" with pathlib.Path(samples_dir, 'sample_body_noscheme_url.eml').open('rb') as fhdl: raw_email = fhdl.read() @@ -409,7 +409,7 @@ def test_parse_email_9(self): assert sorted(test['body'][0]['uri_noscheme']) == ['www.example.com/a/b/c/d/', 'www.example.com/test1?bla'] assert sorted(test['body'][0]['uri']) == ['http://www.example.com/', 'https://www.example2.com'] - def test_parse_email_from_email_email(self): + def test_parse_email_from_email_email(self) -> None: """Parses a generated sample e-mail and tests it against a known good result. In this test we want to specifically test for correct from address parsing where the from field contains two e-mail addresses.""" msg = EmailMessage() @@ -431,7 +431,7 @@ def test_parse_email_from_email_email(self): recursive_compare(good_output, test_output) - def test_parse_email_to_email_email(self): + def test_parse_email_to_email_email(self) -> None: """Parses a generated sample e-mail and tests it against a known good result. In this test we want to specifically test for correct to address parsing where the to field contains two e-mail addresses.""" msg = EmailMessage() @@ -453,7 +453,7 @@ def test_parse_email_to_email_email(self): recursive_compare(good_output, test_output) - def test_parse_email_newline_quopri(self): + def test_parse_email_newline_quopri(self) -> None: """Make sure we can parse RFC2047 encoded header fields with CR/LF embedded (which is invalid).""" ep = eml_parser.EmlParser() sample = samples_dir / 'sample_gh_issue_76.eml' @@ -468,7 +468,7 @@ def test_parse_email_newline_quopri(self): assert output['header']['header']['to'] == ['\n '] assert output['header']['header']['cc'] == ['\r '] - def test_parse_email_bad_message_id(self): + def test_parse_email_bad_message_id(self) -> None: """Parse bad message-id format.""" ep = eml_parser.EmlParser() sample_1 = samples_dir / 'sample_gh_issue_79_1.eml' diff --git a/tests/test_regexes.py b/tests/test_regexes.py index 79d5146..74c52ea 100644 --- a/tests/test_regexes.py +++ b/tests/test_regexes.py @@ -1,4 +1,5 @@ import pathlib + from eml_parser.regexes import * my_execution_dir = pathlib.Path(__file__).resolve().parent @@ -7,7 +8,7 @@ class TestRegularExpressions: - def test_url_regex_simple(self): + def test_url_regex_simple(self) -> None: """Ensure url_regex_simple matches URL samples""" with pathlib.Path(samples_dir, 'regexes_url_samples.txt').open('r', encoding='utf8') as fhdl: url_text_list = fhdl.read().splitlines() @@ -25,7 +26,7 @@ def test_url_regex_simple(self): assert url_diff == set() - def test_url_regex_www(self): + def test_url_regex_www(self) -> None: """Ensure url_regex_www matches URL samples""" with pathlib.Path(samples_dir, 'regexes_url_samples.txt').open('r', encoding='utf8') as fhdl: url_text_list = fhdl.read().splitlines() @@ -42,7 +43,7 @@ def test_url_regex_www(self): assert url_diff == set() - def test_dom_regex(self): + def test_dom_regex(self) -> None: """Ensure dom_regex matches domain samples""" test_doms = """www1.example.com www2.example.com""" diff --git a/tests/test_routing.py b/tests/test_routing.py index 89c9116..16917e4 100644 --- a/tests/test_routing.py +++ b/tests/test_routing.py @@ -10,7 +10,7 @@ class TestRouting: - def test_noparenthesis(self): + def test_noparenthesis(self) -> None: test_input = { '(test)': '', '((test))': '', @@ -21,7 +21,7 @@ def test_noparenthesis(self): for test, expected_result in test_input.items(): assert eml_parser.routing.noparenthesis(test) == expected_result - def test_cleanline(self): + def test_cleanline(self) -> None: test_input = { ' ;': '', ' test ': 'test', @@ -31,7 +31,7 @@ def test_cleanline(self): for test, expected_result in test_input.items(): assert eml_parser.routing.cleanline(test) == expected_result - def test_give_dom_ip(self): + def test_give_dom_ip(self) -> None: test_input = { ' 192.168.1.1 abc bla bla www.example.com sdsf ::1 test ': ['192.168.1.1', '::1', 'www.example.com'], } @@ -39,7 +39,7 @@ def test_give_dom_ip(self): for test, expected_result in test_input.items(): assert sorted(eml_parser.routing.get_domain_ip(test)) == sorted(expected_result) - def test_parserouting(self): + def test_parserouting(self) -> None: test_input = { 'test1': ( """Received: from mta1.example.com (mta1.example.com [192.168.1.100]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by mta.example2.com (Postfix) with ESMTPS id 6388F684168 for ; Fri, 26 Apr 2013 13:15:55 +0200 (CEST)""", @@ -71,11 +71,11 @@ def test_parserouting(self): ), } - for test_number, test in test_input.items(): + for _test_number, test in test_input.items(): test_output = eml_parser.routing.parserouting(test[0]) # get all keys from the test case - supported_keys = [x for x in test[1]] + supported_keys = list(test[1]) for sk in supported_keys: # make sure key is also in output