Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix inconsistent handling of upper and lower cases of email addresses. #7021

Merged
merged 15 commits into from
Jul 3, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/7021.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix inconsistent handling of upper and lower cases of email addresses that is use only case folding (MSC2265).
dklimpel marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 3 additions & 2 deletions synapse/handlers/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from synapse.module_api import ModuleApi
from synapse.push.mailer import load_jinja2_templates
from synapse.types import Requester, UserID
from synapse.util.threepids import canonicalise_email

from ._base import BaseHandler

Expand Down Expand Up @@ -935,7 +936,7 @@ async def add_threepid(
# for the presence of an email address during password reset was
# case sensitive).
if medium == "email":
address = address.lower()
address = canonicalise_email(address)

await self.store.user_add_threepid(
user_id, medium, address, validated_at, self.hs.get_clock().time_msec()
Expand Down Expand Up @@ -963,7 +964,7 @@ async def delete_threepid(

# 'Canonicalise' email addresses as per above
if medium == "email":
address = address.lower()
address = canonicalise_email(address)

identity_handler = self.hs.get_handlers().identity_handler
result = await identity_handler.try_unbind_threepid(
Expand Down
12 changes: 8 additions & 4 deletions synapse/rest/client/v1/login.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from synapse.rest.well_known import WellKnownBuilder
from synapse.types import UserID
from synapse.util.msisdn import phone_number_to_msisdn
from synapse.util.threepids import canonicalise_email

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -195,11 +196,14 @@ async def _do_other_login(self, login_submission):
if medium is None or address is None:
raise SynapseError(400, "Invalid thirdparty identifier")

# For emails, canonicalise the address.
# We store all email addresses canonicalised in the DB.
# (See add_threepid in synapse/handlers/auth.py)
if medium == "email":
# For emails, transform the address to lowercase.
# We store all email addreses as lowercase in the DB.
# (See add_threepid in synapse/handlers/auth.py)
address = address.lower()
try:
address = canonicalise_email(address)
except ValueError as e:
raise SynapseError(400, str(e))

# We also apply account rate limiting using the 3PID as a key, as
# otherwise using 3PID bypasses the ratelimiting based on user ID.
Expand Down
25 changes: 16 additions & 9 deletions synapse/rest/client/v2_alpha/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from synapse.push.mailer import Mailer, load_jinja2_templates
from synapse.util.msisdn import phone_number_to_msisdn
from synapse.util.stringutils import assert_valid_client_secret, random_string
from synapse.util.threepids import check_3pid_allowed
from synapse.util.threepids import canonicalise_email, check_3pid_allowed

from ._base import client_patterns, interactive_auth_handler

Expand Down Expand Up @@ -84,7 +84,10 @@ async def on_POST(self, request):
client_secret = body["client_secret"]
assert_valid_client_secret(client_secret)

email = body["email"]
try:
email = canonicalise_email(body["email"])
except ValueError as e:
raise SynapseError(400, str(e))
send_attempt = body["send_attempt"]
next_link = body.get("next_link") # Optional param

Expand Down Expand Up @@ -261,10 +264,13 @@ async def on_POST(self, request):
if "medium" not in threepid or "address" not in threepid:
raise SynapseError(500, "Malformed threepid")
if threepid["medium"] == "email":
# For emails, transform the address to lowercase.
# We store all email addreses as lowercase in the DB.
# For emails, canonicalise the address.
# We store all email addresses canonicalised in the DB.
# (See add_threepid in synapse/handlers/auth.py)
threepid["address"] = threepid["address"].lower()
try:
threepid["address"] = canonicalise_email(threepid["address"])
except ValueError as e:
raise SynapseError(400, str(e))
# if using email, we must know about the email they're authing with!
threepid_user_id = await self.datastore.get_user_id_by_threepid(
threepid["medium"], threepid["address"]
Expand Down Expand Up @@ -379,7 +385,10 @@ async def on_POST(self, request):
client_secret = body["client_secret"]
assert_valid_client_secret(client_secret)

email = body["email"]
try:
email = canonicalise_email(body["email"])
except ValueError as e:
raise SynapseError(400, str(e))
send_attempt = body["send_attempt"]
next_link = body.get("next_link") # Optional param

Expand All @@ -390,9 +399,7 @@ async def on_POST(self, request):
Codes.THREEPID_DENIED,
)

existing_user_id = await self.store.get_user_id_by_threepid(
"email", body["email"]
)
existing_user_id = await self.store.get_user_id_by_threepid("email", email)

if existing_user_id is not None:
if self.config.request_token_inhibit_3pid_errors:
Expand Down
14 changes: 11 additions & 3 deletions synapse/rest/client/v2_alpha/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
from synapse.util.msisdn import phone_number_to_msisdn
from synapse.util.ratelimitutils import FederationRateLimiter
from synapse.util.stringutils import assert_valid_client_secret, random_string
from synapse.util.threepids import check_3pid_allowed
from synapse.util.threepids import canonicalise_email, check_3pid_allowed

from ._base import client_patterns, interactive_auth_handler

Expand Down Expand Up @@ -119,7 +119,10 @@ async def on_POST(self, request):
client_secret = body["client_secret"]
assert_valid_client_secret(client_secret)

email = body["email"]
try:
email = canonicalise_email(body["email"])
except ValueError as e:
raise SynapseError(400, str(e))
send_attempt = body["send_attempt"]
next_link = body.get("next_link") # Optional param

Expand All @@ -131,7 +134,7 @@ async def on_POST(self, request):
)

existing_user_id = await self.hs.get_datastore().get_user_id_by_threepid(
"email", body["email"]
"email", email
)

if existing_user_id is not None:
Expand Down Expand Up @@ -570,6 +573,11 @@ async def on_POST(self, request):
if login_type in auth_result:
medium = auth_result[login_type]["medium"]
address = auth_result[login_type]["address"]
if medium == "email":
try:
address = canonicalise_email(address)
except ValueError as e:
raise SynapseError(400, str(e))

existing_user_id = await self.store.get_user_id_by_threepid(
medium, address
Expand Down
29 changes: 29 additions & 0 deletions synapse/util/threepids.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import email.utils
import logging
import re

Expand Down Expand Up @@ -48,3 +49,31 @@ def check_3pid_allowed(hs, medium, address):
return True

return False


def canonicalise_email(address: str) -> str:
"""'Canonicalise' email address
Case folding of local part of email address and lowercase domain part
See MSC2265, https://github.com/matrix-org/matrix-doc/pull/2265

Args:
address: email address to be canonicalised
Returns:
The canonical form of the email address
Raises:
ValueError if the address could not be parsed.
"""

address = address.strip()
# Validate address
# See https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-11340
parsedAddress = email.utils.parseaddr(address)[1]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the object of this? email.utils.parseaddr is not intended as a way to validate addresses, so I think it is redundant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You use this check in sydent: matrix-org/sydent@4e1cfff
But it does not find a missing @

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no real idea why sydent does that; I wouldn't recommend cargo-culting it. Suggest removing it.


# parseaddr does not find missing "@"
regex = r"^[^@]+@[^@]+\.[^@]+$"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this feels like a very complicated way to check that a string contains a single @ character? just do address.split('@') and check the length of the result?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to change it. The function is to canonicalize and not to validate.

if parsedAddress == "" or not bool(re.fullmatch(regex, address)):
logger.debug("Couldn't parse email address %s", address)
raise ValueError("Unable to parse email address")

address = address.split("@")
return address[0].casefold() + "@" + address[1].lower()
99 changes: 68 additions & 31 deletions tests/rest/client/v2_alpha/test_account.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,44 +386,31 @@ def prepare(self, reactor, clock, hs):
self.email = "test@example.com"
self.url_3pid = b"account/3pid"

def test_add_email(self):
"""Test adding an email to profile
"""
client_secret = "foobar"
session_id = self._request_token(self.email, client_secret)
def test_add_valid_email(self):
self.get_success(self._add_email(self.email, self.email))

self.assertEquals(len(self.email_attempts), 1)
link = self._get_link_from_email()
def test_add_email_no_at(self):
self.get_success(self._request_token_invalid_email("address-without-at.bar"))

self._validate_token(link)
def test_add_email_two_at(self):
self.get_success(self._request_token_invalid_email("foo@foo@test.bar"))

request, channel = self.make_request(
"POST",
b"/_matrix/client/unstable/account/3pid/add",
{
"client_secret": client_secret,
"sid": session_id,
"auth": {
"type": "m.login.password",
"user": self.user_id,
"password": "test",
},
},
access_token=self.user_id_tok,
def test_add_email_bad_format(self):
self.get_success(
self._request_token_invalid_email("user@bad.example.net@good.example.com")
)

self.render(request)
self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
def test_add_email_domain_to_lower(self):
self.get_success(self._add_email("foo@TEST.BAR", "foo@test.bar"))

# Get user
request, channel = self.make_request(
"GET", self.url_3pid, access_token=self.user_id_tok,
)
self.render(request)
def test_add_email_domain_with_umlaut(self):
self.get_success(self._add_email("foo@Öumlaut.com", "foo@öumlaut.com"))

self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
self.assertEqual(self.email, channel.json_body["threepids"][0]["address"])
def test_add_email_address_casefold(self):
self.get_success(self._add_email("Strauß@Example.com", "strauss@example.com"))

def test_address_trim(self):
self.get_success(self._add_email(" foo@test.bar ", "foo@test.bar"))

def test_add_email_if_disabled(self):
"""Test adding email to profile when doing so is disallowed
Expand Down Expand Up @@ -616,6 +603,17 @@ def _request_token(self, email, client_secret):

return channel.json_body["sid"]

def _request_token_invalid_email(self, email, client_secret="foobar"):
request, channel = self.make_request(
"POST",
b"account/3pid/email/requestToken",
{"client_secret": client_secret, "email": email, "send_attempt": 1},
)
self.render(request)
self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"])
self.assertEqual("Unable to parse email address", channel.json_body["error"])

def _validate_token(self, link):
# Remove the host
path = link.replace("https://example.com", "")
Expand Down Expand Up @@ -643,3 +641,42 @@ def _get_link_from_email(self):
assert match, "Could not find link in email"

return match.group(0)

def _add_email(self, request_email, expected_email):
"""Test adding an email to profile
"""
client_secret = "foobar"
session_id = self._request_token(request_email, client_secret)

self.assertEquals(len(self.email_attempts), 1)
link = self._get_link_from_email()

self._validate_token(link)

request, channel = self.make_request(
"POST",
b"/_matrix/client/unstable/account/3pid/add",
{
"client_secret": client_secret,
"sid": session_id,
"auth": {
"type": "m.login.password",
"user": self.user_id,
"password": "test",
},
},
access_token=self.user_id_tok,
)

self.render(request)
self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])

# Get user
request, channel = self.make_request(
"GET", self.url_3pid, access_token=self.user_id_tok,
)
self.render(request)

self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
self.assertEqual("email", channel.json_body["threepids"][0]["medium"])
self.assertEqual(expected_email, channel.json_body["threepids"][0]["address"])
49 changes: 49 additions & 0 deletions tests/util/test_threepids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Dirk Klimpel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.util.threepids import canonicalise_email

from tests.unittest import HomeserverTestCase


class CanonicaliseEmailTests(HomeserverTestCase):
def test_no_at(self):
with self.assertRaises(ValueError):
canonicalise_email("address-without-at.bar")

def test_two_at(self):
with self.assertRaises(ValueError):
canonicalise_email("foo@foo@test.bar")

def test_bad_format(self):
with self.assertRaises(ValueError):
canonicalise_email("user@bad.example.net@good.example.com")

def test_valid_format(self):
self.assertEqual(canonicalise_email("foo@test.bar"), "foo@test.bar")

def test_domain_to_lower(self):
self.assertEqual(canonicalise_email("foo@TEST.BAR"), "foo@test.bar")

def test_domain_with_umlaut(self):
self.assertEqual(canonicalise_email("foo@Öumlaut.com"), "foo@öumlaut.com")

def test_address_casefold(self):
self.assertEqual(
canonicalise_email("Strauß@Example.com"), "strauss@example.com"
)

def test_address_trim(self):
self.assertEqual(canonicalise_email(" foo@test.bar "), "foo@test.bar")