Skip to content

Commit

Permalink
Add a crypto handler (#26)
Browse files Browse the repository at this point in the history
To handle requests with an Olm-encrypted body, see
https://github.com/matrix-org/matrix-content-scanner-python/blob/main/docs/api.md#encrypted-post-body

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
  • Loading branch information
babolivier and richvdh authored Oct 12, 2022
1 parent b7563c3 commit fb12651
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 8 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ deployment instructions) is the configuration format:
* `acceptedMimeType` is renamed `scan.allowed_mimetypes`
* `requestHeader` is renamed `download.additional_headers` and turned into a dictionary.

Note that the format of the cryptographic pickle file and key are compatible between this
project and the legacy Matrix Content Scanner. If no file exist at that path one will be
created automatically.
Note that the format of the cryptographic pickle file and key are compatible between
this project and the legacy Matrix Content Scanner. If no file exists at that path, one
will be created automatically.

## Development

Expand Down
6 changes: 4 additions & 2 deletions config.sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,12 @@ download:

# Configuration for decrypting Olm-encrypted request bodies.
crypto:
# The path to the Olm pickle file.
# The path to the Olm pickle file. This file contains the key pair to use when
# encrypting and decrypting encrypted POST request bodies.
# A new key pair will be created at startup if the pickle file doesn't already exist.
# Required.
pickle_path: "./pickle"

# The key to the pickle.
# The key to use to decode the pickle file.
# Required.
pickle_key: "this_is_a_secret"
117 changes: 117 additions & 0 deletions matrix_content_scanner/crypto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
from typing import TYPE_CHECKING

from olm.pk import PkDecryption, PkDecryptionError, PkMessage

from matrix_content_scanner.utils.constants import ErrCode
from matrix_content_scanner.utils.errors import ConfigError, ContentScannerRestError
from matrix_content_scanner.utils.types import JsonDict

if TYPE_CHECKING:
from matrix_content_scanner.mcs import MatrixContentScanner


logger = logging.getLogger(__name__)


class CryptoHandler:
    """Handler for handling Olm-encrypted request bodies.

    Attributes:
        public_key: The public key of the Olm key pair in use, as exposed by the
            underlying PkDecryption object.
    """

    def __init__(self, mcs: "MatrixContentScanner") -> None:
        """Loads the Olm key pair from the pickle file, creating it if it's missing.

        Args:
            mcs: The content scanner. Its configuration provides the location of the
                pickle file (crypto.pickle_path) and the key to use to decode it
                (crypto.pickle_key).

        Raises:
            ConfigError: The pickle file could not be read, could not be written, or
                its content could not be turned back into a key pair.
        """
        key = mcs.config.crypto.pickle_key
        path = mcs.config.crypto.pickle_path

        # Try reading the pickle file from disk. Only the read itself is guarded here,
        # so that the handlers below only ever deal with I/O failures.
        try:
            with open(path, "r") as fp:
                pickle = fp.read()

        except FileNotFoundError:
            logger.info(
                "Pickle file not found, generating a new Olm key pair and storing it in"
                " pickle file %s",
                path,
            )

            # Generate a new key pair and turn it into a pickle.
            self._decryptor: PkDecryption = PkDecryption()
            pickle_bytes = self._decryptor.pickle(passphrase=key)

            # Try to write the pickle's content into a file.
            try:
                with open(path, "w+") as fp:
                    fp.write(pickle_bytes.decode("ascii"))
            except OSError as e:
                raise ConfigError(
                    "Failed to write the pickle file at the location configured for"
                    f" crypto.pickle_path ({path}): {e}"
                ) from e

        except OSError as e:
            raise ConfigError(
                "Failed to read the pickle file at the location configured for"
                f" crypto.pickle_path ({path}): {e}"
            ) from e

        else:
            # The file could be read: create a PkDecryption object with its content and
            # the configured key.
            try:
                self._decryptor = PkDecryption.from_pickle(
                    pickle=pickle.encode("ascii"),
                    passphrase=key,
                )
            except PkDecryptionError as e:
                # If we failed to extract the key pair from the pickle file, it's likely
                # because the key is incorrect, or there's an issue with the file's
                # content.
                raise ConfigError(
                    "Configured value for crypto.pickle_key is incorrect or pickle file"
                    f" is corrupted (Olm error code: {e})"
                ) from e

            logger.info("Loaded Olm key pair from pickle file %s", path)

        self.public_key = self._decryptor.public_key

    def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict:
        """Decrypts an Olm-encrypted body.

        Args:
            ciphertext: The encrypted body's ciphertext.
            mac: The encrypted body's MAC.
            ephemeral: The encrypted body's ephemeral key.

        Returns:
            The decrypted body, parsed as JSON.

        Raises:
            ContentScannerRestError(400): The body could not be decrypted.
        """
        try:
            decrypted = self._decryptor.decrypt(
                message=PkMessage(
                    ephemeral_key=ephemeral,
                    mac=mac,
                    ciphertext=ciphertext,
                )
            )
        except PkDecryptionError as e:
            logger.error("Failed to decrypt encrypted body: %s", e)
            raise ContentScannerRestError(
                http_status=400,
                reason=ErrCode.FAILED_TO_DECRYPT,
                info=str(e),
            ) from e

        # We assume that `decrypted` parses as a JsonDict.
        # NOTE(review): the plaintext is chosen by the client, so json.loads may raise
        # (or return something that isn't a dict) here - confirm callers validate it.
        return json.loads(decrypted)  # type: ignore[no-any-return]
19 changes: 17 additions & 2 deletions matrix_content_scanner/mcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from matrix_content_scanner import logutils
from matrix_content_scanner.config import MatrixContentScannerConfig
from matrix_content_scanner.crypto import CryptoHandler
from matrix_content_scanner.httpserver import HTTPServer
from matrix_content_scanner.scanner.file_downloader import FileDownloader
from matrix_content_scanner.scanner.scanner import Scanner
Expand Down Expand Up @@ -58,6 +59,10 @@ def file_downloader(self) -> FileDownloader:
def scanner(self) -> Scanner:
return Scanner(self)

@cached_property
def crypto_handler(self) -> CryptoHandler:
    """The handler for Olm-encrypted request bodies, built on first access."""
    handler = CryptoHandler(self)
    return handler

def start(self) -> None:
"""Start the HTTP server and start the reactor."""
setup_logging()
Expand Down Expand Up @@ -106,9 +111,19 @@ def setup_logging() -> None:
except (ConfigError, ScannerError) as e:
# If there's an error reading the file, print it and exit without raising so we
# don't confuse/annoy the user with an unnecessary stack trace.
logger.error("Failed to read configuration file: %s", e)
print("Failed to read configuration file: %s" % e, file=sys.stderr)
sys.exit(1)

# Start the content scanner.
# Create the content scanner.
mcs = MatrixContentScanner(cfg)

# Construct the crypto handler early on, so we can make sure we can load the Olm key
# pair from the pickle file (or write it if it doesn't already exist).
try:
_ = mcs.crypto_handler
except ConfigError as e:
print(e, file=sys.stderr)
sys.exit(1)

# Start the content scanner.
mcs.start()
43 changes: 43 additions & 0 deletions tests/test_crypto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import unittest

from olm.pk import PkEncryption

from tests.testutils import get_content_scanner


class CryptoHandlerTestCase(unittest.TestCase):
    """Tests for the handler decrypting Olm-encrypted request bodies."""

    def setUp(self) -> None:
        scanner = get_content_scanner()
        self.crypto_handler = scanner.crypto_handler

    def test_decrypt(self) -> None:
        """Tests that an Olm-encrypted payload is successfully decrypted."""
        expected = {"foo": "bar"}
        serialised = json.dumps(expected)

        # Encrypt the serialised payload against the handler's public key.
        encryptor = PkEncryption(self.crypto_handler.public_key)
        message = encryptor.encrypt(serialised)

        # Hand the three parts of the encrypted message back to the crypto handler.
        actual = self.crypto_handler.decrypt_body(
            ciphertext=message.ciphertext,
            mac=message.mac,
            ephemeral=message.ephemeral_key,
        )

        # The round trip must give us back the payload we started with.
        self.assertEqual(actual, expected)
4 changes: 3 additions & 1 deletion tests/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,6 @@ def get_content_scanner(config: Optional[JsonDict] = None) -> MatrixContentScann
# all required settings in that section.
default_config.update(config)

return MatrixContentScanner(MatrixContentScannerConfig(default_config))
parsed_config = MatrixContentScannerConfig(default_config)

return MatrixContentScanner(parsed_config)
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,13 @@ commands =

extras = dev

# The current version of python-olm that's on PyPI does not include a types marker.
# Hopefully that's something we can fix at some point, but in the meantime let's not
# block things on this and instead use the wheels on gitlab.matrix.org's repository (which
# do have a type marker). We use --index-url (and not --extra-index-url) so that pip does
# not try to download the python-olm that's on pypi.org. This is fine because GitLab will
# redirect requests for packages it doesn't know about to pypi.org.
install_command = python -m pip install --index-url=https://gitlab.matrix.org/api/v4/projects/27/packages/pypi/simple {opts} {packages}

commands =
mypy matrix_content_scanner tests

0 comments on commit fb12651

Please sign in to comment.