Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a crypto handler #26

Merged
merged 9 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ deployment instructions) is the configuration format:
* `acceptedMimeType` is renamed `scan.allowed_mimetypes`
* `requestHeader` is renamed `download.additional_headers` and turned into a dictionary.

Note that the format of the cryptographic pickle file and key are compatible between this
project and the legacy Matrix Content Scanner. If no file exist at that path one will be
created automatically.
Note that the format of the cryptographic pickle file and key are compatible between
this project and the legacy Matrix Content Scanner. If no file exists at that path, one
will be created automatically.
babolivier marked this conversation as resolved.
Show resolved Hide resolved

## Development

Expand Down
6 changes: 4 additions & 2 deletions config.sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,12 @@ download:

# Configuration for decrypting Olm-encrypted request bodies.
crypto:
# The path to the Olm pickle file.
# The path to the Olm pickle file. This file contains the key pair to use when
# encrypting and decrypting encrypted POST request bodies.
# A new key pair will be created at startup if the pickle file doesn't already exist.
# Required.
pickle_path: "./pickle"

# The key to the pickle.
# The key to use to decode the pickle file.
# Required.
pickle_key: "this_is_a_secret"
117 changes: 117 additions & 0 deletions matrix_content_scanner/crypto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
from typing import TYPE_CHECKING

from olm.pk import PkDecryption, PkDecryptionError, PkMessage

from matrix_content_scanner.utils.constants import ErrCode
from matrix_content_scanner.utils.errors import ConfigError, ContentScannerRestError
from matrix_content_scanner.utils.types import JsonDict

if TYPE_CHECKING:
from matrix_content_scanner.mcs import MatrixContentScanner


logger = logging.getLogger(__name__)


class CryptoHandler:
    """Handler for decrypting Olm-encrypted request bodies.

    On construction, the Olm key pair is loaded from the pickle file located at the
    configured `crypto.pickle_path`, using the configured `crypto.pickle_key` as the
    passphrase. If no file exists at that path, a new key pair is generated and
    written to it.
    """

    def __init__(self, mcs: "MatrixContentScanner") -> None:
        """Loads the Olm key pair from disk, or generates and stores a new one.

        Args:
            mcs: The content scanner instance, from which the `crypto` section of the
                configuration is read.

        Raises:
            ConfigError: If the pickle file couldn't be read or written, or if the key
                pair couldn't be extracted from it with the configured key.
        """
        key = mcs.config.crypto.pickle_key
        path = mcs.config.crypto.pickle_path

        # Try reading the pickle file from disk. Only the read itself lives in the
        # `try` body, so that the handlers below can only be triggered by file access.
        try:
            with open(path, "r") as fp:
                pickle = fp.read()
        except FileNotFoundError:
            logger.info(
                "Pickle file not found, generating a new Olm key pair and storing it in"
                " pickle file %s",
                path,
            )

            # Generate a new key pair and turn it into a pickle.
            self._decryptor: PkDecryption = PkDecryption()
            pickle_bytes = self._decryptor.pickle(passphrase=key)

            # Try to write the pickle's content into a file.
            try:
                with open(path, "w+") as fp:
                    fp.write(pickle_bytes.decode("ascii"))
            except OSError as e:
                raise ConfigError(
                    "Failed to write the pickle file at the location configured for"
                    f" crypto.pickle_path ({path}): {e}"
                ) from e
        except OSError as e:
            raise ConfigError(
                "Failed to read the pickle file at the location configured for"
                f" crypto.pickle_path ({path}): {e}"
            ) from e
        else:
            # Create a PkDecryption object with the content and key.
            try:
                self._decryptor = PkDecryption.from_pickle(
                    pickle=pickle.encode("ascii"),
                    passphrase=key,
                )
            except PkDecryptionError as e:
                # If we failed to extract the key pair from the pickle file, it's likely
                # because the key is incorrect, or there's an issue with the file's
                # content.
                raise ConfigError(
                    "Configured value for crypto.pickle_key is incorrect or pickle file"
                    f" is corrupted (Olm error code: {e})"
                ) from e

            logger.info("Loaded Olm key pair from pickle file %s", path)

        # Expose the public half of the key pair as an attribute.
        self.public_key = self._decryptor.public_key

    def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict:
        """Decrypts an Olm-encrypted body.

        Args:
            ciphertext: The encrypted body's ciphertext.
            mac: The encrypted body's MAC.
            ephemeral: The encrypted body's ephemeral key.

        Returns:
            The decrypted body, parsed as JSON.

        Raises:
            ContentScannerRestError: With HTTP status 400 and the reason
                `ErrCode.FAILED_TO_DECRYPT` if the body couldn't be decrypted.
        """
        try:
            decrypted = self._decryptor.decrypt(
                message=PkMessage(
                    ephemeral_key=ephemeral,
                    mac=mac,
                    ciphertext=ciphertext,
                )
            )
        except PkDecryptionError as e:
            logger.error("Failed to decrypt encrypted body: %s", e)
            # Chain the Olm error so the original cause is kept on the REST error.
            raise ContentScannerRestError(
                http_status=400,
                reason=ErrCode.FAILED_TO_DECRYPT,
                info=str(e),
            ) from e

        # We know that `decrypted` parses as a JsonDict.
        # NOTE(review): a decrypted payload that isn't valid JSON would make
        # `json.loads` raise here without being turned into a REST error - confirm
        # whether callers need this case handled.
        return json.loads(decrypted)  # type: ignore[no-any-return]
19 changes: 17 additions & 2 deletions matrix_content_scanner/mcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from matrix_content_scanner import logutils
from matrix_content_scanner.config import MatrixContentScannerConfig
from matrix_content_scanner.crypto import CryptoHandler
from matrix_content_scanner.httpserver import HTTPServer
from matrix_content_scanner.scanner.file_downloader import FileDownloader
from matrix_content_scanner.scanner.scanner import Scanner
Expand Down Expand Up @@ -58,6 +59,10 @@ def file_downloader(self) -> FileDownloader:
def scanner(self) -> Scanner:
return Scanner(self)

@cached_property
def crypto_handler(self) -> CryptoHandler:
    """Creates the `CryptoHandler` bound to this content scanner."""
    handler = CryptoHandler(self)
    return handler

def start(self) -> None:
"""Start the HTTP server and start the reactor."""
setup_logging()
Expand Down Expand Up @@ -106,9 +111,19 @@ def setup_logging() -> None:
except (ConfigError, ScannerError) as e:
# If there's an error reading the file, print it and exit without raising so we
# don't confuse/annoy the user with an unnecessary stack trace.
logger.error("Failed to read configuration file: %s", e)
print("Failed to read configuration file: %s" % e, file=sys.stderr)
sys.exit(1)

# Start the content scanner.
# Create the content scanner.
mcs = MatrixContentScanner(cfg)

# Construct the crypto handler early on, so we can make sure we can load the Olm key
# pair from the pickle file (or write it if it doesn't already exist).
try:
_ = mcs.crypto_handler
except ConfigError as e:
print(e, file=sys.stderr)
sys.exit(1)

# Start the content scanner.
mcs.start()
43 changes: 43 additions & 0 deletions tests/test_crypto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2022 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import unittest

from olm.pk import PkEncryption

from tests.testutils import get_content_scanner


class CryptoHandlerTestCase(unittest.TestCase):
    """Tests for the crypto handler exposed by the content scanner."""

    def setUp(self) -> None:
        # Use the crypto handler of a content scanner built by the test utilities.
        scanner = get_content_scanner()
        self.crypto_handler = scanner.crypto_handler

    def test_decrypt(self) -> None:
        """Tests that an Olm-encrypted payload is successfully decrypted."""
        expected = {"foo": "bar"}
        serialised = json.dumps(expected)

        # Encrypt the serialised payload against the handler's public key.
        encryptor = PkEncryption(self.crypto_handler.public_key)
        message = encryptor.encrypt(serialised)

        # Hand the three parts of the encrypted message back to the crypto handler.
        actual = self.crypto_handler.decrypt_body(
            ciphertext=message.ciphertext,
            mac=message.mac,
            ephemeral=message.ephemeral_key,
        )

        # The round trip must give back the payload we started with.
        self.assertEqual(actual, expected)
4 changes: 3 additions & 1 deletion tests/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,6 @@ def get_content_scanner(config: Optional[JsonDict] = None) -> MatrixContentScann
# all required settings in that section.
default_config.update(config)

return MatrixContentScanner(MatrixContentScannerConfig(default_config))
parsed_config = MatrixContentScannerConfig(default_config)

return MatrixContentScanner(parsed_config)
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,13 @@ commands =

extras = dev

# The current version of python-olm that's on PyPI does not include a types marker.
# Hopefully that's something we can fix at some point, but in the meantime let's not
# block things on this and instead use the wheels on gitlab.matrix.org's repository (which
# do have a type marker). We use --index-url (and not --extra-index-url) so that pip does
# not try to download the python-olm that's on pypi.org. This is fine because GitLab will
# redirect requests for packages it doesn't know about to pypi.org.
install_command = python -m pip install --index-url=https://gitlab.matrix.org/api/v4/projects/27/packages/pypi/simple {opts} {packages}
Comment on lines +36 to +42
Copy link
Contributor Author

@babolivier babolivier Oct 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means we don't run mypy with the same version of Olm as the one we actually use for running the content scanner and its tests (pypi -> 3.1.3; gitlab -> 3.2.13), which seems to be fine but isn't ideal.


commands =
mypy matrix_content_scanner tests