Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a crypto handler #26

Merged
merged 9 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,14 @@ pip install matrix-content-scanner
Copy and edit the [sample configuration file](https://github.com/matrix-org/matrix-content-scanner-python/blob/main/config.sample.yaml).
Each key is documented in this file.

Before running the Matrix Content Scanner for the first time (if you are not [migrating
from the legacy Matrix Content Scanner](#migrating-from-the-legacy-matrix-content-scanner)),
run (from within your virtual environment if one was created):
Then run the content scanner (from within your virtual environment if one was created):

```commandline
python -m matrix_content_scanner.mcs -c CONFIG_FILE --generate-secrets
python -m matrix_content_scanner.mcs -c CONFIG_FILE
```

Where `CONFIG_FILE` is the path to your configuration file.

This will generate the cryptographic secrets required for the content scanner to run.

Then run the content scanner:

```commandline
python -m matrix_content_scanner.mcs -c CONFIG_FILE
```

## API

See [the API documentation](/docs/api.md) for information about how clients are expected
Expand Down
2 changes: 0 additions & 2 deletions config.sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,6 @@ download:
crypto:
# The path to the Olm pickle file. This file contains the key pair to use when
# encrypting and decrypting encrypted POST request bodies.
# This file needs to be created with the --generate-secrets command line argument
# for the Matrix Content Scanner to run, see README.md for more information.
babolivier marked this conversation as resolved.
Show resolved Hide resolved
# Required.
pickle_path: "./pickle"

Expand Down
88 changes: 40 additions & 48 deletions matrix_content_scanner/crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from olm.pk import PkDecryption, PkDecryptionError, PkMessage

from matrix_content_scanner.config import MatrixContentScannerConfig
from matrix_content_scanner.utils.constants import ErrCode
from matrix_content_scanner.utils.errors import ConfigError, ContentScannerRestError
from matrix_content_scanner.utils.types import JsonDict
Expand All @@ -40,59 +39,52 @@ def __init__(self, mcs: "MatrixContentScanner") -> None:
try:
with open(path, "r") as fp:
pickle = fp.read()
except OSError as e:
raise ConfigError(
"Failed to open the pickle file configured at crypto.pickle_path (%s): %s"
% (path, e)
)

# Create a PkDecryption object with the content and key.
try:
self._decryptor: PkDecryption = PkDecryption.from_pickle(
pickle=pickle.encode("ascii"),
passphrase=key,
)
except PkDecryptionError as e:
# If we failed to extract the key pair from the pickle file, it's likely
# because the key is incorrect, or there's an issue with the file's content.
raise ConfigError(
"Configured value for crypto.pickle_key is incorrect or pickle file is"
" corrupted (Olm error code: %s)" % e
)

logger.info("Loaded Olm key pair from pickle file %s", path)

self.public_key = self._decryptor.public_key

@staticmethod
def generate_and_store_key_pair(config: MatrixContentScannerConfig) -> None:
"""Generates a new Olm key pair, and stores it in the configured pickle file.
# Create a PkDecryption object with the content and key.
try:
self._decryptor: PkDecryption = PkDecryption.from_pickle(
pickle=pickle.encode("ascii"),
passphrase=key,
)
except PkDecryptionError as e:
# If we failed to extract the key pair from the pickle file, it's likely
# because the key is incorrect, or there's an issue with the file's
# content.
raise ConfigError(
"Configured value for crypto.pickle_key is incorrect or pickle file"
" is corrupted (Olm error code: %s)" % e
)

Args:
config: The content scanner config.
logger.info("Loaded Olm key pair from pickle file %s", path)

Raises:
ConfigError if we failed to write the file.
"""
path = config.crypto.pickle_path
except OSError as e:
if not isinstance(e, FileNotFoundError):
raise ConfigError(
"Failed to read the pickle file at the location configured for"
" crypto.pickle_path (%s): %s" % (path, e)
)
babolivier marked this conversation as resolved.
Show resolved Hide resolved

logger.info(
"Generating a new Olm key pair and storing it in pickle file %s", path
)
logger.info(
"Pickle file not found, generating a new Olm key pair and storing it in"
" pickle file %s",
path,
)

# Generate a new key pair and turn it into a pickle.
decryptor = PkDecryption()
pickle_bytes = decryptor.pickle(passphrase=config.crypto.pickle_key)
# Generate a new key pair and turn it into a pickle.
self._decryptor = PkDecryption()
pickle_bytes = self._decryptor.pickle(passphrase=key)

# Try to write the pickle's content into a file.
try:
with open(path, "w+") as fp:
fp.write(pickle_bytes.decode("ascii"))
except OSError as e:
raise ConfigError(
"Failed to write the pickle file at the location configured for"
" crypto.pickle_path (%s): %s" % (path, e)
babolivier marked this conversation as resolved.
Show resolved Hide resolved
)

# Try to write the pickle's content into a file.
try:
with open(path, "w+") as fp:
fp.write(pickle_bytes.decode("ascii"))
except OSError as e:
raise ConfigError(
"Failed to write the pickle file at the location configured for"
" crypto.pickle_path (%s): %s" % (path, e)
)
self.public_key = self._decryptor.public_key

def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict:
"""Decrypts an Olm-encrypted body.
Expand Down
25 changes: 4 additions & 21 deletions matrix_content_scanner/mcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def crypto_handler(self) -> CryptoHandler:

def start(self) -> None:
"""Start the HTTP server and start the reactor."""
setup_logging()
http_server = HTTPServer(self)
http_server.start()
self.reactor.run()
Expand Down Expand Up @@ -92,8 +93,6 @@ def setup_logging() -> None:


if __name__ == "__main__":
setup_logging()

parser = argparse.ArgumentParser(
description="A web service for scanning media hosted by a Matrix media repository."
)
Expand All @@ -103,11 +102,6 @@ def setup_logging() -> None:
required=True,
help="The YAML configuration file.",
)
parser.add_argument(
"--generate-secrets",
action="store_true",
help="Generate secrets such as cryptographic key pairs needed for the content scanner to run.",
)

args = parser.parse_args()

Expand All @@ -117,29 +111,18 @@ def setup_logging() -> None:
except (ConfigError, ScannerError) as e:
# If there's an error reading the file, print it and exit without raising so we
# don't confuse/annoy the user with an unnecessary stack trace.
logger.error("Failed to read configuration file: %s", e)
print("Failed to read configuration file: %s" % e, file=sys.stderr)
sys.exit(1)

# If required by the command-line arguments, generate and store the secrets needed for
# the program to run.
if args.generate_secrets:
try:
CryptoHandler.generate_and_store_key_pair(cfg)
except ConfigError as e:
logger.error("Failed to generate secrets: %s", e)
sys.exit(1)

sys.exit(0)

# Create the content scanner.
mcs = MatrixContentScanner(cfg)

# Construct the crypto handler early on, so we can make sure we can load the Olm key
# pair from the pickle file.
# pair from the pickle file (or write it if it doesn't already exist).
try:
_ = mcs.crypto_handler
except ConfigError as e:
logger.error(e)
print(e, file=sys.stderr)
sys.exit(1)

# Start the content scanner.
Expand Down
4 changes: 0 additions & 4 deletions tests/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from twisted.web.http_headers import Headers

from matrix_content_scanner.config import MatrixContentScannerConfig
from matrix_content_scanner.crypto import CryptoHandler
from matrix_content_scanner.mcs import MatrixContentScanner
from matrix_content_scanner.utils.types import JsonDict

Expand Down Expand Up @@ -126,7 +125,4 @@ def get_content_scanner(config: Optional[JsonDict] = None) -> MatrixContentScann

parsed_config = MatrixContentScannerConfig(default_config)

# Generate the Olm key pair and store them in a pickle file.
CryptoHandler.generate_and_store_key_pair(parsed_config)

return MatrixContentScanner(parsed_config)