add core namespace (containing matbench_genmetrics) and mp_time_split namespace #70

Merged · 29 commits · Jun 1, 2023
Changes from all commits (29 commits)
187186e
change core functionality to namespace called core
sgbaird May 26, 2023
7ae54a0
fixup core namespace
sgbaird May 26, 2023
d8f8591
Update setup.cfg
sgbaird May 26, 2023
ff91cda
Update setup.cfg
sgbaird May 26, 2023
02877ca
fix imports and rename core.py to metrics.py
sgbaird May 26, 2023
6289639
fixup namespace (matbench_genmetrics should be namespace, core is pac…
sgbaird May 26, 2023
e166a9b
Update conf.py
sgbaird May 26, 2023
a8a5352
Create __init__.py
sgbaird May 26, 2023
dc37340
add mp_time_split
sgbaird May 27, 2023
c7b94a7
dummy time summaries
sgbaird May 27, 2023
60ae1f8
use a dangling line
sgbaird May 27, 2023
b6103ab
Update environment.yml
sgbaird May 27, 2023
18ee8ac
Create paper.md
sgbaird May 27, 2023
57769d8
Create paper.bib
sgbaird May 27, 2023
1dd5d54
Update paper.md
sgbaird Jun 1, 2023
237492f
Update ci.yml
sgbaird Jun 1, 2023
484236a
Merge branch 'main' into mp-time-split
sgbaird Jun 1, 2023
c74a8b4
try removing `--include` for coverage
sgbaird Jun 1, 2023
ee743e4
Update paper.md
sgbaird Jun 1, 2023
31f7c01
update reqs
sgbaird Jun 1, 2023
19fa2d0
consolidate lines
sgbaird Jun 1, 2023
eca3f8f
imports
sgbaird Jun 1, 2023
72d415b
fix imports
sgbaird Jun 1, 2023
ec9060e
update dummy json files
sgbaird Jun 1, 2023
d91314a
add environment variable to CI
sgbaird Jun 1, 2023
a6155d9
add mp-api install
sgbaird Jun 1, 2023
8254de8
Python 3.10
sgbaird Jun 1, 2023
8395741
Merge branch 'mp-time-split' of https://github.com/sparks-baird/matbe…
sgbaird Jun 1, 2023
03a17c8
passenv MP_API_KEY in tox.ini
sgbaird Jun 1, 2023
2 changes: 1 addition & 1 deletion .coveragerc
@@ -1,7 +1,7 @@
# .coveragerc to control coverage.py
[run]
branch = True
source = matbench_genmetrics
source = matbench_genmetrics.core
# omit = bad_file.py

[paths]
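For context, the coverage source now points at the `core` subpackage because `matbench_genmetrics` itself becomes a namespace that also holds `mp_time_split`. A minimal sketch of how such a layout is typically declared in `setup.cfg` (the actual `setup.cfg` edits from commits d8f8591/ff91cda are not shown in this excerpt, so treat this as an assumption):

```ini
# Hypothetical sketch only -- the real setup.cfg changes are not part of this diff excerpt.
# An implicit (PEP 420) namespace package under src/ is usually declared like this:
[options]
package_dir =
    =src
packages = find_namespace:

[options.packages.find]
where = src
```

With that layout, `src/matbench_genmetrics/` has no top-level `__init__.py`, and `matbench_genmetrics.core` and `matbench_genmetrics.mp_time_split` are installed as separate packages under the shared namespace.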
29 changes: 21 additions & 8 deletions .github/workflows/ci.yml
@@ -17,6 +17,9 @@ on:
# (useful to check if updates on dependencies break the package)
- cron: '0 0 1,16 * *'

permissions:
contents: read

concurrency:
group: >-
${{ github.workflow }}-${{ github.ref_type }}-
@@ -31,15 +34,18 @@
steps:
- uses: actions/checkout@v3
with: {fetch-depth: 0} # deep clone for setuptools-scm
- uses: actions/setup-python@v3
- uses: actions/setup-python@v4
id: setup-python
with: {python-version: "3.10"}
- name: Run static analysis and format checkers
run: pipx run pre-commit run --all-files --show-diff-on-failure
- name: Build package distribution files
run: pipx run tox -e clean,build
run: >-
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
tox -e clean,build
- name: Record the path of wheel distribution
id: wheel-distribution
run: echo "::set-output name=path::$(ls dist/*.whl)"
run: echo "path=$(ls dist/*.whl)" >> $GITHUB_OUTPUT
- name: Store the distribution files for use in other stages
# `tests` and `publish` will use the same pre-built distributions,
# so we make sure to release the exact same package that was tested
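The "Record the path of wheel distribution" step above also migrates from the deprecated `::set-output` workflow command to the `$GITHUB_OUTPUT` environment file. The two forms are equivalent; only the second is still supported by GitHub Actions:

```bash
# Deprecated workflow command (the old line in this hunk):
echo "::set-output name=path::$(ls dist/*.whl)"

# Current approach: append key=value to the step-outputs file provided by the runner
echo "path=$(ls dist/*.whl)" >> "$GITHUB_OUTPUT"
```

Downstream steps and jobs read the value exactly as before, e.g. `${{ steps.wheel-distribution.outputs.path }}`.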
@@ -63,17 +69,22 @@
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/setup-python@v4
id: setup-python
with:
python-version: ${{ matrix.python }}
- name: Retrieve pre-built distribution files
uses: actions/download-artifact@v3
with: {name: python-distribution-files, path: dist/}
- name: Install mp-api
run: pip install mp-api
- name: Run tests
env:
MP_API_KEY: ${{ secrets.MP_API_KEY }}
run: >-
pipx run tox
--installpkg '${{ needs.prepare.outputs.wheel-distribution }}'
-- -rFEx --durations 10 --color yes
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
tox --installpkg '${{ needs.prepare.outputs.wheel-distribution }}'
-- -rFEx --durations 10 --color yes # pytest args
- name: Generate coverage report
run: pipx run coverage lcov -o coverage.lcov
- name: Upload partial coverage report
@@ -98,9 +109,11 @@
needs: finalize
if: ${{ github.event_name == 'push' && contains(github.ref, 'refs/tags/') }}
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/setup-python@v4
with: {python-version: "3.10"}
- name: Retrieve pre-built distribution files
uses: actions/download-artifact@v3
7 changes: 7 additions & 0 deletions .gitignore
@@ -52,5 +52,12 @@ MANIFEST
.venv*/
.conda*/
.python-version

src/matbench_genmetrics/cdvae/utils.py
matbench-genmetrics/meta.yaml

pytest.ini
src/matbench_genmetrics/mp_time_split/utils/mp_time_summary.json
src/matbench_genmetrics/mp_time_split/utils/mp_time_summary.json.gz
mp_time_split/meta.yaml
mp-time-split/meta.yaml
6 changes: 5 additions & 1 deletion .readthedocs.yml
@@ -16,8 +16,12 @@ sphinx:
formats:
- pdf

build:
os: ubuntu-22.04
tools:
python: "3.11"

python:
version: 3.8
install:
- requirements: docs/requirements.txt
- {path: ., method: pip}
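This hunk adopts the Read the Docs configuration that requires an explicit `build.os` and `build.tools.python`, and it drops the legacy `python.version` key (the single deletion in this file). Assuming the unchanged surrounding keys, the resulting `.readthedocs.yml` would look roughly like:

```yaml
# Assumed end state after this hunk; keys outside the hunk are inferred from the diff context.
formats:
  - pdf

build:
  os: ubuntu-22.04
  tools:
    python: "3.11"

python:
  install:
    - requirements: docs/requirements.txt
    - {path: ., method: pip}
```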
1 change: 1 addition & 0 deletions AUTHORS.md
@@ -1,3 +1,4 @@
# Contributors

* sgbaird [sterling.baird@utah.edu](mailto:sterling.baird@utah.edu)
* JosephMontoya-TRI
2 changes: 1 addition & 1 deletion LICENSE.txt
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2022 sgbaird
Copyright (c) 2023 sgbaird

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
181 changes: 18 additions & 163 deletions README.md
@@ -1,196 +1,55 @@
[![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/)
<!-- These are examples of badges you might also want to add to your README. Update the URLs accordingly.
[![Built Status](https://api.cirrus-ci.com/github/<USER>/matbench-genmetrics.svg?branch=main)](https://cirrus-ci.com/github/<USER>/matbench-genmetrics)
[![ReadTheDocs](https://readthedocs.org/projects/matbench-genmetrics/badge/?version=latest)](https://matbench-genmetrics.readthedocs.io/en/stable/)
[![Coveralls](https://img.shields.io/coveralls/github/sparks-baird/matbench-genmetrics/main.svg)](https://coveralls.io/r/sparks-baird/matbench-genmetrics)
[![Coveralls](https://img.shields.io/coveralls/github/<USER>/matbench-genmetrics/main.svg)](https://coveralls.io/r/<USER>/matbench-genmetrics)
[![PyPI-Server](https://img.shields.io/pypi/v/matbench-genmetrics.svg)](https://pypi.org/project/matbench-genmetrics/)
[![Conda-Forge](https://img.shields.io/conda/vn/conda-forge/matbench-genmetrics.svg)](https://anaconda.org/conda-forge/matbench-genmetrics)
![PyPI - Downloads](https://img.shields.io/pypi/dm/matbench-genmetrics)
![Lines of code](https://img.shields.io/tokei/lines/github/sparks-baird/matbench-genmetrics)
<!-- These are examples of badges you might also want to add to your README. Update the URLs accordingly.
[![Built Status](https://api.cirrus-ci.com/github/<USER>/matbench-genmetrics.svg?branch=main)](https://cirrus-ci.com/github/<USER>/matbench-genmetrics)
[![Monthly Downloads](https://pepy.tech/badge/matbench-genmetrics/month)](https://pepy.tech/project/matbench-genmetrics)
[![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=social&label=Twitter)](https://twitter.com/matbench-genmetrics)
-->
> **NOTE: This is a WIP repository (as of 2022-08-06) being developed in parallel with [`xtal2png`](https://github.com/sparks-baird/xtal2png) and [`mp-time-split`](https://github.com/sparks-baird/mp-time-split). Feedback and contributions welcome!** This is not an official repository of Matbench, but may be incorporated into Matbench at a later time (see https://github.com/materialsproject/matbench/issues/150).

# matbench-genmetrics [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sparks-baird/matbench-genmetrics/blob/main/notebooks/1.0-matbench-genmetrics-basic.ipynb)
> Generative materials benchmarking metrics, inspired by [guacamol](https://www.benevolent.com/guacamol) and [CDVAE](https://github.com/txie-93/cdvae).

This repository provides standardized benchmarks for generative models of crystal
structures. Each benchmark has a fixed dataset, a predefined split, and an associated
notion of best (i.e., a metric).

<p align="center"><img src="https://github.com/sparks-baird/matbench-genmetrics/raw/main/reports/figures/metrics.png" width=450></p>

## Getting Started

Installation, a dummy example, output metrics for the example, and descriptions of the benchmark metrics.

### Installation

Create a conda environment with the `matbench-genmetrics` package installed from the
`conda-forge` channel. Then activate the environment.

> **NOTE: Not available on conda-forge as of 2022-07-30; the recipe is under review by the
> conda-forge team, so use `pip install matbench-genmetrics` for now.**

```bash
conda create --name matbench-genmetrics --channel conda-forge python==3.9.* matbench-genmetrics
conda activate matbench-genmetrics
```

> NOTE: It doesn't have to be Python 3.9; you can remove `python==3.9.*` altogether or
> change this to e.g. `python==3.8.*`. See [Advanced Installation](#advanced-installation).

### Example

> NOTE: Be sure to set `dummy=False` for the real/full benchmark run. `MPTSMetrics10` is
> intended for fast prototyping and debugging, as it assumes only 10 generated structures.

```python
>>> from tqdm import tqdm
>>> from mp_time_split.utils.gen import DummyGenerator
>>> from matbench_genmetrics.core import MPTSMetrics10, MPTSMetrics100, MPTSMetrics1000, MPTSMetrics10000
>>> mptm = MPTSMetrics10(dummy=True)
>>> for fold in mptm.folds:
...     train_val_inputs = mptm.get_train_and_val_data(fold)
...     dg = DummyGenerator()
...     dg.fit(train_val_inputs)
...     gen_structures = dg.gen(n=mptm.num_gen)
...     mptm.evaluate_and_record(fold, gen_structures)
```

### Output

```python
print(mptm.recorded_metrics)
```

```python
{
0: {
"validity": 0.4375,
"coverage": 0.0,
"novelty": 1.0,
"uniqueness": 0.9777777777777777,
},
1: {
"validity": 0.4390681003584229,
"coverage": 0.0,
"novelty": 1.0,
"uniqueness": 0.9333333333333333,
},
2: {
"validity": 0.4401197604790419,
"coverage": 0.0,
"novelty": 1.0,
"uniqueness": 0.8222222222222222,
},
3: {
"validity": 0.4408740359897172,
"coverage": 0.0,
"novelty": 1.0,
"uniqueness": 0.8444444444444444,
},
4: {
"validity": 0.4414414414414415,
"coverage": 0.0,
"novelty": 1.0,
"uniqueness": 0.9111111111111111,
},
}
```
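Each fold reports its own dictionary of metrics. If a single summary number per metric is wanted, the per-fold values can simply be averaged; here is a small sketch using the `recorded_metrics` dictionary above (this aggregation is not part of the package API shown here):

```python
# Sketch: averaging the per-fold metrics from mptm.recorded_metrics (shown above).
import numpy as np

mean_metrics = {
    name: float(np.mean([fold[name] for fold in mptm.recorded_metrics.values()]))
    for name in ("validity", "coverage", "novelty", "uniqueness")
}
print(mean_metrics)
# -> roughly {'validity': 0.44, 'coverage': 0.0, 'novelty': 1.0, 'uniqueness': 0.90} for the values above
```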

### Metrics

| Metric | Description |
| ---------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Validity | One minus (Wasserstein distance between distribution of space group numbers for train and generated structures divided by distance of dummy case between train and `space_group_number == 1`). See also <https://github.com/sparks-baird/matbench-genmetrics/issues/44> |
| Coverage | Match counts between held-out test structures and generated structures divided by number of test structures ("predict the future"). |
| Novelty | One minus (match counts between train structures and generated structures divided by number of generated structures). |
| Uniqueness | One minus (non-self-comparing match counts within generated structures divided by total possible non-self-comparing matches). |

A match is when <code><a href="https://pymatgen.org/pymatgen.analysis.structure_matcher.html#pymatgen.analysis.structure_matcher.StructureMatcher">StructureMatcher</a>(stol=0.5, ltol=0.3, angle_tol=10.0).fit(s1, s2)</code> evaluates to `True`.
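As a concrete illustration of that match criterion, here is a minimal sketch using pymatgen directly; the two structures are made up for the example, and only the tolerances come from the definition above:

```python
# Minimal sketch of the match criterion; the example structures are illustrative only.
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.core import Lattice, Structure

s1 = Structure(Lattice.cubic(3.00), ["Po"], [[0.0, 0.0, 0.0]])
s2 = Structure(Lattice.cubic(3.05), ["Po"], [[0.0, 0.0, 0.0]])  # slightly strained copy

matcher = StructureMatcher(stol=0.5, ltol=0.3, angle_tol=10.0)
print(matcher.fit(s1, s2))  # True -> counted as a match in the metrics above
```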



## Advanced Installation

### Anaconda (`conda`) installation (recommended)
# matbench-genmetrics

(2022-07-30, conda-forge installation pending, fallback to `pip install matbench-genmetrics` as separate command)
> Generative materials benchmarking metrics, inspired by CDVAE.

Create and activate a new `conda` environment named `matbench-genmetrics` (`-n`) that will search for and install the `matbench-genmetrics` package from the `conda-forge` Anaconda channel (`-c`).
A longer description of your project goes here...

```bash
conda create -n matbench-genmetrics -c conda-forge matbench-genmetrics
conda activate matbench-genmetrics
```

Alternatively, in an already activated environment:

```bash
conda install -c conda-forge matbench-genmetrics
```

If you run into conflicts with packages you are integrating with `matbench-genmetrics`, please try installing all packages in a single line of code (or two if mixing `conda` and `pip` packages in the same environment) and installing with `mamba` ([source](https://stackoverflow.com/a/69137255/13697228)).

### PyPI (`pip`) installation

Create and activate a new `conda` environment named `matbench-genmetrics` (`-n`) with `python==3.9.*` or your preferred Python version, then install `matbench-genmetrics` via `pip`.

```bash
conda create -n matbench-genmetrics python==3.9.*
conda activate matbench-genmetrics
pip install matbench-genmetrics
```

## Editable installation
## Installation

In order to set up the necessary environment:

1. clone and enter the repository via:

```bash
git clone https://github.com/sparks-baird/matbench-genmetrics.git
cd matbench-genmetrics
1. review and uncomment what you need in `environment.yml` and create an environment `matbench-genmetrics` with the help of [conda]:
```
conda env create -f environment.yml
```
2. activate the new environment with:
```

2. create and activate a new conda environment (optional, but recommended)

```bash
conda create --name matbench-genmetrics python==3.9.*
conda activate matbench-genmetrics
```

3. perform an editable (`-e`) installation in the current directory (`.`):

```bash
pip install -e .
```
> **_NOTE:_** The conda environment will have matbench-genmetrics installed in editable mode.
> Some changes, e.g. in `setup.cfg`, might require you to run `pip install -e .` again.

> **_NOTE:_** Some changes, e.g. in `setup.cfg`, might require you to run `pip install -e .` again.

Optional and needed only once after `git clone`:

3. install several [pre-commit] git hooks with:

```bash
pre-commit install
# You might also want to run `pre-commit autoupdate`
```

and check out the configuration under `.pre-commit-config.yaml`.
The `-n, --no-verify` flag of `git commit` can be used to deactivate pre-commit hooks temporarily.

4. install [nbstripout] git hooks to remove the output cells of committed notebooks with:

```bash
nbstripout --install --attributes notebooks/.gitattributes
```

This is useful to avoid large diffs due to plots in your notebooks.
A simple `nbstripout --uninstall` will revert these changes.


Then take a look into the `scripts` and `notebooks` folders.

## Dependency Management & Reproducibility
@@ -199,21 +58,17 @@ Then take a look into the `scripts` and `notebooks` folders.
in `setup.cfg` if you want to ship and install your package via `pip` later on.
2. Create concrete dependencies as `environment.lock.yml` for the exact reproduction of your
environment with:

```bash
conda env export -n matbench-genmetrics -f environment.lock.yml
```

For multi-OS development, consider using `--no-builds` during the export.
3. Update your current environment with respect to a new `environment.lock.yml` using:

```bash
conda env update -f environment.lock.yml --prune
```

## Project Organization

```txt
```
├── AUTHORS.md <- List of developers and maintainers.
├── CHANGELOG.md <- Changelog to keep track of new features and fixes.
├── CONTRIBUTING.md <- Guidelines for contributing to this project.
@@ -244,7 +99,7 @@ Then take a look into the `scripts` and `notebooks` folders.
├── setup.py <- [DEPRECATED] Use `python setup.py develop` to install for
│ development or `python setup.py bdist_wheel` to build.
├── src
│ └── matbench_genmetrics <- Actual Python package where the main functionality goes.
│ └── core <- Actual Python package where the main functionality goes.
├── tests <- Unit tests which can be run with `pytest`.
├── .coveragerc <- Configuration for coverage reports of unit tests.
├── .isort.cfg <- Configuration for git hook that sorts imports.
@@ -255,7 +110,7 @@ Then take a look into the `scripts` and `notebooks` folders.

## Note

This project has been set up using [PyScaffold] 4.2.2.post1.dev2+ge50b5e1 and the [dsproject extension] 0.7.2.post1.dev2+geb5d6b6.
This project has been set up using [PyScaffold] 4.4.1 and the [dsproject extension] 0.7.2.

[conda]: https://docs.conda.io/
[pre-commit]: https://pre-commit.com/