diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..f884711 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,138 @@ +# This file is autogenerated by maturin v1.5.1 +# To update, run +# +# maturin generate-ci github +# +name: CI + +on: + push: + branches: + - main + - master + tags: + - '*' + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + linux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: ubuntu-latest + target: x86 + - runner: ubuntu-latest + target: aarch64 + - runner: ubuntu-latest + target: armv7 + - runner: ubuntu-latest + target: s390x + - runner: ubuntu-latest + target: ppc64le + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-linux-${{ matrix.platform.target }} + path: dist + + windows: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: windows-latest + target: x64 + - runner: windows-latest + target: x86 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + architecture: ${{ matrix.platform.target }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-windows-${{ matrix.platform.target }} + path: dist + + macos: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: macos-latest + target: x86_64 + - runner: macos-14 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-macos-${{ matrix.platform.target }} + path: dist + + sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist + - name: Upload sdist + uses: actions/upload-artifact@v4 + with: + name: wheels-sdist + path: dist + + release: + name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [linux, windows, macos, sdist] + steps: + - uses: actions/download-artifact@v4 + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --non-interactive --skip-existing wheels-*/* diff --git a/.github/workflows/apply-pip-compile.yml b/.github/workflows/apply-pip-compile.yml new file mode 100644 index 0000000..6e21652 --- /dev/null +++ b/.github/workflows/apply-pip-compile.yml @@ -0,0 +1,26 @@ +name: Apply pip compile (generate lockfiles) + +on: workflow_dispatch + +jobs: + apply-pip-compile: + name: Apply pip compile + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version-file: pyproject.toml + - 
name: Install uv
+        run: |
+          pip3 install uv
+      - name: Run uv pip compile and push
+        run: |
+          set +e # Do not exit shell on failure
+          bash scripts/compile_requirements.sh
+          git config user.name github-actions[bot]
+          git config user.email github-actions[bot]@users.noreply.github.com
+          git add .
+          git commit -m "build: update requirements using uv pip compile [skip ci]"
+          git push
diff --git a/.github/workflows/check-pip-compile.yml b/.github/workflows/check-pip-compile.yml
new file mode 100644
index 0000000..a3e60ca
--- /dev/null
+++ b/.github/workflows/check-pip-compile.yml
@@ -0,0 +1,45 @@
+name: Check pip compile sync
+
+on: [push, pull_request]
+
+jobs:
+  check-pip-compile:
+    name: Check pip compile
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: pyproject.toml
+      - name: Install uv
+        run: |
+          pip3 install uv
+      - name: Generate lockfile and print diff
+        run: |
+          set +e # Do not exit shell on failure
+
+          out=$(bash scripts/compile_requirements.sh 2> _stderr.txt)
+          exit_code=$?
+          err=$(<_stderr.txt)
+
+          if [[ -n "$out" ]]; then
+            # Display the raw output in the step
+            echo "${out}"
+            # Display the Markdown output in the job summary
+            { echo "\`\`\`"; echo "${out}"; echo "\`\`\`"; } >> "$GITHUB_STEP_SUMMARY"
+          fi
+          if [[ -n "$err" ]]; then
+            echo "${err}"
+            { echo "\`\`\`"; echo "${err}"; echo "\`\`\`"; } >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+          if [[ $exit_code -eq 0 ]]; then
+            # When the script succeeds, there are changes in requirements that are not compiled yet.
+            # Print the suggested changes.
+            { echo "\`\`\`diff"; git diff; echo "\`\`\`"; } >> "$GITHUB_STEP_SUMMARY"
+            exit 1
+          fi
+
+          # When the script fails, it means it does not have anything to compile.
+ exit 0 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..8019b21 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,228 @@ +name: Deploy a new version + +on: + workflow_dispatch: + inputs: + version_tag: + description: 'Version tag' + required: true + default: v0.1.0 + dry_run: + type: boolean + description: 'Dry run' + default: false + +permissions: + contents: read + +jobs: + dry-run: + if: ${{ github.event.inputs.dry_run == 'true' }} + runs-on: ubuntu-latest + environment: mkdocs + + steps: + - uses: actions/checkout@v4 + - name: Push new version tag temporarily for changelog generation + run: | + git config user.name github-actions[bot] + git config user.email github-actions[bot]@users.noreply.github.com + git tag -a ${{ github.event.inputs.version_tag }} -m ${{ github.event.inputs.version_tag }} + git push --tags + + - name: Get CHANGELOG + id: changelog-dry-run + uses: requarks/changelog-action@v1.10.2 + with: + includeInvalidCommits: true + excludeTypes: build,docs,style,other + token: ${{ github.token }} + tag: ${{ github.event.inputs.version_tag }} + + - name: Display CHANGELOG + run: | + echo '${{ steps.changelog-dry-run.outputs.changes }}' + echo '${{ steps.changelog-dry-run.outputs.changes }}' > "$GITHUB_STEP_SUMMARY" + + - name: Remove temporary version tag + run: | + git tag -d ${{ github.event.inputs.version_tag }} + git push origin --delete ${{ github.event.inputs.version_tag }} + + deploy: + if: ${{ github.event.inputs.dry_run == 'false' }} + runs-on: ubuntu-latest + environment: mkdocs + + steps: + - uses: actions/checkout@v4 + - name: Push new version tag temporarily for changelog generation + run: | + git config user.name github-actions[bot] + git config user.email github-actions[bot]@users.noreply.github.com + git tag -a ${{ github.event.inputs.version_tag }} -m ${{ github.event.inputs.version_tag }} + git push --tags + + - name: Update CHANGELOG + id: changelog + uses: requarks/changelog-action@v1.10.2 + with: + includeInvalidCommits: true + excludeTypes: build,docs,style,other + token: ${{ github.token }} + tag: ${{ github.event.inputs.version_tag }} + changelogFilePath: docs/CHANGELOG.md + + - name: Commit docs/CHANGELOG.md and update tag + run: | + git tag -d ${{ github.event.inputs.version_tag }} + git push origin --delete ${{ github.event.inputs.version_tag }} + git add docs/CHANGELOG.md + git commit -m "docs: update docs/CHANGELOG.md for ${{ github.event.inputs.version_tag }} [skip ci]" + git tag -a ${{ github.event.inputs.version_tag }} -m ${{ github.event.inputs.version_tag }} + git push + git push --tags + + - name: Create Release + uses: ncipollo/release-action@v1.14.0 + with: + allowUpdates: true + draft: false + makeLatest: true + name: ${{ github.event.inputs.version_tag }} + tag: ${{ github.event.inputs.version_tag }} + body: ${{ steps.changelog.outputs.changes }} + + # Below is generated by maturin + linux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: ubuntu-latest + target: x86 + - runner: ubuntu-latest + target: aarch64 + - runner: ubuntu-latest + target: armv7 + - runner: ubuntu-latest + target: s390x + - runner: ubuntu-latest + target: ppc64le + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Update version in Cargo.toml + run: | + python3 scripts/update_version_in_cargo_toml.py ${{ github.event.inputs.version_tag }} + - name: Build 
wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-linux-${{ matrix.platform.target }} + path: dist + + windows: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: windows-latest + target: x64 + - runner: windows-latest + target: x86 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + architecture: ${{ matrix.platform.target }} + - name: Update version in Cargo.toml + run: | + python3 scripts/update_version_in_cargo_toml.py ${{ github.event.inputs.version_tag }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-windows-${{ matrix.platform.target }} + path: dist + + macos: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: macos-latest + target: x86_64 + - runner: macos-14 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Update version in Cargo.toml + run: | + python3 scripts/update_version_in_cargo_toml.py ${{ github.event.inputs.version_tag }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-macos-${{ matrix.platform.target }} + path: dist + + sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Update version in Cargo.toml + run: | + python3 scripts/update_version_in_cargo_toml.py ${{ github.event.inputs.version_tag }} + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist + - name: Upload sdist + uses: actions/upload-artifact@v4 + with: + name: wheels-sdist + path: dist + + release: + name: Release + if: ${{ github.event.inputs.dry_run == 'false' }} + runs-on: ubuntu-latest + needs: [linux, windows, macos, sdist] + steps: + - uses: actions/download-artifact@v4 + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --non-interactive --skip-existing wheels-*/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..58c2400 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,118 @@ +name: Tests + +on: + - push + - pull_request + +jobs: + pre_job: + # continue-on-error: true # Uncomment once integration is finished + runs-on: ubuntu-latest + # Map a step output to a job output + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@v5 + with: + # All of these options are optional, so you can remove them if you are happy with the defaults + concurrent_skipping: 'same_content_newer' + skip_after_successful_duplicate: 'true' + paths_ignore: '["**/README.md", "**/docs/**"]' + # do_not_skip: '["pull_request", "workflow_dispatch", "schedule"]' + + pytest: + needs: pre_job + if: needs.pre_job.outputs.should_skip != 'true' + runs-on: 
ubuntu-latest
+    defaults:
+      run:
+        shell: bash -el {0} # setup-miniconda requires bash
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+          echo "python_version=$(python3 scripts/get_python_version.py)" >> "$GITHUB_OUTPUT"
+          pip3 install --user uv maturin
+        id: get-python-version
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-version: latest
+          activate-environment: test
+          python-version: ${{ steps.get-python-version.outputs.python_version }}
+      - name: Cache Conda environment
+        id: cache-conda
+        uses: actions/cache@v4
+        env:
+          cache-name: cache-conda
+        with:
+          path: ~/miniconda3/envs/test
+          key: ${{ runner.os }}-conda-${{ env.cache-name }}-${{ hashFiles('deps/x86_64-unknown-linux-gnu/requirements_dev.txt', 'scripts/install.sh', '.github/workflows/tests.yml', 'rust/**') }}
+          # restore-keys: |
+          #   ${{ runner.os }}-conda-${{ env.cache-name }}-
+          #   ${{ runner.os }}-conda-
+          #   ${{ runner.os }}-
+      - if: steps.cache-conda.outputs.cache-hit == 'true'
+        run: echo 'conda cache hit!'
+      - name: Install dependencies
+        if: steps.cache-conda.outputs.cache-hit != 'true'
+        run: |
+          # python -m pip install --upgrade pip
+          uv pip install -r deps/x86_64-unknown-linux-gnu/requirements_dev.txt
+          bash scripts/install.sh
+      - name: Run pytest
+        run: |
+          set +e # Do not exit shell on pytest failure
+          python3 scripts/hf_download.py
+
+          out=$(pytest 2> stderr.txt)
+          exit_code=$?
+          err=$(<stderr.txt)
+
+          # Display the raw output in the step
+          echo "${out}"
+          echo "${err}"
+
+          # Display the Markdown output in the job summary
+          echo "\`\`\`python" >> $GITHUB_STEP_SUMMARY
+          echo "${out}" >> $GITHUB_STEP_SUMMARY
+          echo "${err}" >> $GITHUB_STEP_SUMMARY
+          if [[ $exit_code -eq 5 ]]
+          then
+            echo
+            echo 'WARNING: No tests were run; this is considered a success.' >> $GITHUB_STEP_SUMMARY
+            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+            exit 0
+          else
+            echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+            # Exit with the exit code returned by pytest
+            exit ${exit_code}
+          fi
+
+  cargo-test:
+    needs: pre_job
+    if: needs.pre_job.outputs.should_skip != 'true'
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -el {0} # setup-miniconda requires bash
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run cargo test
+        run: |
+          set +e # Do not exit shell on failure
+          cd rust
+          out=$(LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu cargo test 2> stderr.txt)
+          exit_code=$?
+ err=$(<stderr.txt) + + # Display the raw output in the step + echo "${out}" + echo "${err}" + + # Display the Markdown output in the job summary + echo "\`\`\`python" >> $GITHUB_STEP_SUMMARY + echo "${out}" >> $GITHUB_STEP_SUMMARY + echo "${err}" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + # Exit with the exit-code returned by test + exit ${exit_code} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..41e204a --- /dev/null +++ b/.gitignore @@ -0,0 +1,73 @@ +/data/ +/rust/target/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0cd1fd9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 Deargen, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a81dd3e --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# fast-graph + +Graph algorithms implemented in Rust, available as a Python package. 
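The package's Python surface is a single function, `all_pairs_dijkstra_path_length`, exported by the PyO3 module in `rust/src/lib.rs` below. A minimal usage sketch (the edge list is illustrative, and it assumes the extension has been built and installed, e.g. with `maturin develop` or from one of the wheels built by CI):

```python
import fast_graph

# Weighted undirected edges as (u, v, weight) 3-tuples, the format consumed by
# Graph::add_weighted_edges_from on the Rust side.
edges = [(0, 1, 4.0), (1, 2, 8.0), (0, 7, 8.0)]

# Returns {source: {node: shortest_distance}} with every node as a source,
# intended to match dict(networkx.all_pairs_dijkstra_path_length(G)).
# The second argument is the optional cutoff on summed edge weight (None = no cutoff).
dist = fast_graph.all_pairs_dijkstra_path_length(edges, None)
print(dist[0][2])  # 12.0, via 0 -> 1 -> 2
```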
diff --git a/deps/aarch64-apple-darwin/.requirements_dev.in.sha256 b/deps/aarch64-apple-darwin/.requirements_dev.in.sha256 new file mode 100644 index 0000000..79dcd18 --- /dev/null +++ b/deps/aarch64-apple-darwin/.requirements_dev.in.sha256 @@ -0,0 +1 @@ +f7126eac53ee1fbc3577b6bd0ccf708422f56d24df7cd45dad06523d591b8704 requirements_dev.in diff --git a/deps/aarch64-apple-darwin/requirements_dev.txt b/deps/aarch64-apple-darwin/requirements_dev.txt new file mode 100644 index 0000000..065a093 --- /dev/null +++ b/deps/aarch64-apple-darwin/requirements_dev.txt @@ -0,0 +1,40 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements_dev.in -o aarch64-apple-darwin/requirements_dev.txt --python-platform aarch64-apple-darwin --python-version 3.8 +certifi==2024.6.2 + # via requests +charset-normalizer==3.3.2 + # via requests +exceptiongroup==1.2.1 + # via pytest +filelock==3.15.4 + # via huggingface-hub +fsspec==2024.6.1 + # via huggingface-hub +huggingface-hub==0.23.4 + # via -r requirements_dev.in +idna==3.7 + # via requests +iniconfig==2.0.0 + # via pytest +networkx==3.1 + # via -r requirements_dev.in +packaging==24.1 + # via + # huggingface-hub + # pytest +pluggy==1.5.0 + # via pytest +pytest==8.2.2 + # via -r requirements_dev.in +pyyaml==6.0.1 + # via huggingface-hub +requests==2.32.3 + # via huggingface-hub +tomli==2.0.1 + # via pytest +tqdm==4.66.4 + # via huggingface-hub +typing-extensions==4.12.2 + # via huggingface-hub +urllib3==2.2.2 + # via requests diff --git a/deps/requirements_dev.in b/deps/requirements_dev.in new file mode 100644 index 0000000..5da8c5d --- /dev/null +++ b/deps/requirements_dev.in @@ -0,0 +1,9 @@ +pytest +networkx +huggingface_hub +toml ; python_version < "3.11" + +# test requirements +numpy +scipy +trimesh diff --git a/deps/x86_64-apple-darwin/.requirements_dev.in.sha256 b/deps/x86_64-apple-darwin/.requirements_dev.in.sha256 new file mode 100644 index 0000000..79dcd18 --- /dev/null +++ b/deps/x86_64-apple-darwin/.requirements_dev.in.sha256 @@ -0,0 +1 @@ +f7126eac53ee1fbc3577b6bd0ccf708422f56d24df7cd45dad06523d591b8704 requirements_dev.in diff --git a/deps/x86_64-apple-darwin/requirements_dev.txt b/deps/x86_64-apple-darwin/requirements_dev.txt new file mode 100644 index 0000000..ebfa014 --- /dev/null +++ b/deps/x86_64-apple-darwin/requirements_dev.txt @@ -0,0 +1,40 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements_dev.in -o x86_64-apple-darwin/requirements_dev.txt --python-platform x86_64-apple-darwin --python-version 3.8 +certifi==2024.6.2 + # via requests +charset-normalizer==3.3.2 + # via requests +exceptiongroup==1.2.1 + # via pytest +filelock==3.15.4 + # via huggingface-hub +fsspec==2024.6.1 + # via huggingface-hub +huggingface-hub==0.23.4 + # via -r requirements_dev.in +idna==3.7 + # via requests +iniconfig==2.0.0 + # via pytest +networkx==3.1 + # via -r requirements_dev.in +packaging==24.1 + # via + # huggingface-hub + # pytest +pluggy==1.5.0 + # via pytest +pytest==8.2.2 + # via -r requirements_dev.in +pyyaml==6.0.1 + # via huggingface-hub +requests==2.32.3 + # via huggingface-hub +tomli==2.0.1 + # via pytest +tqdm==4.66.4 + # via huggingface-hub +typing-extensions==4.12.2 + # via huggingface-hub +urllib3==2.2.2 + # via requests diff --git a/deps/x86_64-pc-windows-msvc/.requirements_dev.in.sha256 b/deps/x86_64-pc-windows-msvc/.requirements_dev.in.sha256 new file mode 100644 index 0000000..79dcd18 --- /dev/null +++ b/deps/x86_64-pc-windows-msvc/.requirements_dev.in.sha256 @@ 
-0,0 +1 @@ +f7126eac53ee1fbc3577b6bd0ccf708422f56d24df7cd45dad06523d591b8704 requirements_dev.in diff --git a/deps/x86_64-pc-windows-msvc/requirements_dev.txt b/deps/x86_64-pc-windows-msvc/requirements_dev.txt new file mode 100644 index 0000000..57b513a --- /dev/null +++ b/deps/x86_64-pc-windows-msvc/requirements_dev.txt @@ -0,0 +1,44 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements_dev.in -o x86_64-pc-windows-msvc/requirements_dev.txt --python-platform x86_64-pc-windows-msvc --python-version 3.8 +certifi==2024.6.2 + # via requests +charset-normalizer==3.3.2 + # via requests +colorama==0.4.6 + # via + # pytest + # tqdm +exceptiongroup==1.2.1 + # via pytest +filelock==3.15.4 + # via huggingface-hub +fsspec==2024.6.1 + # via huggingface-hub +huggingface-hub==0.23.4 + # via -r requirements_dev.in +idna==3.7 + # via requests +iniconfig==2.0.0 + # via pytest +networkx==3.1 + # via -r requirements_dev.in +packaging==24.1 + # via + # huggingface-hub + # pytest +pluggy==1.5.0 + # via pytest +pytest==8.2.2 + # via -r requirements_dev.in +pyyaml==6.0.1 + # via huggingface-hub +requests==2.32.3 + # via huggingface-hub +tomli==2.0.1 + # via pytest +tqdm==4.66.4 + # via huggingface-hub +typing-extensions==4.12.2 + # via huggingface-hub +urllib3==2.2.2 + # via requests diff --git a/deps/x86_64-unknown-linux-gnu/.requirements_dev.in.sha256 b/deps/x86_64-unknown-linux-gnu/.requirements_dev.in.sha256 new file mode 100644 index 0000000..79dcd18 --- /dev/null +++ b/deps/x86_64-unknown-linux-gnu/.requirements_dev.in.sha256 @@ -0,0 +1 @@ +f7126eac53ee1fbc3577b6bd0ccf708422f56d24df7cd45dad06523d591b8704 requirements_dev.in diff --git a/deps/x86_64-unknown-linux-gnu/requirements_dev.txt b/deps/x86_64-unknown-linux-gnu/requirements_dev.txt new file mode 100644 index 0000000..e6eb43c --- /dev/null +++ b/deps/x86_64-unknown-linux-gnu/requirements_dev.txt @@ -0,0 +1,40 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements_dev.in -o x86_64-unknown-linux-gnu/requirements_dev.txt --python-platform x86_64-unknown-linux-gnu --python-version 3.8 +certifi==2024.6.2 + # via requests +charset-normalizer==3.3.2 + # via requests +exceptiongroup==1.2.1 + # via pytest +filelock==3.15.4 + # via huggingface-hub +fsspec==2024.6.1 + # via huggingface-hub +huggingface-hub==0.23.4 + # via -r requirements_dev.in +idna==3.7 + # via requests +iniconfig==2.0.0 + # via pytest +networkx==3.1 + # via -r requirements_dev.in +packaging==24.1 + # via + # huggingface-hub + # pytest +pluggy==1.5.0 + # via pytest +pytest==8.2.2 + # via -r requirements_dev.in +pyyaml==6.0.1 + # via huggingface-hub +requests==2.32.3 + # via huggingface-hub +tomli==2.0.1 + # via pytest +tqdm==4.66.4 + # via huggingface-hub +typing-extensions==4.12.2 + # via huggingface-hub +urllib3==2.2.2 + # via requests diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 0000000..d6637e0 --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f3bef1d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,142 @@
+[build-system]
+requires = ["maturin>=1.5,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "fast-graph"
+requires-python = ">=3.8"
+description = "Graph algorithms implemented in Rust, available as a Python package."
+authors = [
+    { name = "Kiyoon Kim" }, # OPTIONALLY CHANGE
+]
+readme = "README.md"
+license = { file = "LICENSE" }
+classifiers = [
+    "Programming Language :: Rust",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dynamic = ["version"]
+
+[project.urls]
+"Homepage" = "https://github.com/deargen/fast-graph"
+
+[tool.maturin]
+features = ["pyo3/extension-module"]
+sdist-include = ["LICENSE", "README.md"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+
+
+[tool.ruff]
+src = ["src"] # for ruff isort
+extend-exclude = [
+    "src/fast_graph/_version.py", # CHANGE
+]
+
+[tool.ruff.lint]
+# OPTIONALLY ADD MORE LATER
+select = [
+    # flake8
+    "E",
+    "F",
+    "W",
+    "B", # Bugbear
+    "D", # Docstring
+    "D213", # Multi-line docstring summary should start at the second line (replace D212)
+    "N", # Naming
+    "C4", # flake8-comprehensions
+    "UP", # pyupgrade
+    "SIM", # simplify
+    "RUF", # ruff-specific
+    "RET501", # return
+    "RET502", # return
+    "RET503", # return
+    "PTH", # path
+    "NPY", # numpy
+    "PYI", # type stubs for pyright/pylance
+    "PT", # pytest
+    "PIE", #
+    "LOG", # logging
+    "COM818", # comma misplaced
+    "COM819", # comma
+    "DTZ", # datetime
+    "YTT",
+    "ASYNC",
+
+    # Not important
+    "T10", # debug statements
+    "T20", # print statements
+]
+
+ignore = [
+    "E402", # Module level import not at top of file
+    "W293", # Blank line contains whitespace
+    "W291", # Trailing whitespace
+    "D10", # Missing docstring in public module / function / etc.
+    "D200", # One-line docstring should fit on one line with quotes
+    "D212", # Multi-line docstring summary should start at the first line
+    "D417", # require documentation for every function parameter.
+    "D401", # require an imperative mood for all docstrings.
+    "PTH123", # Path.open should be used instead of built-in open
+    "PT006", # Pytest parameterize style
+    "N812", # Lowercase `functional` imported as non-lowercase `F` (import torch.nn.functional as F)
+    "NPY002", # legacy numpy random
+    "UP017", # datetime.timezone.utc -> datetime.UTC
+    "SIM108", # use ternary operator instead of if-else
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.pycodestyle]
+# Black or ruff will enforce line length to be 88, except for docstrings and comments.
+# We set it to 120 so we have more space for docstrings and comments.
+max-line-length = 120
+
+[tool.ruff.lint.isort]
+# combine-as-imports = true
+known-third-party = ["wandb"]
+
+## Uncomment this if you want to use Python < 3.10
+# required-imports = [
+#     "from __future__ import annotations",
+# ]
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Ban certain modules from being imported at module level, instead requiring
+# that they're imported lazily (e.g., within a function definition, if TYPE_CHECKING, etc.)
+# NOTE: Ruff code TID is currently disabled, so this settings doesn't do anything. +banned-module-level-imports = ["torch", "tensorflow"] + +[tool.pyright] +include = ["src"] + +typeCheckingMode = "standard" +autoSearchPaths = true +useLibraryCodeForTypes = true +autoImportCompletions = true +diagnosticsMode = "openFilesOnly" + +reportUnusedImports = false +reportUnusedVariable = false +# reportUnusedClass = "warning" +# reportUnusedFunction = "warning" +reportUndefinedVariable = false # ruff handles this with F821 + +pythonVersion = "3.8" +pythonPlatform = "Linux" + +[tool.coverage.report] +omit = [ + "src/fast_graph/_version.py", # CHANGE + # OPTIONALLY ADD MORE LATER +] diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 0000000..39f95e7 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,347 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "fast-graph" +version = "0.0.0" +dependencies = [ + "pyo3", + "rayon", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memoffset" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..aeec9a6 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "fast-graph" +version = "0.0.0" +edition = "2021" + +[lib] +name = "fast_graph" +crate-type = [ "cdylib", "rlib",] + +[dependencies] +pyo3 = "0.21.2" +rayon = "1.10" diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000..959caaf --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,426 @@ +// Implements the dijkstra algorithm with cutoff, and multiple sources from networkx in rust. +// No need predecesors, paths, or target for now. + +// class Graph: +// def __init__(self, incoming_graph_data=None, **attr): +// """Initialize a graph with edges, name, or graph attributes. +// +// Parameters +// ---------- +// incoming_graph_data : input graph (optional, default: None) +// Data to initialize graph. If None (default) an empty +// graph is created. The data can be an edge list, or any +// NetworkX graph object. If the corresponding optional Python +// packages are installed the data can also be a 2D NumPy array, a +// SciPy sparse array, or a PyGraphviz graph. +// +// attr : keyword arguments, optional (default= no attributes) +// Attributes to add to graph as key=value pairs. +// +// See Also +// -------- +// convert +// +// Examples +// -------- +// >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc +// >>> G = nx.Graph(name="my graph") +// >>> e = [(1, 2), (2, 3), (3, 4)] # list of edges +// >>> G = nx.Graph(e) +// +// Arbitrary graph attribute pairs (key=value) may be assigned +// +// >>> G = nx.Graph(e, day="Friday") +// >>> G.graph +// {'day': 'Friday'} +// +// """ +// self.graph = self.graph_attr_dict_factory() # dictionary for graph attributes +// self._node = self.node_dict_factory() # empty node attribute dict +// self._adj = self.adjlist_outer_dict_factory() # empty adjacency dict +// # attempt to load graph with data +// if incoming_graph_data is not None: +// convert.to_networkx_graph(incoming_graph_data, create_using=self) +// # load graph attributes (must be after convert) +// self.graph.update(attr) +// +// @cached_property +// def adj(self): +// """Graph adjacency object holding the neighbors of each node. +// +// This object is a read-only dict-like structure with node keys +// and neighbor-dict values. The neighbor-dict is keyed by neighbor +// to the edge-data-dict. So `G.adj[3][2]['color'] = 'blue'` sets +// the color of the edge `(3, 2)` to `"blue"`. +// +// Iterating over G.adj behaves like a dict. Useful idioms include +// `for nbr, datadict in G.adj[n].items():`. +// +// The neighbor information is also provided by subscripting the graph. +// So `for nbr, foovalue in G[node].data('foo', default=1):` works. +// +// For directed graphs, `G.adj` holds outgoing (successor) info. +// """ +// return AdjacencyView(self._adj) +// +// def add_edges_from(self, ebunch_to_add, **attr): +// """Add all the edges in ebunch_to_add. +// +// Parameters +// ---------- +// ebunch_to_add : container of edges +// Each edge given in the container will be added to the +// graph. 
The edges must be given as 2-tuples (u, v) or +// 3-tuples (u, v, d) where d is a dictionary containing edge data. +// attr : keyword arguments, optional +// Edge data (or labels or objects) can be assigned using +// keyword arguments. +// +// See Also +// -------- +// add_edge : add a single edge +// add_weighted_edges_from : convenient way to add weighted edges +// +// Notes +// ----- +// Adding the same edge twice has no effect but any edge data +// will be updated when each duplicate edge is added. +// +// Edge attributes specified in an ebunch take precedence over +// attributes specified via keyword arguments. +// +// When adding edges from an iterator over the graph you are changing, +// a `RuntimeError` can be raised with message: +// `RuntimeError: dictionary changed size during iteration`. This +// happens when the graph's underlying dictionary is modified during +// iteration. To avoid this error, evaluate the iterator into a separate +// object, e.g. by using `list(iterator_of_edges)`, and pass this +// object to `G.add_edges_from`. +// +// Examples +// -------- +// >>> G = nx.Graph() # or DiGraph, MultiGraph, MultiDiGraph, etc +// >>> G.add_edges_from([(0, 1), (1, 2)]) # using a list of edge tuples +// >>> e = zip(range(0, 3), range(1, 4)) +// >>> G.add_edges_from(e) # Add the path graph 0-1-2-3 +// +// Associate data to edges +// +// >>> G.add_edges_from([(1, 2), (2, 3)], weight=3) +// >>> G.add_edges_from([(3, 4), (1, 4)], label="WN2898") +// +// Evaluate an iterator over a graph if using it to modify the same graph +// +// >>> G = nx.Graph([(1, 2), (2, 3), (3, 4)]) +// >>> # Grow graph by one new node, adding edges to all existing nodes. +// >>> # wrong way - will raise RuntimeError +// >>> # G.add_edges_from(((5, n) for n in G.nodes)) +// >>> # correct way - note that there will be no self-edge for node 5 +// >>> G.add_edges_from(list((5, n) for n in G.nodes)) +// """ +// for e in ebunch_to_add: +// ne = len(e) +// if ne == 3: +// u, v, dd = e +// elif ne == 2: +// u, v = e +// dd = {} # doesn't need edge_attr_dict_factory +// else: +// raise NetworkXError(f"Edge tuple {e} must be a 2-tuple or 3-tuple.") +// if u not in self._node: +// if u is None: +// raise ValueError("None cannot be a node") +// self._adj[u] = self.adjlist_inner_dict_factory() +// self._node[u] = self.node_attr_dict_factory() +// if v not in self._node: +// if v is None: +// raise ValueError("None cannot be a node") +// self._adj[v] = self.adjlist_inner_dict_factory() +// self._node[v] = self.node_attr_dict_factory() +// datadict = self._adj[u].get(v, self.edge_attr_dict_factory()) +// datadict.update(attr) +// datadict.update(dd) +// self._adj[u][v] = datadict +// self._adj[v][u] = datadict + +// def _dijkstra_multisource( +// G, sources, weight, pred=None, paths=None, cutoff=None, target=None +// ): +// """Uses Dijkstra's algorithm to find shortest weighted paths +// +// Parameters +// ---------- +// G : NetworkX graph +// +// sources : non-empty iterable of nodes +// Starting nodes for paths. If this is just an iterable containing +// a single node, then all paths computed by this function will +// start from that node. If there are two or more nodes in this +// iterable, the computed paths may begin from any one of the start +// nodes. 
+// +// weight: function +// Function with (u, v, data) input that returns that edge's weight +// or None to indicate a hidden edge +// +// pred: dict of lists, optional(default=None) +// dict to store a list of predecessors keyed by that node +// If None, predecessors are not stored. +// +// paths: dict, optional (default=None) +// dict to store the path list from source to each node, keyed by node. +// If None, paths are not stored. +// +// target : node label, optional +// Ending node for path. Search is halted when target is found. +// +// cutoff : integer or float, optional +// Length (sum of edge weights) at which the search is stopped. +// If cutoff is provided, only return paths with summed weight <= cutoff. +// +// Returns +// ------- +// distance : dictionary +// A mapping from node to shortest distance to that node from one +// of the source nodes. +// +// Raises +// ------ +// NodeNotFound +// If any of `sources` is not in `G`. +// +// Notes +// ----- +// The optional predecessor and path dictionaries can be accessed by +// the caller through the original pred and paths objects passed +// as arguments. No need to explicitly return pred or paths. +// +// """ +// G_succ = G._adj # For speed-up (and works for both directed and undirected graphs) +// +// push = heappush +// pop = heappop +// dist = {} # dictionary of final distances +// seen = {} +// # fringe is heapq with 3-tuples (distance,c,node) +// # use the count c to avoid comparing nodes (may not be able to) +// c = count() +// fringe = [] +// for source in sources: +// seen[source] = 0 +// push(fringe, (0, next(c), source)) +// while fringe: +// (d, _, v) = pop(fringe) +// if v in dist: +// continue # already searched this node. +// dist[v] = d +// if v == target: +// break +// for u, e in G_succ[v].items(): +// cost = weight(v, u, e) +// if cost is None: +// continue +// vu_dist = dist[v] + cost +// if cutoff is not None: +// if vu_dist > cutoff: +// continue +// if u in dist: +// u_dist = dist[u] +// if vu_dist < u_dist: +// raise ValueError("Contradictory paths found:", "negative weights?") +// elif pred is not None and vu_dist == u_dist: +// pred[u].append(v) +// elif u not in seen or vu_dist < seen[u]: +// seen[u] = vu_dist +// push(fringe, (vu_dist, next(c), u)) +// if paths is not None: +// paths[u] = paths[v] + [u] +// if pred is not None: +// pred[u] = [v] +// elif vu_dist == seen[u]: +// if pred is not None: +// pred[u].append(v) +// +// # The optional predecessor and path dictionaries can be accessed +// # by the caller via the pred and paths objects passed as arguments. +// return dist + +use pyo3::prelude::*; +use rayon::prelude::*; +use std::collections::{BinaryHeap, HashMap, HashSet}; + +// implement Ord and PartialOrd for DistCountNode +// NOTE: compare based on the distance first, then the count, and finally the node. +// reverse the comparison to make it a min heap. +// The count is used to pop FIFO when the distance is the same. 
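+// For example (illustrative values only): since BinaryHeap is a max-heap, the
+// reversed comparison means DistCountNode(1.0, 1, 9) pops before
+// DistCountNode(2.0, 0, 3) (smaller distance first), and DistCountNode(1.0, 0, 9)
+// pops before DistCountNode(1.0, 1, 3) (equal distance: the earlier-pushed,
+// smaller count wins).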
+
+#[derive(Debug)]
+struct DistCountNode(f64, u32, u32);
+impl Ord for DistCountNode {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.0
+            .partial_cmp(&other.0)
+            .unwrap()
+            .reverse()
+            .then_with(|| self.1.cmp(&other.1).reverse())
+            .then_with(|| self.2.cmp(&other.2).reverse())
+    }
+}
+impl PartialOrd for DistCountNode {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+impl Eq for DistCountNode {}
+impl PartialEq for DistCountNode {
+    fn eq(&self, other: &Self) -> bool {
+        self.0 == other.0 && self.1 == other.1 && self.2 == other.2
+    }
+}
+
+// Undirected graph data structure:
+// `node` stores the node IDs, and `adj` stores the adjacency map of the graph.
+// `adj[u][v]` holds the weight of the edge between `u` and `v`.
+pub struct Graph {
+    pub node: HashSet<u32>,
+    pub adj: HashMap<u32, HashMap<u32, f64>>,
+}
+
+// Implementation of the Graph data structure.
+// The Graph data structure has the following methods:
+// 1. add_weighted_edges_from (only allows 3-tuples, not 2-tuples)
+// 2. dijkstra_singlesource
+// 3. all_pairs_dijkstra_path_length
+
+impl Graph {
+    pub fn new() -> Graph {
+        Graph {
+            node: HashSet::new(),
+            adj: HashMap::new(),
+        }
+    }
+
+    pub fn add_weighted_edges_from(&mut self, ebunch_to_add: Vec<(u32, u32, f64)>) {
+        for e in ebunch_to_add {
+            let (u, v, dd) = e;
+            if !self.node.contains(&u) {
+                self.adj.insert(u, HashMap::new());
+                self.node.insert(u);
+            }
+            if !self.node.contains(&v) {
+                self.adj.insert(v, HashMap::new());
+                self.node.insert(v);
+            }
+            self.adj.get_mut(&u).unwrap().insert(v, dd);
+            self.adj.get_mut(&v).unwrap().insert(u, dd);
+        }
+    }
+
+    pub fn dijkstra_singlesource(&self, source: u32, cutoff: Option<f64>) -> HashMap<u32, f64> {
+        let mut dist = HashMap::new();
+        let mut seen = HashMap::<u32, f64>::new();
+
+        // fringe is a min-heap of (distance, count, node) triples.
+        // The count keeps pops FIFO among equal distances, mirroring networkx's heapq usage.
+        let mut fringe = BinaryHeap::<DistCountNode>::new();
+        let mut c: u32 = 0;
+
+        seen.insert(source, 0.0);
+        fringe.push(DistCountNode(0.0, c, source));
+        c += 1;
+
+        while !fringe.is_empty() {
+            let dist_count_node = fringe.pop().unwrap();
+            let (d, v) = (dist_count_node.0, dist_count_node.2);
+            if dist.contains_key(&v) {
+                continue; // already searched this node
+            }
+            dist.insert(v, d);
+
+            for (u, e) in self.adj.get(&v).unwrap().iter() {
+                let cost = *e;
+                let vu_dist = dist.get(&v).unwrap() + cost;
+                if let Some(cutoff) = cutoff {
+                    if vu_dist > cutoff {
+                        continue;
+                    }
+                }
+
+                if dist.contains_key(u) {
+                    let u_dist = *dist.get(u).unwrap();
+                    if vu_dist < u_dist {
+                        panic!("Contradictory paths found: negative weights?");
+                    }
+                } else if !seen.contains_key(u) || vu_dist < *seen.get(u).unwrap() {
+                    seen.insert(*u, vu_dist);
+                    fringe.push(DistCountNode(vu_dist, c, *u));
+                    c += 1;
+                }
+            }
+        }
+
+        // Dropping the source -> source entry (distance 0.0) here was considered
+        // as a possible performance tweak:
+        // dist.remove(&source);
+
+        // For consistency with networkx, keep the source -> source distance (0.0).
+        // (`source` already entered `dist` when it was popped from the fringe, so
+        // this insert is a no-op kept for explicitness.)
+        dist.insert(source, 0.0);
+        dist
+    }
+
+    pub fn all_pairs_dijkstra_path_length(
+        &self,
+        cutoff: Option<f64>,
+    ) -> HashMap<u32, HashMap<u32, f64>> {
+        // sequential version
+
+        // let mut dist = HashMap::new();
+        // for u in self.node.iter() {
+        //     dist.insert(*u, self.dijkstra_singlesource(*u, cutoff));
+        // }
+
+        // parallel version: one single-source Dijkstra per node on the rayon thread pool
+        self.node
+            .par_iter()
+            .map(|u| (*u, self.dijkstra_singlesource(*u, cutoff)))
+            .collect()
+    }
+}
+
+impl Default for Graph {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[pyfunction]
+fn all_pairs_dijkstra_path_length(edges: Vec<(u32, u32, f64)>, cutoff: Option<f64>) -> Py<PyAny> {
+    let mut graph = Graph::new();
+    graph.add_weighted_edges_from(edges);
+
+    let dist = graph.all_pairs_dijkstra_path_length(cutoff);
+    Python::with_gil(|py| dist.to_object(py))
+}
+
+/// A Python module implemented in Rust.
+#[pymodule]
+fn fast_graph(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(all_pairs_dijkstra_path_length, m)?)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::BinaryHeap;
+
+    use crate::DistCountNode;
+
+    #[test]
+    fn test_minheap() {
+        let mut heap = BinaryHeap::new();
+        heap.push(DistCountNode(1.0, 1, 1));
+        heap.push(DistCountNode(5.0, 2, 3));
+        heap.push(DistCountNode(3.0, 4, 5));
+        assert_eq!(heap.pop(), Some(DistCountNode(1.0, 1, 1)));
+        heap.push(DistCountNode(0.0, 4, 5));
+        assert_eq!(heap.pop(), Some(DistCountNode(0.0, 4, 5)));
+    }
+}
diff --git a/rust/tests/test_dijkstra.rs b/rust/tests/test_dijkstra.rs
new file mode 100644
index 0000000..5b706d1
--- /dev/null
+++ b/rust/tests/test_dijkstra.rs
@@ -0,0 +1,65 @@
+use std::collections::HashMap;
+
+use fast_graph::Graph;
+
+#[test]
+fn test_dijkstra_singlesource() {
+    let mut graph = Graph::new();
+    graph.add_weighted_edges_from(vec![
+        (0, 1, 4.0),
+        (0, 7, 8.0),
+        (1, 2, 8.0),
+        (1, 7, 11.0),
+        (2, 3, 7.0),
+        (2, 8, 2.0),
+        (2, 5, 4.0),
+        (3, 4, 9.0),
+        (3, 5, 14.0),
+        (4, 5, 10.0),
+        (5, 6, 2.0),
+        (6, 7, 1.0),
+        (6, 8, 6.0),
+        (7, 8, 7.0),
+    ]);
+
+    let source = 0;
+    let result = graph.dijkstra_singlesource(source, None);
+    assert_eq!(
+        result,
+        HashMap::<u32, f64>::from_iter(vec![
+            (0, 0.0),
+            (1, 4.0),
+            (2, 12.0),
+            (3, 19.0),
+            (4, 21.0),
+            (5, 11.0),
+            (6, 9.0),
+            (7, 8.0),
+            (8, 14.0)
+        ])
+    );
+
+    let source = 1;
+    let result = graph.dijkstra_singlesource(source, None);
+    assert_eq!(
+        result,
+        HashMap::<u32, f64>::from_iter(vec![
+            (0, 4.0),
+            (1, 0.0),
+            (2, 8.0),
+            (3, 15.0),
+            (4, 22.0),
+            (5, 12.0),
+            (6, 12.0),
+            (7, 11.0),
+            (8, 10.0)
+        ])
+    );
+
+    let source = 0;
+    let result = graph.dijkstra_singlesource(source, Some(10.0));
+    assert_eq!(
+        result,
+        HashMap::<u32, f64>::from_iter(vec![(0, 0.0), (1, 4.0), (6, 9.0), (7, 8.0),])
+    );
+}
diff --git a/scripts/compile_requirements.sh b/scripts/compile_requirements.sh
new file mode 100644
index 0000000..6677190
--- /dev/null
+++ b/scripts/compile_requirements.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+
+# This script compiles all requirements*.in files to requirements*.txt files,
+# locking every dependency to a specific version.
+# It also checks whether each requirements*.in file has changed since it was last compiled;
+# if not, it skips the file rather than recompiling it (recompiling may bump versions unnecessarily often).
+
+TARGET_PLATFORMS=(x86_64-unknown-linux-gnu aarch64-apple-darwin x86_64-apple-darwin x86_64-pc-windows-msvc)
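+
+# For example, `bash scripts/compile_requirements.sh` compiles deps/requirements.in
+# into deps/<platform>/requirements.txt for each platform above, and records
+# deps/<platform>/.requirements.in.sha256 so that an unchanged input is skipped on
+# the next run. The script exits with code 2 when nothing had to be recompiled.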
+
+if ! command -v uv &> /dev/null; then
+    echo "uv is not installed. Please run 'pip3 install --user uv'" >&2
+    exit 1
+fi
+
+if ! command -v sha256sum &> /dev/null; then
+    echo "sha256sum is not installed." >&2
+    echo "If you're on Mac, run 'brew install coreutils'" >&2
+    exit 1
+fi
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+# NOTE: sha256sum puts the file path in the hash file.
+# To keep those paths relative (and the hash files portable), change the working directory first.
+cd "$SCRIPT_DIR/../deps" || { echo "Failure"; exit 1; }
+
+PYTHON_VERSION=$(python3 "$SCRIPT_DIR/get_python_version.py")
+
+for platform in "${TARGET_PLATFORMS[@]}"; do
+    mkdir -p "$platform"
+done
+
+shopt -s globstar
+
+function get_shafile() {
+    local file=$1
+    local target_platform=$2
+    # e.g. x86_64-unknown-linux-gnu/.requirements.in.sha256
+    echo "$target_platform/.$file.sha256"
+}
+
+function get_lockfile() {
+    local file=$1
+    local target_platform=$2
+    # e.g. x86_64-unknown-linux-gnu/requirements.txt
+    echo "$target_platform/${file%.in}.txt"
+}
+
+function file_content_changed() {
+    # Check if the file has changed since the last time it was compiled, using the hash file.
+    # NOTE: returns 0 if the file has changed
+    local file=$1
+    local target_platform=$2
+    local shafile
+    shafile=$(get_shafile "$file" "$target_platform")
+    if [[ -f "$shafile" ]] && sha256sum -c "$shafile" &> /dev/null; then
+        return 1
+    fi
+    return 0
+}
+
+
+function deps_changed() {
+    # Check if the requirements*.in file has changed since the last time it was compiled,
+    # including its dependencies (-r another_requirements.in).
+    #
+    # When a requirements file includes other requirements files, those must be checked as well:
+    # e.g. requirements_dev.in depends on requirements.in (-r requirements.in), so
+    # requirements_dev.txt must be recompiled when requirements.in changes.
+    # The check recurses, so dependencies of dependencies are covered too.
+    # NOTE: returns 0 if the deps have changed
+    local file=$1
+    local target_platform=$2
+
+    if file_content_changed "$file" "$target_platform"; then
+        return 0
+    fi
+
+    local file_deps
+    file_deps=$(grep -Eo -- '-r [^ ]+' "$file")
+    file_deps=${file_deps//"-r "/}  # keep only the file names, e.g. requirements.in
+    for dep in $file_deps; do
+        echo "ℹ️ $file depends on $dep"
+        if deps_changed "$dep" "$target_platform"; then
+            return 0
+        fi
+    done
+    return 1
+}
+
+num_files=0
+num_up_to_date=0
+files_changed=()
+
+# First, collect all (file, platform) pairs that need to be compiled.
+# Don't compile them yet: compiling now would update hashes and break the comparison for later files.
+for file in requirements*.in; do
+    for target_platform in "${TARGET_PLATFORMS[@]}"; do
+        # $file: requirements.in
+        ((num_files++))
+
+        lockfile=$(get_lockfile "$file" "$target_platform")
+        shafile=$(get_shafile "$file" "$target_platform")
+        # Process only changed files by comparing hash
+        if [[ -f "$lockfile" ]]; then
+            if ! deps_changed "$file" "$target_platform"; then
+                echo "⚡ Skipping $file due to no changes"
+                ((num_up_to_date++))
+                continue
+            fi
+        fi
+        files_changed+=("$file:$target_platform")
+    done
+done
+
+# Compile each changed (file, platform) pair collected above.
+for entry in "${files_changed[@]}"; do
+    file=${entry%%:*}
+    target_platform=${entry#*:}
+    lockfile=$(get_lockfile "$file" "$target_platform")
+    shafile=$(get_shafile "$file" "$target_platform")
+    echo "Generating lockfile $lockfile from $file"
+    uv pip compile "$file" -o "$lockfile" --python-platform "$target_platform" --python-version "$PYTHON_VERSION" > /dev/null
+    sha256sum "$file" > "$shafile"  # update hash
+done
+
+# exit code 2 when all files are up to date
+if [[ $num_files -eq $num_up_to_date ]]; then
+    echo "All files are up to date!"
+    exit 2
+fi
+
diff --git a/scripts/get_python_version.py b/scripts/get_python_version.py
new file mode 100644
index 0000000..26334d7
--- /dev/null
+++ b/scripts/get_python_version.py
@@ -0,0 +1,30 @@
+"""
+Get minimum python version from pyproject.toml.
+
+Note:
+    It only works if the format is like this: ">=3.11", ">=3.11,<3.12"
+"""
+
+from pathlib import Path
+
+pyproject_toml_path = Path(__file__).parent.parent / "pyproject.toml"
+
+try:
+    import toml
+
+    pyproject = toml.load(pyproject_toml_path)
+    version_range = pyproject["project"]["requires-python"]
+except ImportError:
+    # alternatively, search for requires-python in pyproject.toml
+    with open(pyproject_toml_path) as f:
+        for line in f:
+            if line.startswith("requires-python"):
+                version_range = line.replace("requires-python", "").strip(" ='\"\n")
+                break
+        else:
+            raise ValueError("requires-python not found in pyproject.toml")
+
+# get minimum python version
+# it has a format like this: ">=3.6", ">=3.7,<3.8"
+min_version = version_range.split(",")[0].replace(">=", "")
+print(min_version)  # noqa: T201
diff --git a/scripts/hf_download.py b/scripts/hf_download.py
new file mode 100644
index 0000000..cd6c963
--- /dev/null
+++ b/scripts/hf_download.py
@@ -0,0 +1,22 @@
+import logging
+
+from huggingface_hub import snapshot_download
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    snapshot_download(
+        repo_id="Deargen/fast-graph",
+        repo_type="dataset",
+        revision="b30677dd3a76a0ecf51944861adacf433e9ecc04",
+        local_dir="data",
+        etag_timeout=1200,  # the first download takes roughly 10 minutes, so allow twice that (default: 10s)
+    )
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception:
+        logger.exception("Exception occurred")
diff --git a/scripts/install.sh b/scripts/install.sh
new file mode 100644
index 0000000..0443d83
--- /dev/null
+++ b/scripts/install.sh
@@ -0,0 +1,7 @@
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+python3 "$SCRIPT_DIR/update_version_in_cargo_toml.py" 0.0.0
+python3 "$SCRIPT_DIR/update_version_in_cargo_toml.py"
+cd "$SCRIPT_DIR/.." || exit
+maturin develop --release --target "$(rustc -vV | sed -n 's|host: ||p')"
+python3 "$SCRIPT_DIR/update_version_in_cargo_toml.py" 0.0.0
diff --git a/scripts/update_version_in_cargo_toml.py b/scripts/update_version_in_cargo_toml.py
new file mode 100644
index 0000000..9e95b02
--- /dev/null
+++ b/scripts/update_version_in_cargo_toml.py
@@ -0,0 +1,787 @@
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarballs (such as those provided by GitHub's download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+ +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer +from __future__ import annotations + +import errno +import functools +import json +import os +import re +import subprocess +import sys +from collections.abc import Callable +from pathlib import Path +from typing import Any + +import toml + + +def get_keywords() -> dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "mlproject-" + cfg.versionfile_source = "version.py" + cfg.verbose = False + + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: dict[str, str] = {} +HANDLERS: dict[str, dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command( + commands: list[str], + args: list[str], + cwd: str | None = None, + verbose: bool = False, + hide_stderr: bool = False, + env: dict[str, str] | None = None, +) -> tuple[str | None, int | None]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> dict[str, Any]: + """ + Try to 
determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords: dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: dict[str, str], + tag_prefix: str, + verbose: bool, +) -> dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. 
By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r"\d", r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command +) -> dict[str, Any]: + """ + Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: dict[str, Any]) -> str: + """ + Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: dict[str, Any]) -> str: + """ + TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). 
+ + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> tuple[str, int | None]: + """ + Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: dict[str, Any]) -> str: + """ + TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: dict[str, Any]) -> str: + """ + TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: dict[str, Any]) -> str: + """ + TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: dict[str, Any]) -> str: + """ + TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: dict[str, Any]) -> str: + """ + TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: dict[str, Any]) -> str: + """ + TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: dict[str, Any], style: str) -> dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } + + +def get_versions() -> dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for _ in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } + + +def get_chrome_ext_compat_version() -> str: + cfg = get_config() + cfg.verbose = True + root = str(Path(__file__).parent.resolve()) + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, cfg.verbose) + if pieces["error"]: + print("unable to compute version") + sys.exit(1) + + if pieces["closest-tag"]: + closest_tag = pieces["closest-tag"] + else: + closest_tag = "0" + + version = closest_tag + if pieces["distance"] or pieces["dirty"]: + if pieces["dirty"]: + version += f".{pieces['distance'] + 1}" + else: + version += f".{pieces['distance']}" + + return version + + +if __name__ == "__main__": + manifest_path = Path(__file__).parent.parent / "rust" / "Cargo.toml" + with manifest_path.open() as f: + manifest = toml.load(f) + + if len(sys.argv) > 1: + new_version = sys.argv[1] + manifest["package"]["version"] = new_version + with manifest_path.open("w") as f: + toml.dump(manifest, f) + + print(f"Version in {manifest_path} has been set to {new_version}") + else: + if manifest["package"]["version"] == "0.0.0": + new_version = get_versions()["version"] + + if new_version == "0+unknown": + print("Unable to compute version from git tags.") + sys.exit(1) + + manifest["package"]["version"] = new_version + with manifest_path.open("w") as f: + toml.dump(manifest, f) + + print(f"Version in {manifest_path} has been set to {new_version}") + else: + print( + f"Version in {manifest_path} is already set to {manifest['package']['version']}" + ) diff --git a/src/fast_graph/__init__.py b/src/fast_graph/__init__.py new file mode 100644 index 0000000..ce953ba --- /dev/null +++ b/src/fast_graph/__init__.py @@ -0,0 +1,5 @@ +from .fast_graph import * + +__doc__ = fast_graph.__doc__ +if hasattr(fast_graph, "__all__"): + __all__ = fast_graph.__all__ diff --git a/src/fast_graph/fast_graph.pyi b/src/fast_graph/fast_graph.pyi new file mode 100644 index 0000000..6238dbf --- /dev/null +++ b/src/fast_graph/fast_graph.pyi @@ -0,0 +1,3 @@ +def all_pairs_dijkstra_path_length( + weighted_edges: list[tuple[int, int, float]], /, cutoff: float | None +) -> dict[int, dict[int, float]]: ... 
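
For reference, a minimal usage sketch of the API described by this stub (the toy
edge list and the expected output are illustrative, not taken from the repo):

    from fast_graph import all_pairs_dijkstra_path_length

    # chain 0 -(1.0)- 1 -(2.0)- 2; distances accumulate along the chain
    dist = all_pairs_dijkstra_path_length([(0, 1, 1.0), (1, 2, 2.0)], cutoff=None)
    assert dist[0] == {0: 0.0, 1: 1.0, 2: 3.0}
    assert dist[2][0] == 3.0  # the graph is undirected, so distances are symmetric
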
diff --git a/src/fast_graph/py.typed b/src/fast_graph/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_dijkstra.py b/tests/test_dijkstra.py new file mode 100644 index 0000000..98a8c13 --- /dev/null +++ b/tests/test_dijkstra.py @@ -0,0 +1,106 @@ +import logging +import time +from pathlib import Path + +import networkx as nx +import numpy as np +import scipy +import trimesh +from scipy.sparse import coo_matrix, csr_matrix + +from fast_graph import all_pairs_dijkstra_path_length + +logger = logging.getLogger(__name__) + +TEST_DIR = Path(__file__).parent.parent / "data" / "tests" + + +def compare_dijkstra_rust_with_networkx(ply_path): + radius = 12.0 + mesh: trimesh.Trimesh = trimesh.load(ply_path, force="mesh") # type: ignore + + # Graph + graph = nx.Graph() + n = len(mesh.vertices) + graph.add_nodes_from(np.arange(n)) + logger.info(f"{graph = }") + + # Get edges + f = np.array(mesh.faces, dtype=int) + logger.info(f"{f.shape = }") + rowi = np.concatenate( + [f[:, 0], f[:, 0], f[:, 1], f[:, 1], f[:, 2], f[:, 2]], axis=0 + ) + rowj = np.concatenate( + [f[:, 1], f[:, 2], f[:, 0], f[:, 2], f[:, 0], f[:, 1]], axis=0 + ) + logger.info(f"{rowi.shape = }, {rowj.shape = }") + verts = mesh.vertices + logger.info(f"{verts.shape = }") + + # Get weights + edgew = verts[rowi] - verts[rowj] + edgew = scipy.linalg.norm(edgew, axis=1) + wedges = np.stack([rowi, rowj, edgew]).T + + weighted_edges = list(zip(rowi, rowj, edgew)) + + graph.add_weighted_edges_from(wedges) + logger.info(graph) + + start = time.time() + dists = nx.all_pairs_dijkstra_path_length(graph, cutoff=radius) + d2 = {} + + for key_tuple in dists: + d2[key_tuple[0]] = key_tuple[1] + end = time.time() + logger.info(f"Dijkstra took {end - start:.2f} s") + + start = time.time() + d3 = all_pairs_dijkstra_path_length(weighted_edges, cutoff=radius) + end = time.time() + logger.info(f"Rust Dijkstra took {end - start:.2f} s") + + # compare the two dictionaries (key: int, value: dict[int, float]) + assert d2.keys() == d3.keys(), f"{d2.keys() = }, {d3.keys() = }" + for key in d2: + assert d2[key].keys() == d3[key].keys() + for key2 in d2[key]: + assert d2[key][key2] == d3[key][key2] + + # sparse_d2 = dict_to_sparse(d2) + # sparse_d3 = dict_to_sparse(d3) + # + # # PERF: comparing sparse matrices with == is slow, so we use != + # + # # compare the two sparse matrices + # assert (sparse_d2 != sparse_d3).nnz == 0 + + +def dict_to_sparse(mydict): + """Create a sparse matrix from a dictionary.""" + # Create the appropriate format for the COO format. + data = [] + row = [] + col = [] + for r in mydict: + for c in mydict[r]: + r = int(r) + c = int(c) + v = mydict[r][c] + data.append(v) + row.append(r) + col.append(c) + # Create the COO-matrix + coo = coo_matrix((data, (row, col))) + # Let Scipy convert COO to CSR format and return + return csr_matrix(coo) + + +def test_dijkstra_rust_with_networkx_1(): + compare_dijkstra_rust_with_networkx(TEST_DIR / "plys" / "2P1Q_B.ply") + + +def test_dijkstra_rust_with_networkx_2(): + compare_dijkstra_rust_with_networkx(TEST_DIR / "plys" / "2P1Q_C.ply")
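
The PLY meshes under data/tests/plys are not included in this diff; judging from
scripts/hf_download.py they come from the Deargen/fast-graph dataset snapshot
downloaded into data/. A plausible way to run these tests locally (assuming
maturin and pytest are available in the environment):

    python3 scripts/hf_download.py   # fetch the test meshes into data/
    bash scripts/install.sh          # build the Rust extension and install it
    pytest tests/test_dijkstra.py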