Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more canonical equivalence tests #42

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Build
run: cargo build --verbose
- name: Run tests
Expand All @@ -28,14 +28,15 @@ jobs:
run: cargo fmt --check
- name: Check clippy
run: cargo clippy --lib --tests

regen:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Regen
run: cd scripts && python3 unicode.py
run: rm tests/NormalizationTest.txt && cd scripts && python3 unicode.py
- name: Diff
run: diff src/tables.rs scripts/tables.rs
run: git update-index --refresh && git diff-index --quiet HEAD --
3 changes: 0 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
compiler_builtins = { version = "0.1", optional = true }

[dev-dependencies]
unicode-normalization = "0.1.23"

[features]
default = []
rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
Expand Down
35 changes: 23 additions & 12 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@
import os
import re
import sys
import urllib.request
from collections import defaultdict
from itertools import batched

UNICODE_VERSION = "15.1.0"
"""The version of the Unicode data files to download."""

NUM_CODEPOINTS = 0x110000
"""An upper bound for which `range(0, NUM_CODEPOINTS)` contains Unicode's codespace."""

Expand Down Expand Up @@ -61,24 +65,28 @@ class OffsetType(enum.IntEnum):

If this is edited, you must ensure that `emit_module` reflects your changes."""

MODULE_FILENAME = "tables.rs"
"""The filename of the emitted Rust module (will be created in the working directory)"""
MODULE_PATH = "../src/tables.rs"
"""The path of the emitted Rust module (relative to the working directory)"""

Codepoint = int
BitPos = int


def fetch_open(filename: str):
def fetch_open(filename: str, local_prefix: str = ""):
"""Opens `filename` and returns its corresponding file object. If `filename` isn't on disk,
fetches it from `http://www.unicode.org/Public/UNIDATA/`. Exits with code 1 on failure.
fetches it from `https://www.unicode.org/Public/`. Exits with code 1 on failure.
"""
basename = os.path.basename(filename)
if not os.path.exists(basename):
os.system(f"curl -O http://www.unicode.org/Public/UNIDATA/{filename}")
localname = os.path.join(local_prefix, basename)
if not os.path.exists(localname):
urllib.request.urlretrieve(
f"https://www.unicode.org/Public/{UNICODE_VERSION}/ucd/{filename}",
localname,
)
try:
return open(basename, encoding="utf-8")
return open(localname, encoding="utf-8")
except OSError:
sys.stderr.write(f"cannot load {basename}")
sys.stderr.write(f"cannot load {localname}")
sys.exit(1)


Expand Down Expand Up @@ -637,7 +645,7 @@ def emit_module(
module.write("}\n")


def main(module_filename: str):
def main(module_path: str):
"""Obtain character data from the latest version of Unicode, transform it into a multi-level
lookup table for character width, and write a Rust module utilizing that table to
`module_path`.
Expand Down Expand Up @@ -677,6 +685,9 @@ def main(module_filename: str):
emoji_variations = load_variation_sequences()
variation_table = make_variation_sequence_table(emoji_variations, width_map)

# Download normalization test file for use by tests
fetch_open("NormalizationTest.txt", "../tests/")

print("------------------------")
total_size = 0
for i, table in enumerate(tables):
Expand All @@ -692,9 +703,9 @@ def main(module_filename: str):
print("------------------------")
print(f" Total size: {total_size} bytes")

emit_module(module_filename, version, tables, variation_table)
print(f'Wrote to "{module_filename}"')
emit_module(module_path, version, tables, variation_table)
print(f'Wrote to "{module_path}"')


if __name__ == "__main__":
main(MODULE_FILENAME)
main(MODULE_PATH)
Loading