Skip to content

Commit

Permalink
Test emoji sequences from Unicode test files
Browse files: browse the repository at this point in the history
  • Loading branch information
Jules-Bertholet committed Jun 6, 2024
1 parent b3cdccc commit 060cbbb
Show file tree
Hide file tree
Showing 4 changed files with 5,365 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,6 @@ jobs:
with:
python-version: '3.12'
- name: Regen
run: rm tests/NormalizationTest.txt && cd scripts && python3 unicode.py
run: rm tests/emoji-test.txt && cd scripts && python3 unicode.py
- name: Diff
run: git update-index --refresh && git diff-index --quiet HEAD --
10 changes: 8 additions & 2 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,19 @@ class OffsetType(enum.IntEnum):
BitPos = int


def fetch_open(filename: str, local_prefix: str = ""):
def fetch_open(filename: str, local_prefix: str = "", emoji: bool = False):
"""Opens `filename` and return its corresponding file object. If `filename` isn't on disk,
fetches it from `https://www.unicode.org/Public/`. Exits with code 1 on failure.
"""
basename = os.path.basename(filename)
localname = os.path.join(local_prefix, basename)
if not os.path.exists(localname):
if emoji:
prefix = f"emoji/{UNICODE_VERSION[:-2]}"
else:
prefix = f"{UNICODE_VERSION}/ucd"
urllib.request.urlretrieve(
f"https://www.unicode.org/Public/{UNICODE_VERSION}/ucd/{filename}",
f"https://www.unicode.org/Public/{prefix}/{filename}",
localname,
)
try:
Expand Down Expand Up @@ -2055,6 +2059,8 @@ def main(module_path: str):

normalization_tests = load_normalization_tests()

fetch_open("emoji-test.txt", "../tests", emoji=True)

print("------------------------")
total_size = 0
for i, table in enumerate(tables):
Expand Down
Loading

0 comments on commit 060cbbb

Please sign in to comment.