Skip to content

Commit

Permalink
Ensure tables.rs passes rustfmt
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jules-Bertholet committed Feb 29, 2024
1 parent a6a221a commit 6ae7229
Show file tree
Hide file tree
Showing 3 changed files with 9,280 additions and 24,580 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Regen
run: cd scripts && python3 unicode.py
- name: Diff tables
Expand Down
55 changes: 24 additions & 31 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
import collections
import urllib.request
from itertools import batched

UNICODE_VERSION = "15.1.0"
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
Expand Down Expand Up @@ -354,20 +355,26 @@ def is_first_and_last(first, last):
return False
return first[1:-8] == last[1:-7]

def gen_mph_data(name, d, kv_type, kv_callback):
def gen_mph_data(name, d, kv_type, kv_callback, kv_row_width):
(salt, keys) = minimal_perfect_hash(d)
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
for s in salt:
out.write(" 0x{:x},\n".format(s))
out.write(f"\npub(crate) const {name.upper()}_SALT: &[u16] = &[\n")
for s_row in batched(salt, 13):
out.write(" ")
for s in s_row:
out.write(f" 0x{s:03X},")
out.write("\n")
out.write("];\n")
out.write(f"pub(crate) const {name.upper()}_KV: &[{kv_type}] = &[\n")
for k_row in batched(keys, kv_row_width):
out.write(" ")
for k in k_row:
out.write(f" {kv_callback(k)},")
out.write("\n")
out.write("];\n")
out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
for k in keys:
out.write(" {},\n".format(kv_callback(k)))
out.write("];\n\n")

def gen_combining_class(combining_classes, out):
gen_mph_data('canonical_combining_class', combining_classes, 'u32',
lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))
lambda k: f"0x{int(combining_classes[k]) | (k << 8):07X}", 8)

def gen_composition_table(canon_comp, out):
table = {}
Expand All @@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
table[(c1 << 16) | c2] = c3
(salt, keys) = minimal_perfect_hash(table)
gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))
lambda k: f"(0x{k:08X}, '\\u{{{table[k]:06X}}}')", 1)

out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
out.write(" match (c1, c2) {\n")
Expand All @@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
assert offset < 65536
out.write("];\n")
gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))
lambda k: f"(0x{k:05X}, (0x{offsets[k]:03X}, 0x{len(table[k]):X}))", 1)

def gen_qc_match(prop_table, out):
out.write(" match c {\n")
Expand All @@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
out.write(" }\n")

def gen_nfc_qc(prop_tables, out):
out.write("#[inline]\n")
out.write("\n#[inline]\n")
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
gen_qc_match(prop_tables['NFC_QC'], out)
Expand Down Expand Up @@ -450,13 +457,13 @@ def gen_nfkd_qc(prop_tables, out):

def gen_combining_mark(general_category_mark, out):
gen_mph_data('combining_mark', general_category_mark, 'u32',
lambda k: '0x{:04x}'.format(k))
lambda k: '0x{:05X}'.format(k), 10)

def gen_public_assigned(general_category_public_assigned, out):
# This could be done as a hash but the table is somewhat small.
out.write("#[inline]\n")
out.write("pub fn is_public_assigned(c: char) -> bool {\n")
out.write(" match c {\n")
out.write(" matches!(c,\n")

start = True
for first, last in general_category_public_assigned:
Expand All @@ -469,12 +476,9 @@ def gen_public_assigned(general_category_public_assigned, out):
out.write("'\\u{%s}'\n" % hexify(first))
else:
out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
out.write(" => true,\n")

out.write(" _ => false,\n")
out.write(" }\n")
out.write(" )\n")
out.write("}\n")
out.write("\n")

def gen_stream_safe(leading, trailing, out):
# This could be done as a hash but the table is very small.
Expand All @@ -488,10 +492,9 @@ def gen_stream_safe(leading, trailing, out):
out.write(" _ => 0,\n")
out.write(" }\n")
out.write("}\n")
out.write("\n")

gen_mph_data('trailing_nonstarters', trailing, 'u32',
lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))
lambda k: f"0x{int(trailing[k]) | (k << 8):07X}", 8)

def gen_tests(tests, out):
out.write("""#[derive(Debug)]
Expand Down Expand Up @@ -579,43 +582,33 @@ def minimal_perfect_hash(d):
data = UnicodeData()
with open("tables.rs", "w", newline = "\n") as out:
out.write(PREAMBLE)
out.write("#![cfg_attr(rustfmt, rustfmt::skip)]\n")
out.write("use crate::quick_check::IsNormalized;\n")
out.write("use crate::quick_check::IsNormalized::*;\n")
out.write("\n")

version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
out.write("#[allow(unused)]\n")
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n" % version)

gen_combining_class(data.combining_classes, out)
out.write("\n")

gen_composition_table(data.canon_comp, out)
out.write("\n")

gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)

gen_combining_mark(data.general_category_mark, out)
out.write("\n")

gen_public_assigned(data.general_category_public_assigned, out)
out.write("\n")

gen_nfc_qc(data.norm_props, out)
out.write("\n")

gen_nfkc_qc(data.norm_props, out)
out.write("\n")

gen_nfd_qc(data.norm_props, out)
out.write("\n")

gen_nfkd_qc(data.norm_props, out)
out.write("\n")

gen_stream_safe(data.ss_leading, data.ss_trailing, out)
out.write("\n")

with open("normalization_tests.rs", "w", newline = "\n") as out:
out.write(PREAMBLE)
Expand Down
Loading

0 comments on commit 6ae7229

Please sign in to comment.