Skip to content

Commit

Permalink
feat: implement line-break.txt v13 (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
niklasvh authored Jul 15, 2021
1 parent 330cb73 commit bc95c80
Show file tree
Hide file tree
Showing 10 changed files with 2,875 additions and 2,036 deletions.
9 changes: 4 additions & 5 deletions scripts/generate_line_break_tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ import {BREAK_MANDATORY, BREAK_NOT_ALLOWED, BREAK_ALLOWED} from '../src/LineBrea

const data = readFileSync(resolve(__dirname, '../tests/LineBreakTest.txt')).toString();
const tests: string[] = [];
data
.split('\n')
.filter(s => s.length > 0)
.forEach(s => {
data.split('\n')
.filter((s) => s.length > 0)
.forEach((s) => {
let [input, comment] = s.split('#');
input = input.trim();

Expand All @@ -16,7 +15,7 @@ data
const inputs = input.split(/\s+/g);
const codePoints: string[] = [];
const breaks: string[] = [];
inputs.forEach(input => {
inputs.forEach((input) => {
if ([BREAK_ALLOWED, BREAK_MANDATORY, BREAK_NOT_ALLOWED].indexOf(input) !== -1) {
breaks.push(input);
} else {
Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_line_break_trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ let rangeType: number | null = null;

rawData
.split('\n')
.map(s => {
.map((s) => {
const index = s.indexOf('#');
const first = (index === -1 ? s : s.substring(0, index)).trim();
return index === -1
Expand Down
19 changes: 10 additions & 9 deletions src/LineBreak.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ const RI = 41; // Keep pairs together. For pairs; break before and after other
const SA = 42; // Provide a line break opportunity contingent on additional, language-specific context analysis
const XX = 43; // Have as yet unknown line breaking behavior or unassigned code positions

// Opening-punctuation code points that rule LB30 treats specially: when the
// character after a letter or number is one of these, LB30 does not forbid
// the break, even though its class is OP.
// NOTE(review): the "ea" prefix presumably refers to the East Asian Width
// exception in UAX #14 LB30 — confirm whether other wide OP characters belong here.
const ea_OP: number[] = [
    0x2329, // U+2329 LEFT-POINTING ANGLE BRACKET
    0xff08, // U+FF08 FULLWIDTH LEFT PARENTHESIS
];

export const classes: {[key: string]: number} = {
BK,
CR,
Expand Down Expand Up @@ -304,7 +306,7 @@ const _lineBreakAtIndex = (
}

// LB8a Do not break after a zero width joiner.
if (UnicodeTrie.get(codePoints[currentIndex]) === ZWJ && (next === ID || next === EB || next === EM)) {
if (UnicodeTrie.get(codePoints[currentIndex]) === ZWJ) {
return BREAK_NOT_ALLOWED;
}

Expand Down Expand Up @@ -378,8 +380,8 @@ const _lineBreakAtIndex = (
return BREAK_NOT_ALLOWED;
}

// LB22 Do not break between two ellipses, or between letters, numbers or exclamations and ellipsis.
if (next === IN && ALPHABETICS.concat(IN, EX, NU, ID, EB, EM).indexOf(current) !== -1) {
// LB22 Do not break before ellipsis.
if (next === IN) {
return BREAK_NOT_ALLOWED;
}

Expand Down Expand Up @@ -476,7 +478,9 @@ const _lineBreakAtIndex = (

// LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
if (
(ALPHABETICS.concat(NU).indexOf(current) !== -1 && next === OP) ||
(ALPHABETICS.concat(NU).indexOf(current) !== -1 &&
next === OP &&
ea_OP.indexOf(codePoints[afterIndex]) === -1) ||
(ALPHABETICS.concat(NU).indexOf(next) !== -1 && current === CP)
) {
return BREAK_NOT_ALLOWED;
Expand Down Expand Up @@ -532,17 +536,14 @@ interface IOptions {
wordBreak?: WORD_BREAK;
}

const cssFormattedClasses = (
codePoints: number[],
options?: IOptions
): [number[], number[], boolean[] | undefined] => {
const cssFormattedClasses = (codePoints: number[], options?: IOptions): [number[], number[], boolean[] | undefined] => {
if (!options) {
options = {lineBreak: 'normal', wordBreak: 'normal'};
}
let [indicies, classTypes, isLetterNumber] = codePointsToCharacterClasses(codePoints, options.lineBreak);

if (options.wordBreak === 'break-all') {
classTypes = classTypes.map(type => ([NU, AL, SA].indexOf(type) !== -1 ? ID : type));
classTypes = classTypes.map((type) => ([NU, AL, SA].indexOf(type) !== -1 ? ID : type));
}

const forbiddenBreakpoints =
Expand Down
367 changes: 263 additions & 104 deletions src/LineBreak.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/Trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ const slice16 = (view: number[] | Uint16Array, start: number, end?: number) => {
return view.slice(start, end);
}

return new Uint16Array(Array.prototype.slice.call(view, start, end))
return new Uint16Array(Array.prototype.slice.call(view, start, end));
};

const slice32 = (view: number[] | Uint32Array, start: number, end?: number) => {
Expand Down
68 changes: 34 additions & 34 deletions src/TrieBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
UTRIE2_INDEX_2_MASK,
UTRIE2_SHIFT_1_2,
Trie,
int
int,
} from './Trie';

import {encode} from 'base64-arraybuffer';
Expand Down Expand Up @@ -288,10 +288,10 @@ export class TrieBuilder {
*/
setRange(start: int, end: int, value: int, overwrite: boolean = false): TrieBuilder {
/*
* repeat value in [start..end]
* mark index values for repeat-data blocks by setting bit 31 of the index values
* fill around existing values if any, if(overwrite)
*/
* repeat value in [start..end]
* mark index values for repeat-data blocks by setting bit 31 of the index values
* fill around existing values if any, if(overwrite)
*/
let block, rest, repeatBlock;
if (start > 0x10ffff || start < 0 || end > 0x10ffff || end < 0 || start > end) {
throw new Error('Invalid code point range.');
Expand Down Expand Up @@ -351,32 +351,32 @@ export class TrieBuilder {
/* already allocated */
if (overwrite && block >= UNEWTRIE2_DATA_0800_OFFSET) {
/*
* We overwrite all values, and it's not a
* protected (ASCII-linear or 2-byte UTF-8) block:
* replace with the repeatBlock.
*/
* We overwrite all values, and it's not a
* protected (ASCII-linear or 2-byte UTF-8) block:
* replace with the repeatBlock.
*/
setRepeatBlock = true;
} else {
/* !overwrite, or protected block: just write the values into this block */
this.fillBlock(block, 0, UTRIE2_DATA_BLOCK_LENGTH, value, this.initialValue, overwrite);
}
} else if (this.data[block] !== value && (overwrite || block === this.dataNullOffset)) {
/*
* Set the repeatBlock instead of the null block or previous repeat block:
*
* If !isWritableBlock() then all entries in the block have the same value
* because it's the null block or a range block (the repeatBlock from a previous
* call to utrie2_setRange32()).
* No other blocks are used multiple times before compacting.
*
* The null block is the only non-writable block with the initialValue because
* of the repeatBlock initialization above. (If value==initialValue, then
* the repeatBlock will be the null data block.)
*
* We set our repeatBlock if the desired value differs from the block's value,
* and if we overwrite any data or if the data is all initial values
* (which is the same as the block being the null block, see above).
*/
* Set the repeatBlock instead of the null block or previous repeat block:
*
* If !isWritableBlock() then all entries in the block have the same value
* because it's the null block or a range block (the repeatBlock from a previous
* call to utrie2_setRange32()).
* No other blocks are used multiple times before compacting.
*
* The null block is the only non-writable block with the initialValue because
* of the repeatBlock initialization above. (If value==initialValue, then
* the repeatBlock will be the null data block.)
*
* We set our repeatBlock if the desired value differs from the block's value,
* and if we overwrite any data or if the data is all initial values
* (which is the same as the block being the null block, see above).
*/
setRepeatBlock = true;
}
if (setRepeatBlock) {
Expand Down Expand Up @@ -484,9 +484,9 @@ export class TrieBuilder {
}

/*
* write the index-2 array values for supplementary code points,
* shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
*/
* write the index-2 array values for supplementary code points,
* shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
*/
for (i = 0; i < this.index2Length - index2Offset; i++) {
index[destIdx++] = (dataMove + this.index2[index2Offset + i]) >> UTRIE2_INDEX_SHIFT;
}
Expand Down Expand Up @@ -615,17 +615,17 @@ export class TrieBuilder {
this.map[i] = start;
}
/*
* Start with a block length of 64 for 2-byte UTF-8,
* then switch to UTRIE2_DATA_BLOCK_LENGTH.
*/
* Start with a block length of 64 for 2-byte UTF-8,
* then switch to UTRIE2_DATA_BLOCK_LENGTH.
*/
blockLength = 64;
blockCount = blockLength >> UTRIE2_SHIFT_2;
for (start = newStart; start < this.dataLength; ) {
/*
* start: index of first entry of current block
* newStart: index where the current block is to be moved
* (right after current end of already-compacted data)
*/
* start: index of first entry of current block
* newStart: index where the current block is to be moved
* (right after current end of already-compacted data)
*/
if (start === UNEWTRIE2_DATA_0800_OFFSET) {
blockLength = UTRIE2_DATA_BLOCK_LENGTH;
blockCount = 1;
Expand Down
2 changes: 1 addition & 1 deletion src/Util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export const fromCodePoint = (...codePoints: number[]): string => {
codeUnits.push(codePoint);
} else {
codePoint -= 0x10000;
codeUnits.push((codePoint >> 10) + 0xd800, codePoint % 0x400 + 0xdc00);
codeUnits.push((codePoint >> 10) + 0xd800, (codePoint % 0x400) + 0xdc00);
}
if (index + 1 === length || codeUnits.length > 0x4000) {
result += String.fromCharCode(...codeUnits);
Expand Down
2 changes: 1 addition & 1 deletion src/linebreak-trie.ts

Large diffs are not rendered by default.

Loading

0 comments on commit bc95c80

Please sign in to comment.