Skip to content

Commit

Permalink
Merge pull request #1904 from jerch/utf8_input
Browse files Browse the repository at this point in the history
Support for raw UTF8 input
  • Loading branch information
jerch authored May 12, 2019
2 parents 031a30b + c0e9bbe commit eef8556
Show file tree
Hide file tree
Showing 11 changed files with 581 additions and 40 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"@types/mocha": "^2.2.33",
"@types/node": "6.0.108",
"@types/puppeteer": "^1.12.4",
"@types/utf8": "^2.1.6",
"@types/webpack": "^4.4.11",
"browserify": "^13.3.0",
"chai": "3.5.0",
Expand All @@ -39,6 +40,7 @@
"ts-loader": "^4.5.0",
"tslint": "^5.9.1",
"tslint-consistent-codestyle": "^1.13.0",
"utf8": "^3.0.0",
"typescript": "3.4",
"vinyl-buffer": "^1.0.0",
"vinyl-source-stream": "^1.1.0",
Expand Down
29 changes: 28 additions & 1 deletion src/InputHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { EscapeSequenceParser } from './EscapeSequenceParser';
import { IDisposable } from 'xterm';
import { Disposable } from './common/Lifecycle';
import { concat } from './common/TypedArrayUtils';
import { StringToUtf32, stringFromCodePoint, utf32ToString } from './core/input/TextDecoder';
import { StringToUtf32, stringFromCodePoint, utf32ToString, Utf8ToUtf32 } from './core/input/TextDecoder';
import { CellData, Attributes, FgFlags, BgFlags, AttributeData, NULL_CELL_WIDTH, NULL_CELL_CODE, DEFAULT_ATTR_DATA } from './core/buffer/BufferLine';
import { EventEmitter2, IEvent } from './common/EventEmitter2';

Expand Down Expand Up @@ -104,6 +104,7 @@ class DECRQSS implements IDcsHandler {
export class InputHandler extends Disposable implements IInputHandler {
private _parseBuffer: Uint32Array = new Uint32Array(4096);
private _stringDecoder: StringToUtf32 = new StringToUtf32();
private _utf8Decoder: Utf8ToUtf32 = new Utf8ToUtf32();
private _workCell: CellData = new CellData();

private _onCursorMove = new EventEmitter2<void>();
Expand Down Expand Up @@ -318,6 +319,32 @@ export class InputHandler extends Disposable implements IInputHandler {
}
}

/**
 * Decodes a chunk of raw UTF-8 bytes into the shared parse buffer and feeds
 * the decoded codepoints to the escape sequence parser. Emits 'cursormove'
 * on the terminal when the cursor position differs after parsing.
 * @param data UTF-8 encoded bytes to parse.
 */
public parseUtf8(data: Uint8Array): void {
  // Without a terminal reference (disposed) there is nothing to parse into.
  if (!this._terminal) {
    return;
  }

  // Snapshot the cursor position so movement can be detected after parsing.
  const bufferBefore = this._terminal.buffer;
  const startX = bufferBefore.x;
  const startY = bufferBefore.y;

  // TODO: Consolidate debug/logging #1560
  if ((this._terminal as any).debug) {
    this._terminal.log('data: ' + data);
  }

  // Grow the parse buffer when the incoming chunk has more bytes than it has slots.
  const requiredLength = data.length;
  if (requiredLength > this._parseBuffer.length) {
    this._parseBuffer = new Uint32Array(requiredLength);
  }
  const decodedLength = this._utf8Decoder.decode(data, this._parseBuffer);
  this._parser.parse(this._parseBuffer, decodedLength);

  // Re-read the buffer from the terminal before comparing positions.
  const bufferAfter = this._terminal.buffer;
  const cursorMoved = bufferAfter.x !== startX || bufferAfter.y !== startY;
  if (cursorMoved) {
    this._terminal.emit('cursormove');
  }
}

public print(data: Uint32Array, start: number, end: number): void {
let code: number;
let chWidth: number;
Expand Down
84 changes: 84 additions & 0 deletions src/Terminal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II

// user input states
public writeBuffer: string[];
public writeBufferUtf8: Uint8Array[];
private _writeInProgress: boolean;

/**
Expand Down Expand Up @@ -340,6 +341,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II

// user input states
this.writeBuffer = [];
this.writeBufferUtf8 = [];
this._writeInProgress = false;

this._xoffSentToCatchUp = false;
Expand Down Expand Up @@ -1365,6 +1367,88 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II
}
}

/**
 * Queues raw UTF-8 bytes for writing to the terminal. The bytes are appended
 * to `writeBufferUtf8` and processed asynchronously by `_innerWriteUtf8`.
 * @param data Uint8Array with UTF-8 bytes to write to the terminal.
 */
public writeUtf8(data: Uint8Array): void {
  // Nothing to do for a disposed terminal or a falsy data value.
  if (this._isDisposed || !data) {
    return;
  }

  this.writeBufferUtf8.push(data);

  // Send XOFF to pause the pty process if the write buffer becomes too large so
  // xterm.js can catch up before more data is sent. This is necessary in order
  // to keep signals such as ^C responsive.
  const needsPause = this.options.useFlowControl
    && !this._xoffSentToCatchUp
    && this.writeBufferUtf8.length >= WRITE_BUFFER_PAUSE_THRESHOLD;
  if (needsPause) {
    // XOFF - stop pty pipe
    // XON will be triggered by emulator before processing data chunk
    this.handler(C0.DC3);
    this._xoffSentToCatchUp = true;
  }

  // NOTE(review): `_writeInProgress` is a single flag; if the string write path
  // sets it as well, bytes queued here while that path is active would wait
  // for the next writeUtf8 call - confirm against write()/_innerWrite.
  if (this._writeInProgress || this.writeBufferUtf8.length === 0) {
    return;
  }

  // Mark the write as running, then process the queue asynchronously so more
  // writes can come in while data is being processed.
  this._writeInProgress = true;
  setTimeout(() => this._innerWriteUtf8());
}

/**
 * Processes queued UTF-8 chunks from `writeBufferUtf8`, starting at
 * `bufferOffset`. Chunks are parsed one after another until the queue is
 * exhausted or WRITE_TIMEOUT_MS has elapsed; in the latter case the remaining
 * chunks are rescheduled via setTimeout.
 * @param bufferOffset Index of the first chunk in `writeBufferUtf8` to process.
 */
protected _innerWriteUtf8(bufferOffset: number = 0): void {
  // A disposed terminal drops all pending data; the loop below is then skipped
  // and the write state is reset at the end.
  if (this._isDisposed) {
    this.writeBufferUtf8 = [];
  }

  const batchStart = Date.now();
  while (bufferOffset < this.writeBufferUtf8.length) {
    const chunk = this.writeBufferUtf8[bufferOffset];
    bufferOffset++;

    // If XOFF was sent in order to catch up with the pty process, resume it
    // once the last queued chunk is reached to allow more data to come in.
    if (this._xoffSentToCatchUp && bufferOffset === this.writeBufferUtf8.length) {
      this.handler(C0.DC1);
      this._xoffSentToCatchUp = false;
    }

    // Start the refresh range at the current cursor row.
    this._refreshStart = this.buffer.y;
    this._refreshEnd = this.buffer.y;

    this._inputHandler.parseUtf8(chunk);

    // Include the cursor row after parsing, then refresh the collected range.
    this.updateRange(this.buffer.y);
    this.refresh(this._refreshStart, this._refreshEnd);

    // Stop this batch once the time budget is used up.
    const elapsed = Date.now() - batchStart;
    if (elapsed >= WRITE_TIMEOUT_MS) {
      break;
    }
  }

  if (this.writeBufferUtf8.length <= bufferOffset) {
    // Everything was processed: reset the write state.
    this._writeInProgress = false;
    this.writeBufferUtf8 = [];
    return;
  }

  // Allow renderer to catch up before processing the next batch
  setTimeout(() => this._innerWriteUtf8(bufferOffset), 0);
}

/**
* Writes text to the terminal.
* @param data The text to write to the terminal.
Expand Down
3 changes: 3 additions & 0 deletions src/TestUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ export class MockTerminal implements ITerminal {
// Mock stub: not implemented, always throws.
write(data: string): void {
throw new Error('Method not implemented.');
}
// Mock stub: not implemented, always throws.
writeUtf8(data: Uint8Array): void {
throw new Error('Method not implemented.');
}
bracketedPasteMode: boolean;
mouseHelper: IMouseHelper;
renderer: IRenderer;
Expand Down
2 changes: 2 additions & 0 deletions src/Types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ export interface ICompositionHelper {
*/
export interface IInputHandler {
parse(data: string): void;
parseUtf8(data: Uint8Array): void;
print(data: Uint32Array, start: number, end: number): void;

/** C0 BEL */ bell(): void;
Expand Down Expand Up @@ -265,6 +266,7 @@ export interface IPublicTerminal extends IDisposable, IEventEmitter {
scrollToLine(line: number): void;
clear(): void;
write(data: string): void;
writeUtf8(data: Uint8Array): void;
getOption(key: string): any;
setOption(key: string, value: any): void;
refresh(start: number, end: number): void;
Expand Down
160 changes: 156 additions & 4 deletions src/core/input/TextDecoder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,41 @@
*/

import { assert } from 'chai';
import { StringToUtf32, stringFromCodePoint, utf32ToString } from './TextDecoder';
import { StringToUtf32, stringFromCodePoint, Utf8ToUtf32, utf32ToString } from './TextDecoder';
import { encode } from 'utf8';

/**
 * Converts the first `length` UTF32 codepoints of `data` to a string.
 * Uses String.fromCodePoint when the runtime provides it, otherwise
 * concatenates the result of stringFromCodePoint for each codepoint.
 */
function toString(data: Uint32Array, length: number): string {
  const nativeFromCodePoint = (String as any).fromCodePoint;
  if (!nativeFromCodePoint) {
    // Fallback path: build the string one codepoint at a time.
    let text = '';
    for (let idx = 0; idx < length; ++idx) {
      text += stringFromCodePoint(data[idx]);
    }
    return text;
  }
  return nativeFromCodePoint.apply(null, data.subarray(0, length));
}

/**
 * Converts a "bytestring" (charCode 0-255) to bytes: one byte per UTF-16
 * code unit of `s`, taken from its char code.
 */
function fromByteString(s: string): Uint8Array {
  // split('') yields one entry per UTF-16 code unit, matching charCodeAt indexing.
  const charCodes = s.split('').map(unit => unit.charCodeAt(0));
  return new Uint8Array(charCodes);
}

const TEST_STRINGS = [
'Лорем ипсум долор сит амет, ех сеа аццусам диссентиет. Ан еос стет еирмод витуперата. Иус дицерет урбанитас ет. Ан при алтера долорес сплендиде, цу яуо интегре денияуе, игнота волуптариа инструцтиор цу вим.',
'ლორემ იფსუმ დოლორ სით ამეთ, ფაცერ მუციუს ცონსეთეთურ ყუო იდ, ფერ ვივენდუმ ყუაერენდუმ ეა, ესთ ამეთ მოვეთ სუავითათე ცუ. ვითაე სენსიბუს ან ვიხ. ეხერცი დეთერრუისსეთ უთ ყუი. ვოცენთ დებითის ადიფისცი ეთ ფერ. ნეც ან ფეუგაით ფორენსიბუს ინთერესსეთ. იდ დიცო რიდენს იუს. დისსენთიეთ ცონსეყუუნთურ სედ ნე, ნოვუმ მუნერე ეუმ ათ, ნე ეუმ ნიჰილ ირაცუნდია ურბანითას.',
'अधिकांश अमितकुमार प्रोत्साहित मुख्य जाने प्रसारन विश्लेषण विश्व दारी अनुवादक अधिकांश नवंबर विषय गटकउसि गोपनीयता विकास जनित परस्पर गटकउसि अन्तरराष्ट्रीयकरन होसके मानव पुर्णता कम्प्युटर यन्त्रालय प्रति साधन',
'覧六子当聞社計文護行情投身斗来。増落世的況上席備界先関権能万。本物挙歯乳全事携供板栃果以。頭月患端撤競見界記引去法条公泊候。決海備駆取品目芸方用朝示上用報。講申務紙約週堂出応理田流団幸稿。起保帯吉対阜庭支肯豪彰属本躍。量抑熊事府募動極都掲仮読岸。自続工就断庫指北速配鳴約事新住米信中験。婚浜袋著金市生交保他取情距。',
'八メル務問へふらく博辞説いわょ読全タヨムケ東校どっ知壁テケ禁去フミ人過を装5階がねぜ法逆はじ端40落ミ予竹マヘナセ任1悪た。省ぜりせ製暇ょへそけ風井イ劣手はぼまず郵富法く作断タオイ取座ゅょが出作ホシ月給26島ツチ皇面ユトクイ暮犯リワナヤ断連こうでつ蔭柔薄とレにの。演めけふぱ損田転10得観びトげぎ王物鉄夜がまけ理惜くち牡提づ車惑参ヘカユモ長臓超漫ぼドかわ。',
'모든 국민은 행위시의 법률에 의하여 범죄를 구성하지 아니하는 행위로 소추되지 아니하며. 전직대통령의 신분과 예우에 관하여는 법률로 정한다, 국회는 헌법 또는 법률에 특별한 규정이 없는 한 재적의원 과반수의 출석과 출석의원 과반수의 찬성으로 의결한다. 군인·군무원·경찰공무원 기타 법률이 정하는 자가 전투·훈련등 직무집행과 관련하여 받은 손해에 대하여는 법률이 정하는 보상외에 국가 또는 공공단체에 공무원의 직무상 불법행위로 인한 배상은 청구할 수 없다.',
'كان فشكّل الشرقي مع, واحدة للمجهود تزامناً بعض بل. وتم جنوب للصين غينيا لم, ان وبدون وكسبت الأمور ذلك, أسر الخاسر الانجليزية هو. نفس لغزو مواقعها هو. الجو علاقة الصعداء انه أي, كما مع بمباركة للإتحاد الوزراء. ترتيب الأولى أن حدى, الشتوية باستحداث مدن بل, كان قد أوسع عملية. الأوضاع بالمطالبة كل قام, دون إذ شمال الربيع،. هُزم الخاصّة ٣٠ أما, مايو الصينية مع قبل.',
'או סדר החול מיזמי קרימינולוגיה. קהילה בגרסה לויקיפדים אל היא, של צעד ציור ואלקטרוניקה. מדע מה ברית המזנון ארכיאולוגיה, אל טבלאות מבוקשים כלל. מאמרשיחהצפה העריכהגירסאות שכל אל, כתב עיצוב מושגי של. קבלו קלאסיים ב מתן. נבחרים אווירונאוטיקה אם מלא, לוח למנוע ארכיאולוגיה מה. ארץ לערוך בקרבת מונחונים או, עזרה רקטות לויקיפדים אחר גם.',
'Лорем ლორემ अधिकांश 覧六子 八メル 모든 בקרבת 💮 😂 äggg 123€ 𝄞.'
];

describe('text encodings', () => {
it('stringFromCodePoint/utf32ToString', () => {
Expand All @@ -17,7 +51,7 @@ describe('text encodings', () => {
assert.equal(utf32ToString(data), s);
});

describe('StringToUtf32 Decoder', () => {
describe('StringToUtf32 decoder', () => {
describe('full codepoint test', () => {
it('0..65535', () => {
const decoder = new StringToUtf32();
Expand All @@ -34,7 +68,8 @@ describe('text encodings', () => {
decoder.clear();
}
});
it('65536..0x10FFFF (surrogates)', function(): void {

it('65536..0x10FFFF (surrogates)', function (): void {
this.timeout(20000);
const decoder = new StringToUtf32();
const target = new Uint32Array(5);
Expand All @@ -50,6 +85,16 @@ describe('text encodings', () => {
});
});

it('test strings', () => {
  // Each fixture string is decoded to UTF32 and converted back; the result
  // must equal the original string.
  const decoder = new StringToUtf32();
  const target = new Uint32Array(500);
  for (const text of TEST_STRINGS) {
    const written = decoder.decode(text, target);
    assert.equal(toString(target, written), text);
    decoder.clear();
  }
});

describe('stream handling', () => {
it('surrogates mixed advance by 1', () => {
const decoder = new StringToUtf32();
Expand All @@ -58,7 +103,114 @@ describe('text encodings', () => {
let decoded = '';
for (let i = 0; i < input.length; ++i) {
const written = decoder.decode(input[i], target);
decoded += utf32ToString(target, written);
decoded += toString(target, written);
}
assert(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€');
});
});
});

describe('Utf8ToUtf32 decoder', () => {
describe('full codepoint test', () => {

it('0..65535 (1/2/3 byte sequences)', () => {
  // Encodes every char code of the range to UTF-8 and checks that the decoder
  // yields exactly one codepoint that converts back to the same character.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  for (let charCode = 0; charCode <= 0xFFFF; ++charCode) {
    // the surrogate range 0xD800..0xDFFF is skipped
    const isSurrogate = charCode >= 0xD800 && charCode <= 0xDFFF;
    if (!isSurrogate) {
      const bytes = fromByteString(encode(String.fromCharCode(charCode)));
      const written = decoder.decode(bytes, target);
      assert.equal(written, 1);
      assert.equal(toString(target, written), String.fromCharCode(charCode));
      decoder.clear();
    }
  }
});

it('65536..0x10FFFF (4 byte sequences)', function (): void {
  // Iterating the whole range is slow, so the default timeout is raised.
  this.timeout(20000);
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  // The upper bound is inclusive: 0x10FFFF is the last codepoint named in the
  // test title and must be covered as well.
  for (let i = 65536; i <= 0x10FFFF; ++i) {
    const utf8Data = fromByteString(encode(stringFromCodePoint(i)));
    const length = decoder.decode(utf8Data, target);
    assert.equal(length, 1);
    assert.equal(target[0], i);
    decoder.clear();
  }
});
});

it('test strings', () => {
  // Each fixture string is encoded to UTF-8 bytes, decoded to UTF32 and
  // converted back; the result must equal the original string.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(500);
  for (const text of TEST_STRINGS) {
    const bytes = fromByteString(encode(text));
    const written = decoder.decode(bytes, target);
    assert.equal(toString(target, written), text);
    decoder.clear();
  }
});

describe('stream handling', () => {
it('2 byte sequences - advance by 1', () => {
  // Feeds the input one byte per decode() call, so every 2-byte sequence is
  // split across two calls.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  const utf8Data = fromByteString('\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f\xc3\xb6\xc3\xa4\xc3\xbc');
  let decoded = '';
  for (let i = 0; i < utf8Data.length; ++i) {
    const written = decoder.decode(utf8Data.slice(i, i + 1), target);
    decoded += toString(target, written);
  }
  // assert.equal compares against the expected text; assert(value, message)
  // would only check that `decoded` is truthy.
  assert.equal(decoded, 'ÄÖÜßöäü');
});

it('2/3 byte sequences - advance by 1', () => {
  // Feeds the input one byte per decode() call, so every 2- and 3-byte
  // sequence is split across several calls.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xc3\x96\xe2\x82\xac\xc3\x9c\xe2\x82\xac\xc3\x9f\xe2\x82\xac\xc3\xb6\xe2\x82\xac\xc3\xa4\xe2\x82\xac\xc3\xbc');
  let decoded = '';
  for (let i = 0; i < utf8Data.length; ++i) {
    const written = decoder.decode(utf8Data.slice(i, i + 1), target);
    decoded += toString(target, written);
  }
  // The expected text is what the bytes above encode: each 2-byte letter
  // (\xc3 ..) is followed by the euro sign (\xe2\x82\xac), except the last.
  // assert.equal compares the values; assert(value, message) would only
  // check that `decoded` is truthy.
  assert.equal(decoded, 'Ä€Ö€Ü€ß€ö€ä€ü');
});

it('2/3/4 byte sequences - advance by 1', () => {
  // Feeds the input one byte per decode() call, so every multi-byte sequence
  // is split across several calls.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xf0\x9d\x84\x9e\xc3\x96\xf0\x9d\x84\x9e\xe2\x82\xac\xc3\x9c\xf0\x9d\x84\x9e\xe2\x82\xac');
  let decoded = '';
  for (let i = 0; i < utf8Data.length; ++i) {
    const written = decoder.decode(utf8Data.slice(i, i + 1), target);
    decoded += toString(target, written);
  }
  // assert.equal compares against the expected text; assert(value, message)
  // would only check that `decoded` is truthy.
  assert.equal(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€');
});

it('2/3/4 byte sequences - advance by 2', () => {
  // Feeds the input in 2-byte slices, so chunk boundaries fall at varying
  // positions inside the multi-byte sequences.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xf0\x9d\x84\x9e\xc3\x96\xf0\x9d\x84\x9e\xe2\x82\xac\xc3\x9c\xf0\x9d\x84\x9e\xe2\x82\xac');
  let decoded = '';
  for (let i = 0; i < utf8Data.length; i += 2) {
    const written = decoder.decode(utf8Data.slice(i, i + 2), target);
    decoded += toString(target, written);
  }
  // assert.equal compares against the expected text; assert(value, message)
  // would only check that `decoded` is truthy.
  assert.equal(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€');
});

it('2/3/4 byte sequences - advance by 3', () => {
  // Feeds the input in 3-byte slices, so chunk boundaries fall at varying
  // positions inside the multi-byte sequences.
  const decoder = new Utf8ToUtf32();
  const target = new Uint32Array(5);
  const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xf0\x9d\x84\x9e\xc3\x96\xf0\x9d\x84\x9e\xe2\x82\xac\xc3\x9c\xf0\x9d\x84\x9e\xe2\x82\xac');
  let decoded = '';
  for (let i = 0; i < utf8Data.length; i += 3) {
    const written = decoder.decode(utf8Data.slice(i, i + 3), target);
    decoded += toString(target, written);
  }
  // assert.equal compares against the expected text; assert(value, message)
  // would only check that `decoded` is truthy.
  assert.equal(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€');
});
Expand Down
Loading

0 comments on commit eef8556

Please sign in to comment.