Skip to content

Commit

Permalink
Merge pull request #181 from fakusb/fixSentenceParsing
Browse files Browse the repository at this point in the history
Faster sentence parsing in huge files
  • Loading branch information
fakusb authored Mar 15, 2021
2 parents 4e362c1 + 1e62c3c commit d533b64
Showing 1 changed file with 34 additions and 2 deletions.
36 changes: 34 additions & 2 deletions server/src/sentence-model/SentenceCollection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ export class SentenceCollection implements vscode.TextDocument {
}

try {
var oldSentenceCandidate : number = start;
for(let idx = 0; /**/; ++idx) {
const parseText = this.documentText.substring(currentOffset);
const sent = parser.parseSentence(parseText);
Expand All @@ -352,11 +353,39 @@ export class SentenceCollection implements vscode.TextDocument {
if(reparsed.length == 0 && this.sentences[start-1])
this.sentences[start-1].next = null;
//
if (removed.length>0 && reparsed.length > 0 && removed[removed.length-1].getText()===reparsed[reparsed.length-1].getText() )
console.log("Internal inefficiency: detecting unchanged suffix after editing failed, and we parsed the whole document until end, please report.");
removed.forEach((sent) => sent.dispose());
return {removed: removed, added: reparsed, endOfSentences: true};
} if(idx >= minCount && start+idx < this.sentences.length && currentOffset+sent.text.length === this.sentences[start+idx].getDocumentEndOffset() && sent.text === this.sentences[start+idx].getText()) {
}

var fixByLocalGlueing : undefined | number = undefined;

while(oldSentenceCandidate < this.sentences.length && currentOffset+sent.text.length > this.sentences[oldSentenceCandidate].getDocumentEndOffset())
++oldSentenceCandidate;

if (idx >= minCount && oldSentenceCandidate < this.sentences.length
&& currentOffset+sent.text.length === this.sentences[oldSentenceCandidate].getDocumentEndOffset()
&& sent.text === this.sentences[oldSentenceCandidate].getText())
fixByLocalGlueing = oldSentenceCandidate - start; //found the old, parsed document again
/*
if (idx >= minCount && start+idx < this.sentences.length
&& currentOffset+sent.text.length === this.sentences[start+idx].getDocumentEndOffset()
&& sent.text === this.sentences[start+idx].getText())
fixByLocalGlueing = 0; //we probably edited inside the sentence before this
else if(idx >= minCount && start+idx+1 < this.sentences.length
&& currentOffset+sent.text.length === this.sentences[start+idx+1].getDocumentEndOffset()
&& sent.text === this.sentences[start+idx+1].getText())
fixByLocalGlueing = 1; //we probably joined two sentences by removing a "."
else if(idx >= minCount && 0 <= start+idx-1 && start+idx-1 < this.sentences.length
&& currentOffset+sent.text.length === this.sentences[start+idx-1].getDocumentEndOffset()
&& sent.text === this.sentences[start+idx-1].getText())
fixByLocalGlueing = -1;//we probably seperated a sentence into two by adding a "."*/

if(fixByLocalGlueing !== undefined) {
// no need to parse further; keep remaining sentences
const removed = this.sentences.splice(start, idx, ...reparsed)
const removed = this.sentences.splice(start, idx+fixByLocalGlueing, ...reparsed)

// adjust prev/next reference at the last reparsed sentence
if(reparsed.length > 0) {
const lastReparsed = reparsed[reparsed.length-1];
Expand All @@ -373,6 +402,8 @@ export class SentenceCollection implements vscode.TextDocument {
// if(start+reparsed.length < this.sentences.length)
// this.sentences[start+reparsed.length].prev = this.sentences[start+reparsed.length-1]||null;
//
if (removed.length>1 && reparsed.length > 1 && removed[removed.length-2].getText()===reparsed[reparsed.length-2].getText())
console.log("Internal inefficiency: detecting unchanged suffix after editing and reparsed more than needed ("+reparsed.length+" total), please report.");
removed.forEach((sent) => sent.dispose());
return {removed: removed, added: reparsed, endOfSentences: false};
}
Expand All @@ -394,6 +425,7 @@ export class SentenceCollection implements vscode.TextDocument {
// treat the rest of the document as unparsed
const removed = this.sentences.splice(start, this.sentences.length - start, ...reparsed)
removed.forEach((sent) => sent.dispose());
console.log("Notice: detecting unchanged suffix after editing lead to syntax error.")
return {removed: removed, added: reparsed, endOfSentences: true};
} else {
server.connection.console.warn("unknown parsing error: " + util.inspect(error,false,undefined))
Expand Down

0 comments on commit d533b64

Please sign in to comment.