about | summary | refs | log | tree | commit | diff
path: root/ext/js/background
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-02-28 16:38:01 -0500
committer GitHub <noreply@github.com> 2021-02-28 16:38:01 -0500
commit 8f057c63fea6f06e921f2134d881192002dd23bc (patch)
tree dbdaf163493d2f54e6420297ddfbd5f81c50b596 /ext/js/background
parent ec1a8380b5dd1b091fcdbb96edcdab56c9df9f9e (diff)
Improve text parser (#1469)
* Merge ungrouped characters
* Update iteration
* Fix incorrect code point handling
* Simplify text
* Specify language
* Update how parsed status is represented
Diffstat (limited to 'ext/js/background')
-rw-r--r-- ext/js/background/backend.js 30
1 file changed, 20 insertions, 10 deletions
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index 1238673b..715b916b 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -1038,31 +1038,41 @@ class Backend {
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
const results = [];
- while (text.length > 0) {
- const term = [];
+ let previousUngroupedSegment = null;
+ let i = 0;
+ const ii = text.length;
+ while (i < ii) {
const [definitions, sourceLength] = await this._translator.findTerms(
'simple',
- text.substring(0, scanningLength),
+ text.substring(i, i + scanningLength),
findTermsOptions
);
+ const codePoint = text.codePointAt(i);
+ const character = String.fromCodePoint(codePoint);
if (
definitions.length > 0 &&
sourceLength > 0 &&
- (sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0]))
+ (sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
) {
+ previousUngroupedSegment = null;
const {expression, reading} = definitions[0];
- const source = text.substring(0, sourceLength);
+ const source = text.substring(i, i + sourceLength);
+ const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
}
- text = text.substring(source.length);
+ results.push(term);
+ i += sourceLength;
} else {
- const reading = jp.convertReading(text[0], '', readingMode);
- term.push({text: text[0], reading});
- text = text.substring(1);
+ if (previousUngroupedSegment === null) {
+ previousUngroupedSegment = {text: character, reading: ''};
+ results.push([previousUngroupedSegment]);
+ } else {
+ previousUngroupedSegment.text += character;
+ }
+ i += character.length;
}
- results.push(term);
}
return results;
}