diff options
| author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-02-28 16:38:01 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-02-28 16:38:01 -0500 | 
| commit | 8f057c63fea6f06e921f2134d881192002dd23bc (patch) | |
| tree | dbdaf163493d2f54e6420297ddfbd5f81c50b596 /ext/js | |
| parent | ec1a8380b5dd1b091fcdbb96edcdab56c9df9f9e (diff) | |
Improve text parser (#1469)
* Merge ungrouped characters
* Update iteration
* Fix incorrect code point handling
* Simplify text
* Specify language
* Update how parsed status is represented
Diffstat (limited to 'ext/js')
| -rw-r--r-- | ext/js/background/backend.js | 30 | ||||
| -rw-r--r-- | ext/js/display/query-parser.js | 15 | 
2 files changed, 25 insertions, 20 deletions
```diff
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index 1238673b..715b916b 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -1038,31 +1038,41 @@ class Backend {
         const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
         const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
         const results = [];
-        while (text.length > 0) {
-            const term = [];
+        let previousUngroupedSegment = null;
+        let i = 0;
+        const ii = text.length;
+        while (i < ii) {
             const [definitions, sourceLength] = await this._translator.findTerms(
                 'simple',
-                text.substring(0, scanningLength),
+                text.substring(i, i + scanningLength),
                 findTermsOptions
             );
+            const codePoint = text.codePointAt(i);
+            const character = String.fromCodePoint(codePoint);
             if (
                 definitions.length > 0 &&
                 sourceLength > 0 &&
-                (sourceLength !== 1 || this._japaneseUtil.isCodePointJapanese(text[0]))
+                (sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
             ) {
+                previousUngroupedSegment = null;
                 const {expression, reading} = definitions[0];
-                const source = text.substring(0, sourceLength);
+                const source = text.substring(i, i + sourceLength);
+                const term = [];
                 for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
                     const reading2 = jp.convertReading(text2, furigana, readingMode);
                     term.push({text: text2, reading: reading2});
                 }
-                text = text.substring(source.length);
+                results.push(term);
+                i += sourceLength;
             } else {
-                const reading = jp.convertReading(text[0], '', readingMode);
-                term.push({text: text[0], reading});
-                text = text.substring(1);
+                if (previousUngroupedSegment === null) {
+                    previousUngroupedSegment = {text: character, reading: ''};
+                    results.push([previousUngroupedSegment]);
+                } else {
+                    previousUngroupedSegment.text += character;
+                }
+                i += character.length;
             }
-            results.push(term);
         }
         return results;
     }
diff --git a/ext/js/display/query-parser.js b/ext/js/display/query-parser.js
index d6a3b4da..c21033de 100644
--- a/ext/js/display/query-parser.js
+++ b/ext/js/display/query-parser.js
@@ -132,7 +132,8 @@ class QueryParser extends EventDispatcher {
     _setPreview(text) {
         const terms = [[{text, reading: ''}]];
         this._queryParser.textContent = '';
-        this._queryParser.appendChild(this._createParseResult(terms, true));
+        this._queryParser.dataset.parsed = 'false';
+        this._queryParser.appendChild(this._createParseResult(terms));
     }
 
     _renderParserSelect() {
@@ -146,6 +147,7 @@ class QueryParser extends EventDispatcher {
     _renderParseResult() {
         const parseResult = this._getParseResult();
         this._queryParser.textContent = '';
+        this._queryParser.dataset.parsed = 'true';
         if (!parseResult) { return; }
         this._queryParser.appendChild(this._createParseResult(parseResult.content, false));
     }
@@ -182,13 +184,11 @@ class QueryParser extends EventDispatcher {
         select.selectedIndex = selectedIndex;
     }
 
-    _createParseResult(terms, preview) {
-        const type = preview ? 'preview' : 'normal';
+    _createParseResult(terms) {
         const fragment = document.createDocumentFragment();
         for (const term of terms) {
             const termNode = document.createElement('span');
             termNode.className = 'query-parser-term';
-            termNode.dataset.type = type;
             for (const segment of term) {
                 if (segment.reading.trim().length === 0) {
                     this._addSegmentText(segment.text, termNode);
@@ -221,11 +221,6 @@ class QueryParser extends EventDispatcher {
     }
 
     _addSegmentText(text, container) {
-        for (const character of text) {
-            const node = document.createElement('span');
-            node.className = 'query-parser-char';
-            node.textContent = character;
-            container.appendChild(node);
-        }
+        container.textContent = text;
     }
 }
```