From 658e5ddff13f4ec392dc110004635e22d468525a Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 14 Aug 2019 22:54:42 +0300 Subject: ignore zero-width non-joiner fixes #179 --- ext/fg/js/document.js | 2 +- ext/fg/js/frontend.js | 13 +++++++++++-- ext/fg/js/source.js | 19 +++++++++++++++++-- 3 files changed, 29 insertions(+), 5 deletions(-) (limited to 'ext/fg') diff --git a/ext/fg/js/document.js b/ext/fg/js/document.js index f58a64fc..0697edb3 100644 --- a/ext/fg/js/document.js +++ b/ext/fg/js/document.js @@ -116,7 +116,7 @@ function docSentenceExtract(source, extent) { const sourceLocal = source.clone(); const position = sourceLocal.setStartOffset(extent); sourceLocal.setEndOffset(position + extent); - const content = sourceLocal.text(); + const {text: content} = sourceLocal.text(); let quoteStack = []; diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index bd652f3b..afb182d4 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -311,10 +311,18 @@ class Frontend { async searchTerms(textSource) { textSource.setEndOffset(this.options.scanning.length); - const {definitions, length} = await apiTermsFind(textSource.text()); + const {text, strippedIndices} = textSource.text(); + let {definitions, length} = await apiTermsFind(text); if (definitions.length === 0) { return false; } + for (let index of strippedIndices) { + if (index < length) { + length++; + } else { + break; + } + } textSource.setEndOffset(length); @@ -338,7 +346,8 @@ class Frontend { async searchKanji(textSource) { textSource.setEndOffset(1); - const definitions = await apiKanjiFind(textSource.text()); + const {text} = textSource.text(); + const definitions = await apiKanjiFind(text); if (definitions.length === 0) { return false; } diff --git a/ext/fg/js/source.js b/ext/fg/js/source.js index 664dbec7..cd8f63fd 100644 --- a/ext/fg/js/source.js +++ b/ext/fg/js/source.js @@ -16,6 +16,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards +const IGNORE_TEXT_PATTERN = /\u200c/g; + /* * TextSourceRange @@ -32,7 +35,13 @@ class TextSourceRange { } text() { - return this.content; + let strippedIndices = []; + const text = this.content.replace(IGNORE_TEXT_PATTERN, (match, offset) => { + strippedIndices.push(offset); + return ''; + }); + + return {text, strippedIndices}; } setEndOffset(length) { @@ -195,7 +204,13 @@ class TextSourceElement { } text() { - return this.content; + let strippedIndices = []; + const text = this.content.replace(IGNORE_TEXT_PATTERN, (match, offset) => { + strippedIndices.push(offset); + return ''; + }); + + return {text, strippedIndices}; } setEndOffset(length) { -- cgit v1.2.3