diff options
author | siikamiika <siikamiika@users.noreply.github.com> | 2019-08-14 22:54:42 +0300 |
---|---|---|
committer | siikamiika <siikamiika@users.noreply.github.com> | 2019-08-14 22:54:42 +0300 |
commit | 658e5ddff13f4ec392dc110004635e22d468525a (patch) | |
tree | 731d79146922885e951fbec57a05d6393bcf13f1 | |
parent | e23d4b9a82581f3cf1118e31d077fc9cdaff7573 (diff) |
ignore zero-width non-joiner
fixes #179
-rw-r--r-- | ext/fg/js/document.js | 2 | ||||
-rw-r--r-- | ext/fg/js/frontend.js | 13 | ||||
-rw-r--r-- | ext/fg/js/source.js | 19 | ||||
-rw-r--r-- | ext/mixed/js/display.js | 3 |
4 files changed, 31 insertions, 6 deletions
```diff
diff --git a/ext/fg/js/document.js b/ext/fg/js/document.js
index f58a64fc..0697edb3 100644
--- a/ext/fg/js/document.js
+++ b/ext/fg/js/document.js
@@ -116,7 +116,7 @@ function docSentenceExtract(source, extent) {
     const sourceLocal = source.clone();
     const position = sourceLocal.setStartOffset(extent);
     sourceLocal.setEndOffset(position + extent);
-    const content = sourceLocal.text();
+    const {text: content} = sourceLocal.text();
 
     let quoteStack = [];
 
diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js
index bd652f3b..afb182d4 100644
--- a/ext/fg/js/frontend.js
+++ b/ext/fg/js/frontend.js
@@ -311,10 +311,18 @@ class Frontend {
     async searchTerms(textSource) {
         textSource.setEndOffset(this.options.scanning.length);
 
-        const {definitions, length} = await apiTermsFind(textSource.text());
+        const {text, strippedIndices} = textSource.text();
+        let {definitions, length} = await apiTermsFind(text);
         if (definitions.length === 0) {
             return false;
         }
+        for (let index of strippedIndices) {
+            if (index < length) {
+                length++;
+            } else {
+                break;
+            }
+        }
 
         textSource.setEndOffset(length);
 
@@ -338,7 +346,8 @@ class Frontend {
     async searchKanji(textSource) {
         textSource.setEndOffset(1);
 
-        const definitions = await apiKanjiFind(textSource.text());
+        const {text} = textSource.text();
+        const definitions = await apiKanjiFind(text);
         if (definitions.length === 0) {
             return false;
         }
diff --git a/ext/fg/js/source.js b/ext/fg/js/source.js
index 664dbec7..cd8f63fd 100644
--- a/ext/fg/js/source.js
+++ b/ext/fg/js/source.js
@@ -16,6 +16,9 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards
+const IGNORE_TEXT_PATTERN = /\u200c/g;
+
 
 /*
  * TextSourceRange
@@ -32,7 +35,13 @@ class TextSourceRange {
     }
 
     text() {
-        return this.content;
+        let strippedIndices = [];
+        const text = this.content.replace(IGNORE_TEXT_PATTERN, (match, offset) => {
+            strippedIndices.push(offset);
+            return '';
+        });
+
+        return {text, strippedIndices};
     }
 
     setEndOffset(length) {
@@ -195,7 +204,13 @@ class TextSourceElement {
     }
 
     text() {
-        return this.content;
+        let strippedIndices = [];
+        const text = this.content.replace(IGNORE_TEXT_PATTERN, (match, offset) => {
+            strippedIndices.push(offset);
+            return '';
+        });
+
+        return {text, strippedIndices};
     }
 
     setEndOffset(length) {
diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js
index 3bb78fe1..6b9c295b 100644
--- a/ext/mixed/js/display.js
+++ b/ext/mixed/js/display.js
@@ -83,7 +83,8 @@ class Display {
             const textSource = docRangeFromPoint({x: e.clientX, y: e.clientY});
             textSource.setEndOffset(this.options.scanning.length);
 
-            const {definitions, length} = await apiTermsFind(textSource.text());
+            const {text} = textSource.text();
+            const {definitions, length} = await apiTermsFind(text);
             if (definitions.length === 0) {
                 return false;
             }
```