summaryrefslogtreecommitdiff
path: root/ext/fg
diff options
context:
space:
mode:
author siikamiika <siikamiika@users.noreply.github.com> 2019-08-14 22:54:42 +0300
committer siikamiika <siikamiika@users.noreply.github.com> 2019-08-14 22:54:42 +0300
commit 658e5ddff13f4ec392dc110004635e22d468525a (patch)
tree 731d79146922885e951fbec57a05d6393bcf13f1 /ext/fg
parent e23d4b9a82581f3cf1118e31d077fc9cdaff7573 (diff)
ignore zero-width non-joiner
fixes #179
Diffstat (limited to 'ext/fg')
-rw-r--r-- ext/fg/js/document.js 2
-rw-r--r-- ext/fg/js/frontend.js 13
-rw-r--r-- ext/fg/js/source.js 19
3 files changed, 29 insertions, 5 deletions
diff --git a/ext/fg/js/document.js b/ext/fg/js/document.js
index f58a64fc..0697edb3 100644
--- a/ext/fg/js/document.js
+++ b/ext/fg/js/document.js
@@ -116,7 +116,7 @@ function docSentenceExtract(source, extent) {
const sourceLocal = source.clone();
const position = sourceLocal.setStartOffset(extent);
sourceLocal.setEndOffset(position + extent);
- const content = sourceLocal.text();
+ const {text: content} = sourceLocal.text();
let quoteStack = [];
diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js
index bd652f3b..afb182d4 100644
--- a/ext/fg/js/frontend.js
+++ b/ext/fg/js/frontend.js
@@ -311,10 +311,18 @@ class Frontend {
async searchTerms(textSource) {
textSource.setEndOffset(this.options.scanning.length);
- const {definitions, length} = await apiTermsFind(textSource.text());
+ const {text, strippedIndices} = textSource.text();
+ let {definitions, length} = await apiTermsFind(text);
if (definitions.length === 0) {
return false;
}
+ for (let index of strippedIndices) {
+ if (index < length) {
+ length++;
+ } else {
+ break;
+ }
+ }
textSource.setEndOffset(length);
@@ -338,7 +346,8 @@ class Frontend {
async searchKanji(textSource) {
textSource.setEndOffset(1);
- const definitions = await apiKanjiFind(textSource.text());
+ const {text} = textSource.text();
+ const definitions = await apiKanjiFind(text);
if (definitions.length === 0) {
return false;
}
diff --git a/ext/fg/js/source.js b/ext/fg/js/source.js
index 664dbec7..cd8f63fd 100644
--- a/ext/fg/js/source.js
+++ b/ext/fg/js/source.js
@@ -16,6 +16,9 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+// \u200c (Zero-width non-joiner) appears on Google Docs from Chrome 76 onwards
+const IGNORE_TEXT_PATTERN = /\u200c/g;
+
/*
* TextSourceRange
@@ -32,7 +35,13 @@ class TextSourceRange {
}
text() {
- return this.content;
+ let strippedIndices = [];
+ const text = this.content.replace(IGNORE_TEXT_PATTERN, (match, offset) => {
+ strippedIndices.push(offset);
+ return '';
+ });
+
+ return {text, strippedIndices};
}
setEndOffset(length) {
@@ -195,7 +204,13 @@ class TextSourceElement {
}
text() {
- return this.content;
+ let strippedIndices = [];
+ const text = this.content.replace(IGNORE_TEXT_PATTERN, (match, offset) => {
+ strippedIndices.push(offset);
+ return '';
+ });
+
+ return {text, strippedIndices};
}
setEndOffset(length) {