From c35a05cd62d43ff435c022a353de55510b020277 Mon Sep 17 00:00:00 2001
From: siikamiika <siikamiika@users.noreply.github.com>
Date: Wed, 30 Oct 2019 03:58:24 +0200
Subject: add kana to text

---
 ext/fg/js/api.js | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'ext/fg/js')

diff --git a/ext/fg/js/api.js b/ext/fg/js/api.js
index 945ba076..cc1e0e90 100644
--- a/ext/fg/js/api.js
+++ b/ext/fg/js/api.js
@@ -29,6 +29,10 @@ function apiTermsFind(text, details, optionsContext) {
     return utilInvoke('termsFind', {text, details, optionsContext});
 }
 
+function apiTextParse(text, optionsContext) { // Sends a 'textParse' request through utilInvoke and returns whatever utilInvoke returns.
+    return utilInvoke('textParse', {text, optionsContext});
+}
+
 function apiKanjiFind(text, optionsContext) {
     return utilInvoke('kanjiFind', {text, optionsContext});
 }
-- 
cgit v1.2.3


From 41020289ab68ef22a0691a9f268a79d6a706df6b Mon Sep 17 00:00:00 2001
From: siikamiika <siikamiika@users.noreply.github.com>
Date: Sun, 3 Nov 2019 05:08:57 +0200
Subject: add mecab support

---
 ext/bg/background.html           |  1 +
 ext/bg/js/api.js                 | 48 ++++++++++++++++++------------
 ext/bg/js/backend.js             |  2 ++
 ext/bg/js/mecab.js               | 63 ++++++++++++++++++++++++++++++++++++++++
 ext/bg/js/search-query-parser.js |  3 +-
 ext/fg/js/api.js                 |  4 +++
 ext/manifest.json                |  3 +-
 ext/mixed/js/japanese.js         | 35 ++++++++++++++++++++--
 8 files changed, 136 insertions(+), 23 deletions(-)
 create mode 100644 ext/bg/js/mecab.js

(limited to 'ext/fg/js')

diff --git a/ext/bg/background.html b/ext/bg/background.html
index bbfbd1e1..6e6e7c26 100644
--- a/ext/bg/background.html
+++ b/ext/bg/background.html
@@ -21,6 +21,7 @@
         <script src="/mixed/js/extension.js"></script>
 
         <script src="/bg/js/anki.js"></script>
+        <script src="/bg/js/mecab.js"></script>
         <script src="/bg/js/api.js"></script>
         <script src="/bg/js/audio.js"></script>
         <script src="/bg/js/backend-api-forwarder.js"></script>
diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js
index 7c9a72a7..2ab01af3 100644
--- a/ext/bg/js/api.js
+++ b/ext/bg/js/api.js
@@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) {
             definitions = dictTermsSort(definitions);
             const {expression, reading} = definitions[0];
             const source = text.slice(0, sourceLength);
-
-            let stemLength = 0;
-            const shortest = Math.min(source.length, expression.length);
-            while (stemLength < shortest && source[stemLength] === expression[stemLength]) {
-                ++stemLength;
-            }
-            const offset = source.length - stemLength;
-
-            for (const {text, furigana} of jpDistributeFurigana(
-                source.slice(0, offset === 0 ? source.length : source.length - offset),
-                reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength)
-            )) {
-                term.push({text, reading: furigana || ''});
-            }
-
-            if (stemLength !== source.length) {
-                term.push({text: source.slice(stemLength)});
+            for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) {
+                // can't use 'furigana' in templates
+                term.push({text, reading: furigana});
             }
-
             text = text.slice(source.length);
         } else {
             term.push({text: text[0]});
@@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) {
     return results;
 }
 
+async function apiTextParseMecab(text, optionsContext) {
+    const options = await apiOptionsGet(optionsContext);
+    const mecab = utilBackend().mecab;
+
+    const results = [];
+    for (const parsedLine of await mecab.parseText(text)) {
+        for (const {expression, reading, source} of parsedLine) {
+            const term = [];
+            if (expression && reading) {
+                for (const {text, furigana} of jpDistributeFuriganaInflected(
+                    expression,
+                    jpKatakanaToHiragana(reading),
+                    source
+                )) {
+                    // can't use 'furigana' in templates
+                    term.push({text, reading: furigana});
+                }
+            } else {
+                term.push({text: source});
+            }
+            results.push(term);
+        }
+        results.push([{text: '\n'}]);
+    }
+    return results;
+}
+
 async function apiKanjiFind(text, optionsContext) {
     const options = await apiOptionsGet(optionsContext);
     const definitions = await utilBackend().translator.findKanji(text, options);
diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js
index d0e404f2..e97f32b5 100644
--- a/ext/bg/js/backend.js
+++ b/ext/bg/js/backend.js
@@ -21,6 +21,7 @@ class Backend {
     constructor() {
         this.translator = new Translator();
         this.anki = new AnkiNull();
+        this.mecab = new Mecab();
         this.options = null;
         this.optionsContext = {
             depth: 0,
@@ -181,6 +182,7 @@ Backend.messageHandlers = {
     kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext),
     termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext),
     textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext),
+    textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext),
     definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext),
     definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext),
     noteView: ({noteId}) => apiNoteView(noteId),
diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js
new file mode 100644
index 00000000..dc46ded2
--- /dev/null
+++ b/ext/bg/js/mecab.js
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2019  Alex Yatskov <alex@foosoft.net>
+ * Author: Alex Yatskov <alex@foosoft.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+class Mecab {
+    constructor() {
+        this.listeners = {};
+        this.sequence = 0;
+        this.startListener();
+    }
+
+    async parseText(text) {
+        return await this.invoke('parse_text', {text});
+    }
+
+    startListener() {
+        this.port = chrome.runtime.connectNative('mecab');
+        this.port.onMessage.addListener((message) => {
+            const {sequence, data} = message;
+            const {callback, timer} = this.listeners[sequence] || {};
+            if (timer) {
+                clearTimeout(timer);
+                delete this.listeners[sequence];
+                callback(data);
+            }
+        });
+    }
+
+    invoke(action, params) {
+        return new Promise((resolve, reject) => {
+            const sequence = this.sequence++;
+
+            this.listeners[sequence] = {
+                callback: (data) => {
+                    resolve(data);
+                },
+                timer: setTimeout(() => {
+                    delete this.listeners[sequence];
+                    reject(`Mecab invoke timed out in ${Mecab.timeout} ms`);
+                }, 1000)
+            }
+
+            this.port.postMessage({action, params, sequence});
+        });
+    }
+}
+
+Mecab.timeout = 1000;
diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js
index 8a7db69a..0c74e550 100644
--- a/ext/bg/js/search-query-parser.js
+++ b/ext/bg/js/search-query-parser.js
@@ -74,7 +74,8 @@ class QueryParser {
             preview: true
         });
 
-        const results = await apiTextParse(text, this.search.getOptionsContext());
+        // const results = await apiTextParse(text, this.search.getOptionsContext());
+        const results = await apiTextParseMecab(text, this.search.getOptionsContext());
 
         const content = await apiTemplateRender('query-parser.html', {
             terms: results.map((term) => {
diff --git a/ext/fg/js/api.js b/ext/fg/js/api.js
index cc1e0e90..92330d9c 100644
--- a/ext/fg/js/api.js
+++ b/ext/fg/js/api.js
@@ -33,6 +33,10 @@ function apiTextParse(text, optionsContext) {
     return utilInvoke('textParse', {text, optionsContext});
 }
 
+function apiTextParseMecab(text, optionsContext) { // Sends a 'textParseMecab' request through utilInvoke and returns whatever utilInvoke returns.
+    return utilInvoke('textParseMecab', {text, optionsContext});
+}
+
 function apiKanjiFind(text, optionsContext) {
     return utilInvoke('kanjiFind', {text, optionsContext});
 }
diff --git a/ext/manifest.json b/ext/manifest.json
index fabceafd..4d75cd54 100644
--- a/ext/manifest.json
+++ b/ext/manifest.json
@@ -42,7 +42,8 @@
         "<all_urls>",
         "storage",
         "clipboardWrite",
-        "unlimitedStorage"
+        "unlimitedStorage",
+        "nativeMessaging"
     ],
     "optional_permissions": [
         "clipboardRead"
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index d24f56a6..78c419b2 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) {
 
         const group = groups[0];
         if (group.mode === 'kana') {
-            if (reading.startsWith(group.text)) {
-                const readingUsed = reading.substring(0, group.text.length);
+            if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) {
                 const readingLeft = reading.substring(group.text.length);
                 const segs = segmentize(readingLeft, groups.splice(1));
                 if (segs) {
-                    return [{text: readingUsed}].concat(segs);
+                    return [{text: group.text}].concat(segs);
                 }
             }
         } else {
@@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) {
 
     return segmentize(reading, groups) || fallback;
 }
+
+function jpDistributeFuriganaInflected(expression, reading, source) {
+    const output = [];
+
+    let stemLength = 0;
+    const shortest = Math.min(source.length, expression.length);
+    const sourceHiragana = jpKatakanaToHiragana(source);
+    const expressionHiragana = jpKatakanaToHiragana(expression);
+    while (
+        stemLength < shortest &&
+        // sometimes an expression can use a kanji that's different from the source
+        (!jpIsKana(source[stemLength]) || (sourceHiragana[stemLength] === expressionHiragana[stemLength]))
+    ) {
+        ++stemLength;
+    }
+    const offset = source.length - stemLength;
+
+    for (const segment of jpDistributeFurigana(
+        source.slice(0, offset === 0 ? source.length : source.length - offset),
+        reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength)
+    )) {
+        output.push(segment);
+    }
+
+    if (stemLength !== source.length) {
+        output.push({text: source.slice(stemLength)});
+    }
+
+    return output;
+}
-- 
cgit v1.2.3