summary refs log tree commit diff
path: root/ext
diff options
context:
space:
mode:
author siikamiika <siikamiika@users.noreply.github.com> 2019-11-03 05:08:57 +0200
committer siikamiika <siikamiika@users.noreply.github.com> 2019-11-23 17:45:44 +0200
commit 41020289ab68ef22a0691a9f268a79d6a706df6b (patch)
tree 0cd10c38b37cc475dc306c5cf95e8a2e4247a98a /ext
parent 3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f (diff)
add mecab support
Diffstat (limited to 'ext')
-rw-r--r-- ext/bg/background.html 1
-rw-r--r-- ext/bg/js/api.js 48
-rw-r--r-- ext/bg/js/backend.js 2
-rw-r--r-- ext/bg/js/mecab.js 63
-rw-r--r-- ext/bg/js/search-query-parser.js 3
-rw-r--r-- ext/fg/js/api.js 4
-rw-r--r-- ext/manifest.json 3
-rw-r--r-- ext/mixed/js/japanese.js 35
8 files changed, 136 insertions, 23 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html
index bbfbd1e1..6e6e7c26 100644
--- a/ext/bg/background.html
+++ b/ext/bg/background.html
@@ -21,6 +21,7 @@
<script src="/mixed/js/extension.js"></script>
<script src="/bg/js/anki.js"></script>
+ <script src="/bg/js/mecab.js"></script>
<script src="/bg/js/api.js"></script>
<script src="/bg/js/audio.js"></script>
<script src="/bg/js/backend-api-forwarder.js"></script>
diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js
index 7c9a72a7..2ab01af3 100644
--- a/ext/bg/js/api.js
+++ b/ext/bg/js/api.js
@@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) {
definitions = dictTermsSort(definitions);
const {expression, reading} = definitions[0];
const source = text.slice(0, sourceLength);
-
- let stemLength = 0;
- const shortest = Math.min(source.length, expression.length);
- while (stemLength < shortest && source[stemLength] === expression[stemLength]) {
- ++stemLength;
- }
- const offset = source.length - stemLength;
-
- for (const {text, furigana} of jpDistributeFurigana(
- source.slice(0, offset === 0 ? source.length : source.length - offset),
- reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength)
- )) {
- term.push({text, reading: furigana || ''});
- }
-
- if (stemLength !== source.length) {
- term.push({text: source.slice(stemLength)});
+ for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) {
+ // can't use 'furigana' in templates
+ term.push({text, reading: furigana});
}
-
text = text.slice(source.length);
} else {
term.push({text: text[0]});
@@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) {
return results;
}
+/**
+ * Parses text through the backend's mecab object and turns every token it
+ * reports into a term made of furigana segments.
+ *
+ * @param {string} text - Text handed to mecab.parseText().
+ * @param {*} optionsContext - Passed to apiOptionsGet().
+ * @returns {Promise<Array>} One array of {text, reading} segments per token
+ *   (a single {text} entry when the token has no expression or no reading),
+ *   with a [{text: '\n'}] entry appended after each parsed line.
+ */
+async function apiTextParseMecab(text, optionsContext) {
+    // The options are still loaded here, although nothing below reads them.
+    const options = await apiOptionsGet(optionsContext);
+    const parsedLines = await utilBackend().mecab.parseText(text);
+
+    const results = [];
+    for (const parsedLine of parsedLines) {
+        for (const token of parsedLine) {
+            const {expression, reading, source} = token;
+
+            if (!(expression && reading)) {
+                // Nothing to distribute furigana over: keep the raw source.
+                results.push([{text: source}]);
+                continue;
+            }
+
+            const segments = jpDistributeFuriganaInflected(
+                expression,
+                jpKatakanaToHiragana(reading),
+                source
+            );
+            // can't use 'furigana' in templates
+            results.push(Array.from(segments, (segment) => ({
+                text: segment.text,
+                reading: segment.furigana
+            })));
+        }
+        results.push([{text: '\n'}]);
+    }
+    return results;
+}
+
async function apiKanjiFind(text, optionsContext) {
const options = await apiOptionsGet(optionsContext);
const definitions = await utilBackend().translator.findKanji(text, options);
diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js
index d0e404f2..e97f32b5 100644
--- a/ext/bg/js/backend.js
+++ b/ext/bg/js/backend.js
@@ -21,6 +21,7 @@ class Backend {
constructor() {
this.translator = new Translator();
this.anki = new AnkiNull();
+ this.mecab = new Mecab();
this.options = null;
this.optionsContext = {
depth: 0,
@@ -181,6 +182,7 @@ Backend.messageHandlers = {
kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext),
termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext),
textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext),
+ textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext),
definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext),
definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext),
noteView: ({noteId}) => apiNoteView(noteId),
diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js
new file mode 100644
index 00000000..dc46ded2
--- /dev/null
+++ b/ext/bg/js/mecab.js
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2019 Alex Yatskov <alex@foosoft.net>
+ * Author: Alex Yatskov <alex@foosoft.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * Client for the native messaging host registered under the name 'mecab'.
+ *
+ * Every request is posted to the port as {action, params, sequence}. A reply
+ * is matched to its request through its `sequence` field, and its `data`
+ * field becomes the resolved value of the request's promise.
+ */
+class Mecab {
+    constructor() {
+        // Pending requests keyed by sequence number: {callback, timer}.
+        this.listeners = {};
+        // Sequence number given to the next request.
+        this.sequence = 0;
+        this.startListener();
+    }
+
+    /**
+     * Sends a 'parse_text' request for the given text.
+     *
+     * @param {string} text - Text to send to the native host.
+     * @returns {Promise<*>} The `data` field of the host's reply.
+     */
+    async parseText(text) {
+        return await this.invoke('parse_text', {text});
+    }
+
+    /**
+     * Opens the native messaging port and routes each incoming message to
+     * the pending request that has the same sequence number.
+     */
+    startListener() {
+        this.port = chrome.runtime.connectNative('mecab');
+        this.port.onMessage.addListener(({sequence, data}) => {
+            const listener = this.listeners[sequence];
+            if (!listener) {
+                // No pending request: unknown sequence or already timed out.
+                return;
+            }
+            clearTimeout(listener.timer);
+            delete this.listeners[sequence];
+            listener.callback(data);
+        });
+    }
+
+    /**
+     * Posts a request to the native host and waits for the matching reply.
+     *
+     * @param {string} action - Action name placed in the message.
+     * @param {Object} params - Parameters placed in the message.
+     * @returns {Promise<*>} Resolves with the reply's `data` field; rejects
+     *   with an Error when no reply arrives within Mecab.timeout ms.
+     */
+    invoke(action, params) {
+        return new Promise((resolve, reject) => {
+            const sequence = this.sequence++;
+            // Read once so the delay and the error message always agree.
+            const timeout = Mecab.timeout;
+
+            this.listeners[sequence] = {
+                callback: resolve,
+                timer: setTimeout(() => {
+                    delete this.listeners[sequence];
+                    reject(new Error(`Mecab invoke timed out in ${timeout} ms`));
+                }, timeout)
+            };
+
+            this.port.postMessage({action, params, sequence});
+        });
+    }
+}
+
+// Time in milliseconds to wait for a reply before a request is rejected.
+Mecab.timeout = 1000;
diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js
index 8a7db69a..0c74e550 100644
--- a/ext/bg/js/search-query-parser.js
+++ b/ext/bg/js/search-query-parser.js
@@ -74,7 +74,8 @@ class QueryParser {
preview: true
});
- const results = await apiTextParse(text, this.search.getOptionsContext());
+ // const results = await apiTextParse(text, this.search.getOptionsContext());
+ const results = await apiTextParseMecab(text, this.search.getOptionsContext());
const content = await apiTemplateRender('query-parser.html', {
terms: results.map((term) => {
diff --git a/ext/fg/js/api.js b/ext/fg/js/api.js
index cc1e0e90..92330d9c 100644
--- a/ext/fg/js/api.js
+++ b/ext/fg/js/api.js
@@ -33,6 +33,10 @@ function apiTextParse(text, optionsContext) {
return utilInvoke('textParse', {text, optionsContext});
}
+/**
+ * Requests the 'textParseMecab' action through utilInvoke().
+ *
+ * @param {string} text - Text to parse.
+ * @param {*} optionsContext - Forwarded unchanged with the request.
+ * @returns {*} Whatever utilInvoke() returns for this action.
+ */
+function apiTextParseMecab(text, optionsContext) {
+    const params = {text, optionsContext};
+    return utilInvoke('textParseMecab', params);
+}
+
function apiKanjiFind(text, optionsContext) {
return utilInvoke('kanjiFind', {text, optionsContext});
}
diff --git a/ext/manifest.json b/ext/manifest.json
index fabceafd..4d75cd54 100644
--- a/ext/manifest.json
+++ b/ext/manifest.json
@@ -42,7 +42,8 @@
"<all_urls>",
"storage",
"clipboardWrite",
- "unlimitedStorage"
+ "unlimitedStorage",
+ "nativeMessaging"
],
"optional_permissions": [
"clipboardRead"
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js
index d24f56a6..78c419b2 100644
--- a/ext/mixed/js/japanese.js
+++ b/ext/mixed/js/japanese.js
@@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) {
const group = groups[0];
if (group.mode === 'kana') {
- if (reading.startsWith(group.text)) {
- const readingUsed = reading.substring(0, group.text.length);
+ if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) {
const readingLeft = reading.substring(group.text.length);
const segs = segmentize(readingLeft, groups.splice(1));
if (segs) {
- return [{text: readingUsed}].concat(segs);
+ return [{text: group.text}].concat(segs);
}
}
} else {
@@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) {
return segmentize(reading, groups) || fallback;
}
+
+/**
+ * Distributes furigana over a source string that may be an inflected form of
+ * the given expression.
+ *
+ * The leading part of `source` that lines up with `expression` (the stem) is
+ * passed to jpDistributeFurigana() together with the matching leading part
+ * of `reading`; whatever follows the stem is appended as a plain {text}
+ * segment.
+ *
+ * @param {string} expression - Expression the source was matched to.
+ * @param {string} reading - Reading belonging to `expression`.
+ * @param {string} source - Text as it appears in the input.
+ * @returns {Array} Segments from jpDistributeFurigana() for the stem, plus a
+ *   trailing {text} segment when the stem does not cover all of `source`.
+ */
+function jpDistributeFuriganaInflected(expression, reading, source) {
+    const limit = Math.min(source.length, expression.length);
+    const sourceHiragana = jpKatakanaToHiragana(source);
+    const expressionHiragana = jpKatakanaToHiragana(expression);
+
+    let stemLength = 0;
+    for (; stemLength < limit; ++stemLength) {
+        // sometimes an expression can use a kanji that's different from the
+        // source, so only kana positions are required to agree
+        const kanaMismatch =
+            jpIsKana(source[stemLength]) &&
+            sourceHiragana[stemLength] !== expressionHiragana[stemLength];
+        if (kanaMismatch) {
+            break;
+        }
+    }
+
+    const coversWholeSource = stemLength === source.length;
+    // NOTE(review): when the stem is shorter than the source and `reading` is
+    // shorter than the unmatched tail of `expression`, this index is negative
+    // and slice() counts from the end - confirm callers rule that out.
+    const readingEnd = coversWholeSource ?
+        reading.length :
+        reading.length - expression.length + stemLength;
+
+    const output = [
+        ...jpDistributeFurigana(
+            source.slice(0, stemLength),
+            reading.slice(0, readingEnd)
+        )
+    ];
+
+    if (!coversWholeSource) {
+        output.push({text: source.slice(stemLength)});
+    }
+
+    return output;
+}