diff options
author | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-03 05:08:57 +0200 |
---|---|---|
committer | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-23 17:45:44 +0200 |
commit | 41020289ab68ef22a0691a9f268a79d6a706df6b (patch) | |
tree | 0cd10c38b37cc475dc306c5cf95e8a2e4247a98a /ext/bg | |
parent | 3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f (diff) |
add mecab support
Diffstat (limited to 'ext/bg')
-rw-r--r-- | ext/bg/background.html | 1 | ||||
-rw-r--r-- | ext/bg/js/api.js | 48 | ||||
-rw-r--r-- | ext/bg/js/backend.js | 2 | ||||
-rw-r--r-- | ext/bg/js/mecab.js | 63 | ||||
-rw-r--r-- | ext/bg/js/search-query-parser.js | 3 |
5 files changed, 98 insertions, 19 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html index bbfbd1e1..6e6e7c26 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -21,6 +21,7 @@ <script src="/mixed/js/extension.js"></script> <script src="/bg/js/anki.js"></script> + <script src="/bg/js/mecab.js"></script> <script src="/bg/js/api.js"></script> <script src="/bg/js/audio.js"></script> <script src="/bg/js/backend-api-forwarder.js"></script> diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 7c9a72a7..2ab01af3 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) { definitions = dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.slice(0, sourceLength); - - let stemLength = 0; - const shortest = Math.min(source.length, expression.length); - while (stemLength < shortest && source[stemLength] === expression[stemLength]) { - ++stemLength; - } - const offset = source.length - stemLength; - - for (const {text, furigana} of jpDistributeFurigana( - source.slice(0, offset === 0 ? source.length : source.length - offset), - reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength) - )) { - term.push({text, reading: furigana || ''}); - } - - if (stemLength !== source.length) { - term.push({text: source.slice(stemLength)}); + for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); } - text = text.slice(source.length); } else { term.push({text: text[0]}); @@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) { return results; } +async function apiTextParseMecab(text, optionsContext) { + const options = await apiOptionsGet(optionsContext); + const mecab = utilBackend().mecab; + + const results = []; + for (const parsedLine of await mecab.parseText(text)) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + if (expression && reading) { + for (const {text, furigana} of jpDistributeFuriganaInflected( + expression, + jpKatakanaToHiragana(reading), + source + )) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); + } + } else { + term.push({text: source}); + } + results.push(term); + } + results.push([{text: '\n'}]); + } + return results; +} + async function apiKanjiFind(text, optionsContext) { const options = await apiOptionsGet(optionsContext); const definitions = await utilBackend().translator.findKanji(text, options); diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index d0e404f2..e97f32b5 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -21,6 +21,7 @@ class Backend { constructor() { this.translator = new Translator(); this.anki = new AnkiNull(); + this.mecab = new Mecab(); this.options = null; this.optionsContext = { depth: 0, @@ -181,6 +182,7 @@ Backend.messageHandlers = { kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext), termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext), textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext), + textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext), definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext), definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext), noteView: ({noteId}) => apiNoteView(noteId), diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js new file mode 100644 index 00000000..dc46ded2 --- /dev/null +++ b/ext/bg/js/mecab.js @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2019 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +class Mecab { + constructor() { + this.listeners = {}; + this.sequence = 0; + this.startListener(); + } + + async parseText(text) { + return await this.invoke('parse_text', {text}); + } + + startListener() { + this.port = chrome.runtime.connectNative('mecab'); + this.port.onMessage.addListener((message) => { + const {sequence, data} = message; + const {callback, timer} = this.listeners[sequence] || {}; + if (timer) { + clearTimeout(timer); + delete this.listeners[sequence]; + callback(data); + } + }); + } + + invoke(action, params) { + return new Promise((resolve, reject) => { + const sequence = this.sequence++; + + this.listeners[sequence] = { + callback: (data) => { + resolve(data); + }, + timer: setTimeout(() => { + delete this.listeners[sequence]; + reject(`Mecab invoke timed out in ${Mecab.timeout} ms`); + }, 1000) + } + + this.port.postMessage({action, params, sequence}); + }); + } +} + +Mecab.timeout = 1000; diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 8a7db69a..0c74e550 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -74,7 +74,8 @@ class QueryParser { preview: true }); - const results = await apiTextParse(text, this.search.getOptionsContext()); + // const results = await apiTextParse(text, this.search.getOptionsContext()); + const results = await apiTextParseMecab(text, this.search.getOptionsContext()); const content = await apiTemplateRender('query-parser.html', { terms: results.map((term) => { |