From 41020289ab68ef22a0691a9f268a79d6a706df6b Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sun, 3 Nov 2019 05:08:57 +0200 Subject: add mecab support --- ext/bg/js/api.js | 48 ++++++++++++++++++------------ ext/bg/js/backend.js | 2 ++ ext/bg/js/mecab.js | 63 ++++++++++++++++++++++++++++++++++++++++ ext/bg/js/search-query-parser.js | 3 +- 4 files changed, 97 insertions(+), 19 deletions(-) create mode 100644 ext/bg/js/mecab.js (limited to 'ext/bg/js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 7c9a72a7..2ab01af3 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) { definitions = dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.slice(0, sourceLength); - - let stemLength = 0; - const shortest = Math.min(source.length, expression.length); - while (stemLength < shortest && source[stemLength] === expression[stemLength]) { - ++stemLength; - } - const offset = source.length - stemLength; - - for (const {text, furigana} of jpDistributeFurigana( - source.slice(0, offset === 0 ? source.length : source.length - offset), - reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength) - )) { - term.push({text, reading: furigana || ''}); - } - - if (stemLength !== source.length) { - term.push({text: source.slice(stemLength)}); + for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); } - text = text.slice(source.length); } else { term.push({text: text[0]}); @@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) { return results; } +async function apiTextParseMecab(text, optionsContext) { + const options = await apiOptionsGet(optionsContext); + const mecab = utilBackend().mecab; + + const results = []; + for (const parsedLine of await mecab.parseText(text)) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + if (expression && reading) { + for (const {text, furigana} of jpDistributeFuriganaInflected( + expression, + jpKatakanaToHiragana(reading), + source + )) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); + } + } else { + term.push({text: source}); + } + results.push(term); + } + results.push([{text: '\n'}]); + } + return results; +} + async function apiKanjiFind(text, optionsContext) { const options = await apiOptionsGet(optionsContext); const definitions = await utilBackend().translator.findKanji(text, options); diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index d0e404f2..e97f32b5 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -21,6 +21,7 @@ class Backend { constructor() { this.translator = new Translator(); this.anki = new AnkiNull(); + this.mecab = new Mecab(); this.options = null; this.optionsContext = { depth: 0, @@ -181,6 +182,7 @@ Backend.messageHandlers = { kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext), termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext), textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext), + textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext), definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext), definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext), noteView: ({noteId}) => apiNoteView(noteId), diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js new file mode 100644 index 00000000..dc46ded2 --- /dev/null +++ b/ext/bg/js/mecab.js @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2019 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +class Mecab { + constructor() { + this.listeners = {}; + this.sequence = 0; + this.startListener(); + } + + async parseText(text) { + return await this.invoke('parse_text', {text}); + } + + startListener() { + this.port = chrome.runtime.connectNative('mecab'); + this.port.onMessage.addListener((message) => { + const {sequence, data} = message; + const {callback, timer} = this.listeners[sequence] || {}; + if (timer) { + clearTimeout(timer); + delete this.listeners[sequence]; + callback(data); + } + }); + } + + invoke(action, params) { + return new Promise((resolve, reject) => { + const sequence = this.sequence++; + + this.listeners[sequence] = { + callback: (data) => { + resolve(data); + }, + timer: setTimeout(() => { + delete this.listeners[sequence]; + reject(`Mecab invoke timed out in ${Mecab.timeout} ms`); + }, 1000) + } + + this.port.postMessage({action, params, sequence}); + }); + } +} + +Mecab.timeout = 1000; diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 8a7db69a..0c74e550 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -74,7 +74,8 @@ class QueryParser { preview: true }); - const results = await apiTextParse(text, this.search.getOptionsContext()); + // const results = await apiTextParse(text, this.search.getOptionsContext()); + const results = await apiTextParseMecab(text, this.search.getOptionsContext()); const content = await apiTemplateRender('query-parser.html', { terms: results.map((term) => { -- cgit v1.2.3