diff options
| author | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-03 05:08:57 +0200 | 
|---|---|---|
| committer | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-23 17:45:44 +0200 | 
| commit | 41020289ab68ef22a0691a9f268a79d6a706df6b (patch) | |
| tree | 0cd10c38b37cc475dc306c5cf95e8a2e4247a98a | |
| parent | 3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f (diff) | |
add mecab support
| -rw-r--r-- | ext/bg/background.html | 1 | ||||
| -rw-r--r-- | ext/bg/js/api.js | 48 | ||||
| -rw-r--r-- | ext/bg/js/backend.js | 2 | ||||
| -rw-r--r-- | ext/bg/js/mecab.js | 63 | ||||
| -rw-r--r-- | ext/bg/js/search-query-parser.js | 3 | ||||
| -rw-r--r-- | ext/fg/js/api.js | 4 | ||||
| -rw-r--r-- | ext/manifest.json | 3 | ||||
| -rw-r--r-- | ext/mixed/js/japanese.js | 35 | 
8 files changed, 136 insertions, 23 deletions
| diff --git a/ext/bg/background.html b/ext/bg/background.html index bbfbd1e1..6e6e7c26 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -21,6 +21,7 @@          <script src="/mixed/js/extension.js"></script>          <script src="/bg/js/anki.js"></script> +        <script src="/bg/js/mecab.js"></script>          <script src="/bg/js/api.js"></script>          <script src="/bg/js/audio.js"></script>          <script src="/bg/js/backend-api-forwarder.js"></script> diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 7c9a72a7..2ab01af3 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) {              definitions = dictTermsSort(definitions);              const {expression, reading} = definitions[0];              const source = text.slice(0, sourceLength); - -            let stemLength = 0; -            const shortest = Math.min(source.length, expression.length); -            while (stemLength < shortest && source[stemLength] === expression[stemLength]) { -                ++stemLength; -            } -            const offset = source.length - stemLength; - -            for (const {text, furigana} of jpDistributeFurigana( -                source.slice(0, offset === 0 ? source.length : source.length - offset), -                reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength) -            )) { -                term.push({text, reading: furigana || ''}); -            } - -            if (stemLength !== source.length) { -                term.push({text: source.slice(stemLength)}); +            for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { +                // can't use 'furigana' in templates +                term.push({text, reading: furigana});              } -              text = text.slice(source.length);          } else {              term.push({text: text[0]}); @@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) {      return results;  } +async function apiTextParseMecab(text, optionsContext) { +    const options = await apiOptionsGet(optionsContext); +    const mecab = utilBackend().mecab; + +    const results = []; +    for (const parsedLine of await mecab.parseText(text)) { +        for (const {expression, reading, source} of parsedLine) { +            const term = []; +            if (expression && reading) { +                for (const {text, furigana} of jpDistributeFuriganaInflected( +                    expression, +                    jpKatakanaToHiragana(reading), +                    source +                )) { +                    // can't use 'furigana' in templates +                    term.push({text, reading: furigana}); +                } +            } else { +                term.push({text: source}); +            } +            results.push(term); +        } +        results.push([{text: '\n'}]); +    } +    return results; +} +  async function apiKanjiFind(text, optionsContext) {      const options = await apiOptionsGet(optionsContext);      const definitions = await utilBackend().translator.findKanji(text, options); diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index d0e404f2..e97f32b5 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -21,6 +21,7 @@ class Backend {      constructor() {          this.translator = new Translator();          this.anki = new AnkiNull(); +        this.mecab = new Mecab();          this.options = null;          this.optionsContext = {              depth: 0, @@ -181,6 +182,7 @@ Backend.messageHandlers = {      kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext),      termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext),      textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext), +    textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext),      definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext),      definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext),      noteView: ({noteId}) => apiNoteView(noteId), diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js new file mode 100644 index 00000000..dc46ded2 --- /dev/null +++ b/ext/bg/js/mecab.js @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2019  Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + + +class Mecab { +    constructor() { +        this.listeners = {}; +        this.sequence = 0; +        this.startListener(); +    } + +    async parseText(text) { +        return await this.invoke('parse_text', {text}); +    } + +    startListener() { +        this.port = chrome.runtime.connectNative('mecab'); +        this.port.onMessage.addListener((message) => { +            const {sequence, data} = message; +            const {callback, timer} = this.listeners[sequence] || {}; +            if (timer) { +                clearTimeout(timer); +                delete this.listeners[sequence]; +                callback(data); +            } +        }); +    } + +    invoke(action, params) { +        return new Promise((resolve, reject) => { +            const sequence = this.sequence++; + +            this.listeners[sequence] = { +                callback: (data) => { +                    resolve(data); +                }, +                timer: setTimeout(() => { +                    delete this.listeners[sequence]; +                    reject(`Mecab invoke timed out in ${Mecab.timeout} ms`); +                }, 1000) +            } + +            this.port.postMessage({action, params, sequence}); +        }); +    } +} + +Mecab.timeout = 1000; diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 8a7db69a..0c74e550 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -74,7 +74,8 @@ class QueryParser {              preview: true          }); -        const results = await apiTextParse(text, this.search.getOptionsContext()); +        // const results = await apiTextParse(text, this.search.getOptionsContext()); +        const results = await apiTextParseMecab(text, this.search.getOptionsContext());          const content = await apiTemplateRender('query-parser.html', {              terms: results.map((term) => { diff --git a/ext/fg/js/api.js b/ext/fg/js/api.js index cc1e0e90..92330d9c 100644 --- a/ext/fg/js/api.js +++ b/ext/fg/js/api.js @@ -33,6 +33,10 @@ function apiTextParse(text, optionsContext) {      return utilInvoke('textParse', {text, optionsContext});  } +function apiTextParseMecab(text, optionsContext) { +    return utilInvoke('textParseMecab', {text, optionsContext}); +} +  function apiKanjiFind(text, optionsContext) {      return utilInvoke('kanjiFind', {text, optionsContext});  } diff --git a/ext/manifest.json b/ext/manifest.json index fabceafd..4d75cd54 100644 --- a/ext/manifest.json +++ b/ext/manifest.json @@ -42,7 +42,8 @@          "<all_urls>",          "storage",          "clipboardWrite", -        "unlimitedStorage" +        "unlimitedStorage", +        "nativeMessaging"      ],      "optional_permissions": [          "clipboardRead" diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index d24f56a6..78c419b2 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) {          const group = groups[0];          if (group.mode === 'kana') { -            if (reading.startsWith(group.text)) { -                const readingUsed = reading.substring(0, group.text.length); +            if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) {                  const readingLeft = reading.substring(group.text.length);                  const segs = segmentize(readingLeft, groups.splice(1));                  if (segs) { -                    return [{text: readingUsed}].concat(segs); +                    return [{text: group.text}].concat(segs);                  }              }          } else { @@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) {      return segmentize(reading, groups) || fallback;  } + +function jpDistributeFuriganaInflected(expression, reading, source) { +    const output = []; + +    let stemLength = 0; +    const shortest = Math.min(source.length, expression.length); +    const sourceHiragana = jpKatakanaToHiragana(source); +    const expressionHiragana = jpKatakanaToHiragana(expression); +    while ( +        stemLength < shortest && +        // sometimes an expression can use a kanji that's different from the source +        (!jpIsKana(source[stemLength]) || (sourceHiragana[stemLength] === expressionHiragana[stemLength])) +    ) { +        ++stemLength; +    } +    const offset = source.length - stemLength; + +    for (const segment of jpDistributeFurigana( +        source.slice(0, offset === 0 ? source.length : source.length - offset), +        reading.slice(0, offset === 0 ? reading.length : reading.length - expression.length + stemLength) +    )) { +        output.push(segment); +    } + +    if (stemLength !== source.length) { +        output.push({text: source.slice(stemLength)}); +    } + +    return output; +} |