From c35a05cd62d43ff435c022a353de55510b020277 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 30 Oct 2019 03:58:24 +0200 Subject: add kana to text --- ext/bg/js/api.js | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index df73aa2a..064903ca 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -79,6 +79,43 @@ async function apiTermsFind(text, details, optionsContext) { return {length, definitions}; } +async function apiTextParse(text, optionsContext) { + const options = await apiOptionsGet(optionsContext); + const translator = utilBackend().translator; + + const results = []; + while (text) { + let [definitions, length] = await translator.findTerms(text, {}, options); + if (definitions.length > 0) { + definitions = dictTermsSort(definitions); + const {expression, source, reading} = definitions[0]; + + let stemLength = source.length; + while (source[stemLength - 1] !== expression[stemLength - 1]) { + --stemLength; + } + const offset = source.length - stemLength; + + for (const result of jpDistributeFurigana( + source.slice(0, offset === 0 ? source.length : source.length - offset), + reading.slice(0, offset === 0 ? 
reading.length : source.length + (reading.length - expression.length) - offset) + )) { + results.push(result); + } + + if (stemLength !== source.length) { + results.push({text: source.slice(stemLength)}); + } + + text = text.slice(source.length); + } else { + results.push({text: text[0]}); + text = text.slice(1); + } + } + return results; +} + async function apiKanjiFind(text, optionsContext) { const options = await apiOptionsGet(optionsContext); const definitions = await utilBackend().translator.findKanji(text, options); -- cgit v1.2.3 From d19f447b80e286610a83114e2294a976a27adca5 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 30 Oct 2019 12:04:49 +0200 Subject: fix stem length checking Starting from the end and stopping at first match doesn't guarantee correctness. Starting from the beginning does. --- ext/bg/js/api.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 064903ca..174a439e 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -90,9 +90,10 @@ async function apiTextParse(text, optionsContext) { definitions = dictTermsSort(definitions); const {expression, source, reading} = definitions[0]; - let stemLength = source.length; - while (source[stemLength - 1] !== expression[stemLength - 1]) { - --stemLength; + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + while (stemLength < shortest && source[stemLength] === expression[stemLength]) { + ++stemLength; } const offset = source.length - stemLength; -- cgit v1.2.3 From e6a1b781648b8ab965a4508ea29ab85f0e070b35 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 30 Oct 2019 18:13:45 +0200 Subject: use correct source text --- ext/bg/js/api.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 174a439e..dbbe7368 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -85,10 +85,11 
@@ async function apiTextParse(text, optionsContext) { const results = []; while (text) { - let [definitions, length] = await translator.findTerms(text, {}, options); + let [definitions, sourceLength] = await translator.findTerms(text, {}, options); if (definitions.length > 0) { definitions = dictTermsSort(definitions); - const {expression, source, reading} = definitions[0]; + const {expression, reading} = definitions[0]; + const source = text.slice(0, sourceLength); let stemLength = 0; const shortest = Math.min(source.length, expression.length); -- cgit v1.2.3 From 3881457e4ed3f9c7833ac21a5e7fc44c2ba00b0f Mon Sep 17 00:00:00 2001 From: siikamiika Date: Thu, 31 Oct 2019 23:56:44 +0200 Subject: use handlebars templates for query parser --- ext/bg/js/api.js | 12 +++++----- ext/bg/js/search-query-parser.js | 40 ++++++++++++++------------------- ext/bg/js/templates.js | 48 ++++++++++++++++++++++++++++++++++++++++ ext/mixed/css/display.css | 9 ++++---- tmpl/query-parser.html | 23 +++++++++++++++++++ 5 files changed, 99 insertions(+), 33 deletions(-) create mode 100644 tmpl/query-parser.html (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index dbbe7368..7c9a72a7 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -85,6 +85,7 @@ async function apiTextParse(text, optionsContext) { const results = []; while (text) { + const term = []; let [definitions, sourceLength] = await translator.findTerms(text, {}, options); if (definitions.length > 0) { definitions = dictTermsSort(definitions); @@ -98,22 +99,23 @@ async function apiTextParse(text, optionsContext) { } const offset = source.length - stemLength; - for (const result of jpDistributeFurigana( + for (const {text, furigana} of jpDistributeFurigana( source.slice(0, offset === 0 ? source.length : source.length - offset), - reading.slice(0, offset === 0 ? reading.length : source.length + (reading.length - expression.length) - offset) + reading.slice(0, offset === 0 ? 
reading.length : reading.length - expression.length + stemLength) )) { - results.push(result); + term.push({text, reading: furigana || ''}); } if (stemLength !== source.length) { - results.push({text: source.slice(stemLength)}); + term.push({text: source.slice(stemLength)}); } text = text.slice(source.length); } else { - results.push({text: text[0]}); + term.push({text: text[0]}); text = text.slice(1); } + results.push(term); } return results; } diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 9bea6508..8a7db69a 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -59,39 +59,33 @@ class QueryParser { } async setText(text) { - this.queryParser.innerHTML = ''; this.search.setSpinnerVisible(true); + const previewTerms = []; let previewText = text; while (previewText) { const tempText = previewText.slice(0, 2); + previewTerms.push([{text: tempText}]); previewText = previewText.slice(2); - - const tempRuby = document.createElement('ruby'); - const tempFurigana = document.createElement('rt'); - tempRuby.appendChild(document.createTextNode(tempText)); - tempRuby.appendChild(tempFurigana); - this.queryParser.appendChild(tempRuby); } + this.queryParser.innerHTML = await apiTemplateRender('query-parser.html', { + terms: previewTerms, + preview: true + }); + const results = await apiTextParse(text, this.search.getOptionsContext()); - const textContainer = document.createElement('div'); - for (const {text, furigana} of results) { - const rubyElement = document.createElement('ruby'); - const furiganaElement = document.createElement('rt'); - if (furigana) { - furiganaElement.innerText = furigana; - rubyElement.appendChild(document.createTextNode(text)); - rubyElement.appendChild(furiganaElement); - } else { - rubyElement.appendChild(document.createTextNode(text)); - rubyElement.appendChild(furiganaElement); - } - textContainer.appendChild(rubyElement); - } - this.queryParser.innerHTML = ''; - 
this.queryParser.appendChild(textContainer); + const content = await apiTemplateRender('query-parser.html', { + terms: results.map((term) => { + return term.map((part) => { + part.raw = !part.text.trim() && (!part.reading || !part.reading.trim()); + return part; + }); + }) + }); + + this.queryParser.innerHTML = content; this.search.setSpinnerVisible(false); } diff --git a/ext/bg/js/templates.js b/ext/bg/js/templates.js index 823b9e6f..cc233d49 100644 --- a/ext/bg/js/templates.js +++ b/ext/bg/js/templates.js @@ -162,6 +162,54 @@ templates['kanji.html'] = template({"1":function(container,depth0,helpers,partia return fn; } +,"useDecorators":true,"usePartial":true,"useData":true,"useDepths":true}); +templates['query-parser.html'] = template({"1":function(container,depth0,helpers,partials,data) { + var stack1, alias1=depth0 != null ? depth0 : (container.nullContext || {}); + + return ((stack1 = helpers["if"].call(alias1,(depth0 != null ? depth0.preview : depth0),{"name":"if","hash":{},"fn":container.program(2, data, 0),"inverse":container.program(4, data, 0),"data":data})) != null ? stack1 : "") + + ((stack1 = helpers.each.call(alias1,depth0,{"name":"each","hash":{},"fn":container.program(6, data, 0),"inverse":container.noop,"data":data})) != null ? stack1 : "") + + ""; +},"2":function(container,depth0,helpers,partials,data) { + return ""; +},"4":function(container,depth0,helpers,partials,data) { + return ""; +},"6":function(container,depth0,helpers,partials,data) { + var stack1; + + return ((stack1 = container.invokePartial(partials.part,depth0,{"name":"part","data":data,"helpers":helpers,"partials":partials,"decorators":container.decorators})) != null ? stack1 : ""); +},"8":function(container,depth0,helpers,partials,data) { + var stack1; + + return ((stack1 = helpers["if"].call(depth0 != null ? depth0 : (container.nullContext || {}),(depth0 != null ? 
depth0.raw : depth0),{"name":"if","hash":{},"fn":container.program(9, data, 0),"inverse":container.program(11, data, 0),"data":data})) != null ? stack1 : ""); +},"9":function(container,depth0,helpers,partials,data) { + var helper; + + return container.escapeExpression(((helper = (helper = helpers.text || (depth0 != null ? depth0.text : depth0)) != null ? helper : helpers.helperMissing),(typeof helper === "function" ? helper.call(depth0 != null ? depth0 : (container.nullContext || {}),{"name":"text","hash":{},"data":data}) : helper))); +},"11":function(container,depth0,helpers,partials,data) { + var helper, alias1=depth0 != null ? depth0 : (container.nullContext || {}), alias2=helpers.helperMissing, alias3="function", alias4=container.escapeExpression; + + return "" + + alias4(((helper = (helper = helpers.text || (depth0 != null ? depth0.text : depth0)) != null ? helper : alias2),(typeof helper === alias3 ? helper.call(alias1,{"name":"text","hash":{},"data":data}) : helper))) + + "" + + alias4(((helper = (helper = helpers.reading || (depth0 != null ? depth0.reading : depth0)) != null ? helper : alias2),(typeof helper === alias3 ? helper.call(alias1,{"name":"reading","hash":{},"data":data}) : helper))) + + ""; +},"13":function(container,depth0,helpers,partials,data,blockParams,depths) { + var stack1; + + return ((stack1 = container.invokePartial(partials.term,depth0,{"name":"term","hash":{"preview":(depths[1] != null ? depths[1].preview : depths[1])},"data":data,"helpers":helpers,"partials":partials,"decorators":container.decorators})) != null ? stack1 : ""); +},"compiler":[7,">= 4.0.0"],"main":function(container,depth0,helpers,partials,data,blockParams,depths) { + var stack1; + + return ((stack1 = helpers.each.call(depth0 != null ? depth0 : (container.nullContext || {}),(depth0 != null ? depth0.terms : depth0),{"name":"each","hash":{},"fn":container.program(13, data, 0, blockParams, depths),"inverse":container.noop,"data":data})) != null ? 
stack1 : ""); +},"main_d": function(fn, props, container, depth0, data, blockParams, depths) { + + var decorators = container.decorators; + + fn = decorators.inline(fn,props,container,{"name":"inline","hash":{},"fn":container.program(1, data, 0, blockParams, depths),"inverse":container.noop,"args":["term"],"data":data}) || fn; + fn = decorators.inline(fn,props,container,{"name":"inline","hash":{},"fn":container.program(8, data, 0, blockParams, depths),"inverse":container.noop,"args":["part"],"data":data}) || fn; + return fn; + } + ,"useDecorators":true,"usePartial":true,"useData":true,"useDepths":true}); templates['terms.html'] = template({"1":function(container,depth0,helpers,partials,data) { var stack1, helper, options, alias1=depth0 != null ? depth0 : (container.nullContext || {}), buffer = diff --git a/ext/mixed/css/display.css b/ext/mixed/css/display.css index 65b8b466..d24aa58c 100644 --- a/ext/mixed/css/display.css +++ b/ext/mixed/css/display.css @@ -93,13 +93,12 @@ ol, ul { font-size: 24px; } -html:root[data-yomichan-page=search] body { - min-height: 101vh; /* always show scroll bar to avoid scanning problems */ +.query-parser-term { + margin-right: 5px; } -#query-parser { - margin-top: 10px; - font-size: 24px; +html:root[data-yomichan-page=search] body { + min-height: 101vh; /* always show scroll bar to avoid scanning problems */ } diff --git a/tmpl/query-parser.html b/tmpl/query-parser.html new file mode 100644 index 00000000..818650e6 --- /dev/null +++ b/tmpl/query-parser.html @@ -0,0 +1,23 @@ +{{~#*inline "term"~}} +{{~#if preview~}} + +{{~else~}} + +{{~/if~}} +{{~#each this~}} +{{> part }} +{{~/each~}} + +{{~/inline~}} + +{{~#*inline "part"~}} +{{~#if raw~}} +{{text}} +{{~else~}} +{{text}}{{reading}} +{{~/if~}} +{{~/inline~}} + +{{~#each terms~}} +{{> term preview=../preview }} +{{~/each~}} -- cgit v1.2.3 From 41020289ab68ef22a0691a9f268a79d6a706df6b Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sun, 3 Nov 2019 05:08:57 +0200 Subject: add mecab 
support --- ext/bg/background.html | 1 + ext/bg/js/api.js | 48 ++++++++++++++++++------------ ext/bg/js/backend.js | 2 ++ ext/bg/js/mecab.js | 63 ++++++++++++++++++++++++++++++++++++++++ ext/bg/js/search-query-parser.js | 3 +- ext/fg/js/api.js | 4 +++ ext/manifest.json | 3 +- ext/mixed/js/japanese.js | 35 ++++++++++++++++++++-- 8 files changed, 136 insertions(+), 23 deletions(-) create mode 100644 ext/bg/js/mecab.js (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/background.html b/ext/bg/background.html index bbfbd1e1..6e6e7c26 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -21,6 +21,7 @@ + diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 7c9a72a7..2ab01af3 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -91,25 +91,10 @@ async function apiTextParse(text, optionsContext) { definitions = dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.slice(0, sourceLength); - - let stemLength = 0; - const shortest = Math.min(source.length, expression.length); - while (stemLength < shortest && source[stemLength] === expression[stemLength]) { - ++stemLength; - } - const offset = source.length - stemLength; - - for (const {text, furigana} of jpDistributeFurigana( - source.slice(0, offset === 0 ? source.length : source.length - offset), - reading.slice(0, offset === 0 ? 
reading.length : reading.length - expression.length + stemLength) - )) { - term.push({text, reading: furigana || ''}); - } - - if (stemLength !== source.length) { - term.push({text: source.slice(stemLength)}); + for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); } - text = text.slice(source.length); } else { term.push({text: text[0]}); @@ -120,6 +105,33 @@ async function apiTextParse(text, optionsContext) { return results; } +async function apiTextParseMecab(text, optionsContext) { + const options = await apiOptionsGet(optionsContext); + const mecab = utilBackend().mecab; + + const results = []; + for (const parsedLine of await mecab.parseText(text)) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + if (expression && reading) { + for (const {text, furigana} of jpDistributeFuriganaInflected( + expression, + jpKatakanaToHiragana(reading), + source + )) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); + } + } else { + term.push({text: source}); + } + results.push(term); + } + results.push([{text: '\n'}]); + } + return results; +} + async function apiKanjiFind(text, optionsContext) { const options = await apiOptionsGet(optionsContext); const definitions = await utilBackend().translator.findKanji(text, options); diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index d0e404f2..e97f32b5 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -21,6 +21,7 @@ class Backend { constructor() { this.translator = new Translator(); this.anki = new AnkiNull(); + this.mecab = new Mecab(); this.options = null; this.optionsContext = { depth: 0, @@ -181,6 +182,7 @@ Backend.messageHandlers = { kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext), termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext), textParse: ({text, 
optionsContext}) => apiTextParse(text, optionsContext), + textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext), definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext), definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext), noteView: ({noteId}) => apiNoteView(noteId), diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js new file mode 100644 index 00000000..dc46ded2 --- /dev/null +++ b/ext/bg/js/mecab.js @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2019 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + + +class Mecab { + constructor() { + this.listeners = {}; + this.sequence = 0; + this.startListener(); + } + + async parseText(text) { + return await this.invoke('parse_text', {text}); + } + + startListener() { + this.port = chrome.runtime.connectNative('mecab'); + this.port.onMessage.addListener((message) => { + const {sequence, data} = message; + const {callback, timer} = this.listeners[sequence] || {}; + if (timer) { + clearTimeout(timer); + delete this.listeners[sequence]; + callback(data); + } + }); + } + + invoke(action, params) { + return new Promise((resolve, reject) => { + const sequence = this.sequence++; + + this.listeners[sequence] = { + callback: (data) => { + resolve(data); + }, + timer: setTimeout(() => { + delete this.listeners[sequence]; + reject(`Mecab invoke timed out in ${Mecab.timeout} ms`); + }, 1000) + } + + this.port.postMessage({action, params, sequence}); + }); + } +} + +Mecab.timeout = 1000; diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 8a7db69a..0c74e550 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -74,7 +74,8 @@ class QueryParser { preview: true }); - const results = await apiTextParse(text, this.search.getOptionsContext()); + // const results = await apiTextParse(text, this.search.getOptionsContext()); + const results = await apiTextParseMecab(text, this.search.getOptionsContext()); const content = await apiTemplateRender('query-parser.html', { terms: results.map((term) => { diff --git a/ext/fg/js/api.js b/ext/fg/js/api.js index cc1e0e90..92330d9c 100644 --- a/ext/fg/js/api.js +++ b/ext/fg/js/api.js @@ -33,6 +33,10 @@ function apiTextParse(text, optionsContext) { return utilInvoke('textParse', {text, optionsContext}); } +function apiTextParseMecab(text, optionsContext) { + return utilInvoke('textParseMecab', {text, optionsContext}); +} + function apiKanjiFind(text, optionsContext) { return utilInvoke('kanjiFind', {text, optionsContext}); } diff 
--git a/ext/manifest.json b/ext/manifest.json index fabceafd..4d75cd54 100644 --- a/ext/manifest.json +++ b/ext/manifest.json @@ -42,7 +42,8 @@ "", "storage", "clipboardWrite", - "unlimitedStorage" + "unlimitedStorage", + "nativeMessaging" ], "optional_permissions": [ "clipboardRead" diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index d24f56a6..78c419b2 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -61,12 +61,11 @@ function jpDistributeFurigana(expression, reading) { const group = groups[0]; if (group.mode === 'kana') { - if (reading.startsWith(group.text)) { - const readingUsed = reading.substring(0, group.text.length); + if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) { const readingLeft = reading.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: readingUsed}].concat(segs); + return [{text: group.text}].concat(segs); } } } else { @@ -95,3 +94,33 @@ function jpDistributeFurigana(expression, reading) { return segmentize(reading, groups) || fallback; } + +function jpDistributeFuriganaInflected(expression, reading, source) { + const output = []; + + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + const sourceHiragana = jpKatakanaToHiragana(source); + const expressionHiragana = jpKatakanaToHiragana(expression); + while ( + stemLength < shortest && + // sometimes an expression can use a kanji that's different from the source + (!jpIsKana(source[stemLength]) || (sourceHiragana[stemLength] === expressionHiragana[stemLength])) + ) { + ++stemLength; + } + const offset = source.length - stemLength; + + for (const segment of jpDistributeFurigana( + source.slice(0, offset === 0 ? source.length : source.length - offset), + reading.slice(0, offset === 0 ? 
reading.length : reading.length - expression.length + stemLength) + )) { + output.push(segment); + } + + if (stemLength !== source.length) { + output.push({text: source.slice(stemLength)}); + } + + return output; +} -- cgit v1.2.3 From 955e131f9673e006556bc2c5e0b3551a614ccc48 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 5 Nov 2019 15:56:45 +0200 Subject: add parser selection options --- ext/bg/js/api.js | 37 +++++++++++++++++++++---------------- ext/bg/js/mecab.js | 2 +- ext/bg/js/options.js | 5 +++++ ext/bg/js/search-query-parser.js | 40 +++++++++++++++++++++++++--------------- ext/bg/js/settings.js | 6 ++++++ ext/bg/settings.html | 29 +++++++++++++++++++++++++++++ 6 files changed, 87 insertions(+), 32 deletions(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 2ab01af3..967bded7 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -109,25 +109,30 @@ async function apiTextParseMecab(text, optionsContext) { const options = await apiOptionsGet(optionsContext); const mecab = utilBackend().mecab; - const results = []; - for (const parsedLine of await mecab.parseText(text)) { - for (const {expression, reading, source} of parsedLine) { - const term = []; - if (expression && reading) { - for (const {text, furigana} of jpDistributeFuriganaInflected( - expression, - jpKatakanaToHiragana(reading), - source - )) { - // can't use 'furigana' in templates - term.push({text, reading: furigana}); + const results = {}; + const rawResults = await mecab.parseText(text); + for (const mecabName in rawResults) { + const result = []; + for (const parsedLine of rawResults[mecabName]) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + if (expression && reading) { + for (const {text, furigana} of jpDistributeFuriganaInflected( + expression, + jpKatakanaToHiragana(reading), + source + )) { + // can't use 'furigana' in templates + term.push({text, reading: furigana}); + } + } else { + term.push({text: 
source}); } - } else { - term.push({text: source}); + result.push(term); } - results.push(term); + result.push([{text: '\n'}]); } - results.push([{text: '\n'}]); + results[mecabName] = result; } return results; } diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js index 14f68393..fba9b2eb 100644 --- a/ext/bg/js/mecab.js +++ b/ext/bg/js/mecab.js @@ -60,4 +60,4 @@ class Mecab { } } -Mecab.timeout = 1000; +Mecab.timeout = 5000; diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index be1ccfbb..f1bafaf9 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -311,6 +311,11 @@ function profileOptionsCreateDefaults() { dictionaries: {}, + parsing: { + enableScanningParser: true, + enableMecabParser: false + }, + anki: { enable: false, server: 'http://127.0.0.1:8765', diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 1cf00425..81eb18c3 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -86,22 +86,32 @@ class QueryParser { this.search.setSpinnerVisible(true); await this.setPreview(text); - // const results = await apiTextParse(text, this.search.getOptionsContext()); - const results = await apiTextParseMecab(text, this.search.getOptionsContext()); - - const content = await apiTemplateRender('query-parser.html', { - terms: results.map((term) => { - return term.filter(part => part.text.trim()).map((part) => { - return { - text: Array.from(part.text), - reading: part.reading, - raw: !part.reading || !part.reading.trim(), - }; - }); - }) - }); + const results = {}; + if (this.search.options.parsing.enableScanningParser) { + results['scan'] = await apiTextParse(text, this.search.getOptionsContext()); + } + if (this.search.options.parsing.enableMecabParser) { + let mecabResults = await apiTextParseMecab(text, this.search.getOptionsContext()); + for (const mecabDictName in mecabResults) { + results[`mecab-${mecabDictName}`] = mecabResults[mecabDictName]; + } + } - this.queryParser.innerHTML = 
content; + const contents = await Promise.all(Object.values(results).map(async result => { + return await apiTemplateRender('query-parser.html', { + terms: result.map((term) => { + return term.filter(part => part.text.trim()).map((part) => { + return { + text: Array.from(part.text), + reading: part.reading, + raw: !part.reading || !part.reading.trim(), + }; + }); + }) + }); + })); + + this.queryParser.innerHTML = contents.join('
'); this.queryParser.querySelectorAll('.query-parser-char').forEach((charElement) => { this.activateScanning(charElement); diff --git a/ext/bg/js/settings.js b/ext/bg/js/settings.js index e562c54e..f4fe032a 100644 --- a/ext/bg/js/settings.js +++ b/ext/bg/js/settings.js @@ -64,6 +64,9 @@ async function formRead(options) { options.scanning.modifier = $('#scan-modifier-key').val(); options.scanning.popupNestingMaxDepth = parseInt($('#popup-nesting-max-depth').val(), 10); + options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); + options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); + const optionsAnkiEnableOld = options.anki.enable; options.anki.enable = $('#anki-enable').prop('checked'); options.anki.tags = utilBackgroundIsolate($('#card-tags').val().split(/[,; ]+/)); @@ -126,6 +129,9 @@ async function formWrite(options) { $('#scan-modifier-key').val(options.scanning.modifier); $('#popup-nesting-max-depth').val(options.scanning.popupNestingMaxDepth); + $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); + $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); + $('#anki-enable').prop('checked', options.anki.enable); $('#card-tags').val(options.anki.tags.join(' ')); $('#sentence-detection-extent').val(options.anki.sentenceExt); diff --git a/ext/bg/settings.html b/ext/bg/settings.html index bdcc11d3..8505567b 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -587,6 +587,35 @@ +
+

Text Parsing Options

+ +

+ Yomichan can attempt to parse entire sentences or longer text blocks on the search page, + adding furigana above words and a small space between words. +

+ +

+ Two types of parsers are supported. The first one, enabled by default, works using the built-in + scanning functionality by automatically advancing in the sentence after a matching word. +

+ +

+ The second type is an external program called MeCab + that uses its own dictionaries and a special parsing algorithm. To get it working, you must first + install it and a native messaging component + that acts as a bridge between the program and Yomichan. +

+ +
+ +
+ +
+ +
+
+
-- cgit v1.2.3 From b336ab3a9a0a2ac9a569ae217b506ceeeab6fda0 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Mon, 11 Nov 2019 01:43:35 +0200 Subject: use const --- ext/bg/js/api.js | 4 ++-- ext/bg/js/search-query-parser.js | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 967bded7..40e9b6d2 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -86,9 +86,9 @@ async function apiTextParse(text, optionsContext) { const results = []; while (text) { const term = []; - let [definitions, sourceLength] = await translator.findTerms(text, {}, options); + const [definitions, sourceLength] = await translator.findTerms(text, {}, options); if (definitions.length > 0) { - definitions = dictTermsSort(definitions); + dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.slice(0, sourceLength); for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 2e17688e..f8e53963 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -113,9 +113,9 @@ class QueryParser { this.queryParser.innerHTML = contents.join('
'); - this.queryParser.querySelectorAll('.query-parser-char').forEach((charElement) => { + for (const charElement of this.queryParser.querySelectorAll('.query-parser-char')) { this.activateScanning(charElement); - }); + } this.search.setSpinnerVisible(false); } @@ -133,9 +133,9 @@ class QueryParser { preview: true }); - this.queryParser.querySelectorAll('.query-parser-char').forEach((charElement) => { + for (const charElement of this.queryParser.querySelectorAll('.query-parser-char')) { this.activateScanning(charElement); - }); + } } activateScanning(element) { -- cgit v1.2.3 From b02a30a2fddb3b660a62f66541e90dba519cb270 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Mon, 11 Nov 2019 21:43:35 +0200 Subject: explicit checks in while and if --- ext/bg/js/api.js | 4 ++-- ext/bg/js/search-query-parser.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 40e9b6d2..bc9dfba1 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -84,7 +84,7 @@ async function apiTextParse(text, optionsContext) { const translator = utilBackend().translator; const results = []; - while (text) { + while (text.length > 0) { const term = []; const [definitions, sourceLength] = await translator.findTerms(text, {}, options); if (definitions.length > 0) { @@ -116,7 +116,7 @@ async function apiTextParseMecab(text, optionsContext) { for (const parsedLine of rawResults[mecabName]) { for (const {expression, reading, source} of parsedLine) { const term = []; - if (expression && reading) { + if (expression !== null && reading !== null) { for (const {text, furigana} of jpDistributeFuriganaInflected( expression, jpKatakanaToHiragana(reading), diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 2aee45dd..14b78105 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -122,7 +122,7 @@ class QueryParser { async setPreview(text) { const previewTerms 
= []; - while (text) { + while (text.length > 0) { const tempText = text.slice(0, 2); previewTerms.push([{text: Array.from(tempText)}]); text = text.slice(2); -- cgit v1.2.3 From cc8221c6ea686521261e2ac562d3d5a6d0b9913a Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 13 Nov 2019 20:24:11 +0200 Subject: add reading modes --- ext/bg/js/api.js | 14 ++++++++------ ext/bg/js/options.js | 3 ++- ext/bg/js/settings.js | 2 ++ ext/bg/settings.html | 9 +++++++++ ext/mixed/js/japanese.js | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 7 deletions(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index bc9dfba1..228447c3 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -92,12 +92,13 @@ async function apiTextParse(text, optionsContext) { const {expression, reading} = definitions[0]; const source = text.slice(0, sourceLength); for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { - // can't use 'furigana' in templates - term.push({text, reading: furigana}); + const reading = jpConvertReading(text, furigana, options.parsing.readingMode); + term.push({text, reading}); } text = text.slice(source.length); } else { - term.push({text: text[0]}); + const reading = jpConvertReading(text[0], null, options.parsing.readingMode); + term.push({text: text[0], reading}); text = text.slice(1); } results.push(term); @@ -122,11 +123,12 @@ async function apiTextParseMecab(text, optionsContext) { jpKatakanaToHiragana(reading), source )) { - // can't use 'furigana' in templates - term.push({text, reading: furigana}); + const reading = jpConvertReading(text, furigana, options.parsing.readingMode); + term.push({text, reading}); } } else { - term.push({text: source}); + const reading = jpConvertReading(source, null, options.parsing.readingMode); + term.push({text: source, reading}); } result.push(term); } diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 053fb13d..b9bf85f3 
100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -314,7 +314,8 @@ function profileOptionsCreateDefaults() { parsing: { enableScanningParser: true, enableMecabParser: false, - selectedParser: null + selectedParser: null, + readingMode: 'hiragana' }, anki: { diff --git a/ext/bg/js/settings.js b/ext/bg/js/settings.js index f4fe032a..ab267c32 100644 --- a/ext/bg/js/settings.js +++ b/ext/bg/js/settings.js @@ -66,6 +66,7 @@ async function formRead(options) { options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); + options.parsing.readingMode = $('#parsing-reading-mode').val(); const optionsAnkiEnableOld = options.anki.enable; options.anki.enable = $('#anki-enable').prop('checked'); @@ -131,6 +132,7 @@ async function formWrite(options) { $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); + $('#parsing-reading-mode').val(options.parsing.readingMode); $('#anki-enable').prop('checked', options.anki.enable); $('#card-tags').val(options.anki.tags.join(' ')); diff --git a/ext/bg/settings.html b/ext/bg/settings.html index 08b9b6c1..0badb817 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -437,6 +437,15 @@
+ +
+ + +
diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index e2d7a090..a7cd0452 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -48,6 +48,43 @@ function jpKatakanaToHiragana(text) { return result; } +function jpHiraganaToKatakana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isHiragana(c)) { + result += wanakana.toKatakana(c); + } else { + result += c; + } + } + + return result; +} + +function jpToRomaji(text) { + return wanakana.toRomaji(text); +} + +function jpConvertReading(expressionFragment, readingFragment, readingMode) { + switch (readingMode) { + case 'hiragana': + return jpKatakanaToHiragana(readingFragment || ''); + case 'katakana': + return jpHiraganaToKatakana(readingFragment || ''); + case 'romaji': + if (readingFragment) { + return jpToRomaji(readingFragment); + } else { + if (jpIsKana(expressionFragment)) { + return jpToRomaji(expressionFragment); + } + } + return readingFragment; + default: + return readingFragment; + } +} + function jpDistributeFurigana(expression, reading) { const fallback = [{furigana: reading, text: expression}]; if (!reading) { -- cgit v1.2.3 From 2577d4054e344d2e6dc08d2cf95e83478c05bee6 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sat, 23 Nov 2019 18:08:22 +0200 Subject: fix scanning parser --- ext/bg/js/api.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'ext/bg/js/api.js') diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 228447c3..766fb0ed 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -86,7 +86,12 @@ async function apiTextParse(text, optionsContext) { const results = []; while (text.length > 0) { const term = []; - const [definitions, sourceLength] = await translator.findTerms(text, {}, options); + const [definitions, sourceLength] = await translator.findTermsInternal( + text.slice(0, options.scanning.length), + dictEnabledSet(options), + options.scanning.alphanumeric, + {} + ); if (definitions.length > 0) { 
dictTermsSort(definitions); const {expression, reading} = definitions[0]; -- cgit v1.2.3