diff options
author | siikamiika <siikamiika@users.noreply.github.com> | 2019-11-26 18:31:35 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-26 18:31:35 +0200 |
commit | 3423ed7d67ff7039d21da0b734e306b2c1024643 (patch) | |
tree | 92193ecc5e041a47f2afa8417adf17e215381e1e | |
parent | 820beb7c9338bd81501d1779b10181c7cced9992 (diff) | |
parent | 89c6ef54b0a44685cde530dd4a94405a578ce132 (diff) |
Merge pull request #279 from siikamiika/query-parser-1
Search page query parser
-rw-r--r-- | ext/bg/background.html | 1 | ||||
-rw-r--r-- | ext/bg/js/api.js | 65 | ||||
-rw-r--r-- | ext/bg/js/backend.js | 9 | ||||
-rw-r--r-- | ext/bg/js/mecab.js | 92 | ||||
-rw-r--r-- | ext/bg/js/options.js | 7 | ||||
-rw-r--r-- | ext/bg/js/search-query-parser.js | 228 | ||||
-rw-r--r-- | ext/bg/js/search.js | 24 | ||||
-rw-r--r-- | ext/bg/js/settings.js | 8 | ||||
-rw-r--r-- | ext/bg/js/templates.js | 52 | ||||
-rw-r--r-- | ext/bg/search.html | 8 | ||||
-rw-r--r-- | ext/bg/settings.html | 38 | ||||
-rw-r--r-- | ext/fg/js/api.js | 8 | ||||
-rw-r--r-- | ext/manifest.json | 3 | ||||
-rw-r--r-- | ext/mixed/css/display.css | 13 | ||||
-rw-r--r-- | ext/mixed/js/display.js | 76 | ||||
-rw-r--r-- | ext/mixed/js/japanese.js | 69 | ||||
-rw-r--r-- | tmpl/query-parser.html | 27 |
17 files changed, 693 insertions, 35 deletions
diff --git a/ext/bg/background.html b/ext/bg/background.html index bbfbd1e1..6e6e7c26 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -21,6 +21,7 @@ <script src="/mixed/js/extension.js"></script> <script src="/bg/js/anki.js"></script> + <script src="/bg/js/mecab.js"></script> <script src="/bg/js/api.js"></script> <script src="/bg/js/audio.js"></script> <script src="/bg/js/backend-api-forwarder.js"></script> diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index df73aa2a..766fb0ed 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -79,6 +79,71 @@ async function apiTermsFind(text, details, optionsContext) { return {length, definitions}; } +async function apiTextParse(text, optionsContext) { + const options = await apiOptionsGet(optionsContext); + const translator = utilBackend().translator; + + const results = []; + while (text.length > 0) { + const term = []; + const [definitions, sourceLength] = await translator.findTermsInternal( + text.slice(0, options.scanning.length), + dictEnabledSet(options), + options.scanning.alphanumeric, + {} + ); + if (definitions.length > 0) { + dictTermsSort(definitions); + const {expression, reading} = definitions[0]; + const source = text.slice(0, sourceLength); + for (const {text, furigana} of jpDistributeFuriganaInflected(expression, reading, source)) { + const reading = jpConvertReading(text, furigana, options.parsing.readingMode); + term.push({text, reading}); + } + text = text.slice(source.length); + } else { + const reading = jpConvertReading(text[0], null, options.parsing.readingMode); + term.push({text: text[0], reading}); + text = text.slice(1); + } + results.push(term); + } + return results; +} + +async function apiTextParseMecab(text, optionsContext) { + const options = await apiOptionsGet(optionsContext); + const mecab = utilBackend().mecab; + + const results = {}; + const rawResults = await mecab.parseText(text); + for (const mecabName in rawResults) { + const result = []; + for (const parsedLine of rawResults[mecabName]) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + if (expression !== null && reading !== null) { + for (const {text, furigana} of jpDistributeFuriganaInflected( + expression, + jpKatakanaToHiragana(reading), + source + )) { + const reading = jpConvertReading(text, furigana, options.parsing.readingMode); + term.push({text, reading}); + } + } else { + const reading = jpConvertReading(source, null, options.parsing.readingMode); + term.push({text: source, reading}); + } + result.push(term); + } + result.push([{text: '\n'}]); + } + results[mecabName] = result; + } + return results; +} + async function apiKanjiFind(text, optionsContext) { const options = await apiOptionsGet(optionsContext); const definitions = await utilBackend().translator.findKanji(text, options); diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index efad153a..45db9660 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -21,6 +21,7 @@ class Backend { constructor() { this.translator = new Translator(); this.anki = new AnkiNull(); + this.mecab = new Mecab(); this.options = null; this.optionsContext = { depth: 0, @@ -97,6 +98,12 @@ class Backend { } this.anki = options.anki.enable ? new AnkiConnect(options.anki.server) : new AnkiNull(); + + if (options.parsing.enableMecabParser) { + this.mecab.startListener(); + } else { + this.mecab.stopListener(); + } } async getFullOptions() { @@ -180,6 +187,8 @@ Backend.messageHandlers = { optionsSet: ({changedOptions, optionsContext, source}) => apiOptionsSet(changedOptions, optionsContext, source), kanjiFind: ({text, optionsContext}) => apiKanjiFind(text, optionsContext), termsFind: ({text, details, optionsContext}) => apiTermsFind(text, details, optionsContext), + textParse: ({text, optionsContext}) => apiTextParse(text, optionsContext), + textParseMecab: ({text, optionsContext}) => apiTextParseMecab(text, optionsContext), definitionAdd: ({definition, mode, context, optionsContext}) => apiDefinitionAdd(definition, mode, context, optionsContext), definitionsAddable: ({definitions, modes, optionsContext}) => apiDefinitionsAddable(definitions, modes, optionsContext), noteView: ({noteId}) => apiNoteView(noteId), diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js new file mode 100644 index 00000000..246f8bba --- /dev/null +++ b/ext/bg/js/mecab.js @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2019 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +class Mecab { + constructor() { + this.port = null; + this.listeners = {}; + this.sequence = 0; + } + + onError(error) { + logError(error, false); + } + + async checkVersion() { + try { + const {version} = await this.invoke('get_version', {}); + if (version !== Mecab.version) { + this.stopListener(); + throw new Error(`Unsupported MeCab native messenger version ${version}. Yomichan supports version ${Mecab.version}.`); + } + } catch (error) { + this.onError(error); + } + } + + async parseText(text) { + return await this.invoke('parse_text', {text}); + } + + startListener() { + if (this.port !== null) { return; } + this.port = chrome.runtime.connectNative('yomichan_mecab'); + this.port.onMessage.addListener(this.onNativeMessage.bind(this)); + this.checkVersion(); + } + + stopListener() { + if (this.port === null) { return; } + this.port.disconnect(); + this.port = null; + this.listeners = {}; + this.sequence = 0; + } + + onNativeMessage({sequence, data}) { + if (this.listeners.hasOwnProperty(sequence)) { + const {callback, timer} = this.listeners[sequence]; + clearTimeout(timer); + callback(data); + delete this.listeners[sequence]; + } + } + + invoke(action, params) { + if (this.port === null) { + return Promise.resolve({}); + } + return new Promise((resolve, reject) => { + const sequence = this.sequence++; + + this.listeners[sequence] = { + callback: resolve, + timer: setTimeout(() => { + delete this.listeners[sequence]; + reject(new Error(`Mecab invoke timed out in ${Mecab.timeout} ms`)); + }, Mecab.timeout) + } + + this.port.postMessage({action, params, sequence}); + }); + } +} + +Mecab.timeout = 5000; +Mecab.version = 1; diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index be1ccfbb..b9bf85f3 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -311,6 +311,13 @@ function profileOptionsCreateDefaults() { dictionaries: {}, + parsing: { + enableScanningParser: true, + enableMecabParser: false, + selectedParser: null, + readingMode: 'hiragana' + }, + anki: { enable: false, server: 'http://127.0.0.1:8765', diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js new file mode 100644 index 00000000..42e53989 --- /dev/null +++ b/ext/bg/js/search-query-parser.js @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2019 Alex Yatskov <alex@foosoft.net> + * Author: Alex Yatskov <alex@foosoft.net> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +class QueryParser { + constructor(search) { + this.search = search; + this.pendingLookup = false; + this.clickScanPrevent = false; + + this.parseResults = []; + this.selectedParser = null; + + this.queryParser = document.querySelector('#query-parser'); + this.queryParserSelect = document.querySelector('#query-parser-select'); + + this.queryParser.addEventListener('mousedown', (e) => this.onMouseDown(e)); + this.queryParser.addEventListener('mouseup', (e) => this.onMouseUp(e)); + } + + onError(error) { + logError(error, false); + } + + onMouseDown(e) { + if (Frontend.isMouseButton('primary', e)) { + this.clickScanPrevent = false; + } + } + + onMouseUp(e) { + if ( + this.search.options.scanning.clickGlossary && + !this.clickScanPrevent && + Frontend.isMouseButton('primary', e) + ) { + const selectText = this.search.options.scanning.selectText; + this.onTermLookup(e, {disableScroll: true, selectText}); + } + } + + onMouseMove(e) { + if (this.pendingLookup || Frontend.isMouseButton('primary', e)) { + return; + } + + const scanningOptions = this.search.options.scanning; + const scanningModifier = scanningOptions.modifier; + if (!( + Frontend.isScanningModifierPressed(scanningModifier, e) || + (scanningOptions.middleMouse && Frontend.isMouseButton('auxiliary', e)) + )) { + return; + } + + const selectText = this.search.options.scanning.selectText; + this.onTermLookup(e, {disableScroll: true, disableHistory: true, selectText}); + } + + onMouseLeave(e) { + this.clickScanPrevent = true; + clearTimeout(e.target.dataset.timer); + delete e.target.dataset.timer; + } + + onTermLookup(e, params) { + this.pendingLookup = true; + (async () => { + await this.search.onTermLookup(e, params); + this.pendingLookup = false; + })(); + } + + onParserChange(e) { + const selectedParser = e.target.value; + this.selectedParser = selectedParser; + apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); + this.renderParseResult(this.getParseResult()); + } + + refreshSelectedParser() { + if (this.parseResults.length > 0) { + if (this.selectedParser === null) { + this.selectedParser = this.search.options.parsing.selectedParser; + } + if (this.selectedParser === null || !this.getParseResult()) { + const selectedParser = this.parseResults[0].id; + this.selectedParser = selectedParser; + apiOptionsSet({parsing: {selectedParser}}, this.search.getOptionsContext()); + } + } + } + + getParseResult() { + return this.parseResults.find(r => r.id === this.selectedParser); + } + + async setText(text) { + this.search.setSpinnerVisible(true); + + await this.setPreview(text); + + this.parseResults = await this.parseText(text); + this.refreshSelectedParser(); + + this.renderParserSelect(); + await this.renderParseResult(); + + this.search.setSpinnerVisible(false); + } + + async parseText(text) { + const results = []; + if (this.search.options.parsing.enableScanningParser) { + results.push({ + name: 'Scanning parser', + id: 'scan', + parsedText: await apiTextParse(text, this.search.getOptionsContext()) + }); + } + if (this.search.options.parsing.enableMecabParser) { + let mecabResults = await apiTextParseMecab(text, this.search.getOptionsContext()); + for (const mecabDictName in mecabResults) { + results.push({ + name: `MeCab: ${mecabDictName}`, + id: `mecab-${mecabDictName}`, + parsedText: mecabResults[mecabDictName] + }); + } + } + return results; + } + + async setPreview(text) { + const previewTerms = []; + while (text.length > 0) { + const tempText = text.slice(0, 2); + previewTerms.push([{text: Array.from(tempText)}]); + text = text.slice(2); + } + this.queryParser.innerHTML = await apiTemplateRender('query-parser.html', { + terms: previewTerms, + preview: true + }); + + for (const charElement of this.queryParser.querySelectorAll('.query-parser-char')) { + this.activateScanning(charElement); + } + } + + renderParserSelect() { + this.queryParserSelect.innerHTML = ''; + if (this.parseResults.length > 1) { + const select = document.createElement('select'); + select.classList.add('form-control'); + for (const parseResult of this.parseResults) { + const option = document.createElement('option'); + option.value = parseResult.id; + option.innerText = parseResult.name; + option.defaultSelected = this.selectedParser === parseResult.id; + select.appendChild(option); + } + select.addEventListener('change', this.onParserChange.bind(this)); + this.queryParserSelect.appendChild(select); + } + } + + async renderParseResult() { + const parseResult = this.getParseResult(); + if (!parseResult) { + this.queryParser.innerHTML = ''; + return; + } + + this.queryParser.innerHTML = await apiTemplateRender( + 'query-parser.html', + {terms: QueryParser.processParseResultForDisplay(parseResult.parsedText)} + ); + + for (const charElement of this.queryParser.querySelectorAll('.query-parser-char')) { + this.activateScanning(charElement); + } + } + + activateScanning(element) { + element.addEventListener('mousemove', (e) => { + clearTimeout(e.target.dataset.timer); + if (this.search.options.scanning.modifier === 'none') { + e.target.dataset.timer = setTimeout(() => { + this.onMouseMove(e); + delete e.target.dataset.timer; + }, this.search.options.scanning.delay); + } else { + this.onMouseMove(e); + } + }); + element.addEventListener('mouseleave', (e) => { + this.onMouseLeave(e); + }); + } + + static processParseResultForDisplay(result) { + return result.map((term) => { + return term.filter(part => part.text.trim()).map((part) => { + return { + text: Array.from(part.text), + reading: part.reading, + raw: !part.reading || !part.reading.trim(), + }; + }); + }); + } +} diff --git a/ext/bg/js/search.js b/ext/bg/js/search.js index ec5a5972..b4731e6a 100644 --- a/ext/bg/js/search.js +++ b/ext/bg/js/search.js @@ -32,6 +32,8 @@ class DisplaySearch extends Display { url: window.location.href }; + this.queryParser = new QueryParser(this); + this.search = document.querySelector('#search'); this.query = document.querySelector('#query'); this.intro = document.querySelector('#intro'); @@ -72,11 +74,11 @@ class DisplaySearch extends Display { const query = DisplaySearch.getSearchQueryFromLocation(window.location.href) || ''; if (e.target.checked) { window.wanakana.bind(this.query); - this.query.value = window.wanakana.toKana(query); + this.setQuery(window.wanakana.toKana(query)); apiOptionsSet({general: {enableWanakana: true}}, this.getOptionsContext()); } else { window.wanakana.unbind(this.query); - this.query.value = query; + this.setQuery(query); apiOptionsSet({general: {enableWanakana: false}}, this.getOptionsContext()); } this.onSearchQueryUpdated(this.query.value, false); @@ -86,9 +88,9 @@ class DisplaySearch extends Display { const query = DisplaySearch.getSearchQueryFromLocation(window.location.href); if (query !== null) { if (this.isWanakanaEnabled()) { - this.query.value = window.wanakana.toKana(query); + this.setQuery(window.wanakana.toKana(query)); } else { - this.query.value = query; + this.setQuery(query); } this.onSearchQueryUpdated(this.query.value, false); } @@ -159,6 +161,7 @@ class DisplaySearch extends Display { e.preventDefault(); const query = this.query.value; + this.queryParser.setText(query); const queryString = query.length > 0 ? `?query=${encodeURIComponent(query)}` : ''; window.history.pushState(null, '', `${window.location.pathname}${queryString}`); this.onSearchQueryUpdated(query, true); @@ -168,9 +171,9 @@ class DisplaySearch extends Display { const query = DisplaySearch.getSearchQueryFromLocation(window.location.href) || ''; if (this.query !== null) { if (this.isWanakanaEnabled()) { - this.query.value = window.wanakana.toKana(query); + this.setQuery(window.wanakana.toKana(query)); } else { - this.query.value = query; + this.setQuery(query); } } @@ -258,9 +261,9 @@ class DisplaySearch extends Display { } if (curText && (curText !== this.clipboardPrevText) && jpIsJapaneseText(curText)) { if (this.isWanakanaEnabled()) { - this.query.value = window.wanakana.toKana(curText); + this.setQuery(window.wanakana.toKana(curText)); } else { - this.query.value = curText; + this.setQuery(curText); } const queryString = curText.length > 0 ? `?query=${encodeURIComponent(curText)}` : ''; @@ -287,6 +290,11 @@ class DisplaySearch extends Display { return this.optionsContext; } + setQuery(query) { + this.query.value = query; + this.queryParser.setText(query); + } + setIntroVisible(visible, animate) { if (this.introVisible === visible) { return; diff --git a/ext/bg/js/settings.js b/ext/bg/js/settings.js index e562c54e..ab267c32 100644 --- a/ext/bg/js/settings.js +++ b/ext/bg/js/settings.js @@ -64,6 +64,10 @@ async function formRead(options) { options.scanning.modifier = $('#scan-modifier-key').val(); options.scanning.popupNestingMaxDepth = parseInt($('#popup-nesting-max-depth').val(), 10); + options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); + options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); + options.parsing.readingMode = $('#parsing-reading-mode').val(); + const optionsAnkiEnableOld = options.anki.enable; options.anki.enable = $('#anki-enable').prop('checked'); options.anki.tags = utilBackgroundIsolate($('#card-tags').val().split(/[,; ]+/)); @@ -126,6 +130,10 @@ async function formWrite(options) { $('#scan-modifier-key').val(options.scanning.modifier); $('#popup-nesting-max-depth').val(options.scanning.popupNestingMaxDepth); + $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); + $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); + $('#parsing-reading-mode').val(options.parsing.readingMode); + $('#anki-enable').prop('checked', options.anki.enable); $('#card-tags').val(options.anki.tags.join(' ')); $('#sentence-detection-extent').val(options.anki.sentenceExt); diff --git a/ext/bg/js/templates.js b/ext/bg/js/templates.js index 823b9e6f..6e377957 100644 --- a/ext/bg/js/templates.js +++ b/ext/bg/js/templates.js @@ -163,6 +163,58 @@ templates['kanji.html'] = template({"1":function(container,depth0,helpers,partia } ,"useDecorators":true,"usePartial":true,"useData":true,"useDepths":true}); +templates['query-parser.html'] = template({"1":function(container,depth0,helpers,partials,data) { + var stack1, alias1=depth0 != null ? depth0 : (container.nullContext || {}); + + return ((stack1 = helpers["if"].call(alias1,(depth0 != null ? depth0.preview : depth0),{"name":"if","hash":{},"fn":container.program(2, data, 0),"inverse":container.program(4, data, 0),"data":data})) != null ? stack1 : "") + + ((stack1 = helpers.each.call(alias1,depth0,{"name":"each","hash":{},"fn":container.program(6, data, 0),"inverse":container.noop,"data":data})) != null ? stack1 : "") + + "</span>"; +},"2":function(container,depth0,helpers,partials,data) { + return "<span class=\"query-parser-term-preview\">"; +},"4":function(container,depth0,helpers,partials,data) { + return "<span class=\"query-parser-term\">"; +},"6":function(container,depth0,helpers,partials,data) { + var stack1; + + return ((stack1 = container.invokePartial(partials.part,depth0,{"name":"part","data":data,"helpers":helpers,"partials":partials,"decorators":container.decorators})) != null ? stack1 : ""); +},"8":function(container,depth0,helpers,partials,data) { + var stack1; + + return ((stack1 = helpers["if"].call(depth0 != null ? depth0 : (container.nullContext || {}),(depth0 != null ? depth0.raw : depth0),{"name":"if","hash":{},"fn":container.program(9, data, 0),"inverse":container.program(12, data, 0),"data":data})) != null ? stack1 : ""); +},"9":function(container,depth0,helpers,partials,data) { + var stack1; + + return ((stack1 = helpers.each.call(depth0 != null ? depth0 : (container.nullContext || {}),(depth0 != null ? depth0.text : depth0),{"name":"each","hash":{},"fn":container.program(10, data, 0),"inverse":container.noop,"data":data})) != null ? stack1 : ""); +},"10":function(container,depth0,helpers,partials,data) { + return "<span class=\"query-parser-char\">" + + container.escapeExpression(container.lambda(depth0, depth0)) + + "</span>"; +},"12":function(container,depth0,helpers,partials,data) { + var stack1, helper, alias1=depth0 != null ? depth0 : (container.nullContext || {}); + + return "<ruby>" + + ((stack1 = helpers.each.call(alias1,(depth0 != null ? depth0.text : depth0),{"name":"each","hash":{},"fn":container.program(10, data, 0),"inverse":container.noop,"data":data})) != null ? stack1 : "") + + "<rt>" + + container.escapeExpression(((helper = (helper = helpers.reading || (depth0 != null ? depth0.reading : depth0)) != null ? helper : helpers.helperMissing),(typeof helper === "function" ? helper.call(alias1,{"name":"reading","hash":{},"data":data}) : helper))) + + "</rt></ruby>"; +},"14":function(container,depth0,helpers,partials,data,blockParams,depths) { + var stack1; + + return ((stack1 = container.invokePartial(partials.term,depth0,{"name":"term","hash":{"preview":(depths[1] != null ? depths[1].preview : depths[1])},"data":data,"helpers":helpers,"partials":partials,"decorators":container.decorators})) != null ? stack1 : ""); +},"compiler":[7,">= 4.0.0"],"main":function(container,depth0,helpers,partials,data,blockParams,depths) { + var stack1; + + return ((stack1 = helpers.each.call(depth0 != null ? depth0 : (container.nullContext || {}),(depth0 != null ? depth0.terms : depth0),{"name":"each","hash":{},"fn":container.program(14, data, 0, blockParams, depths),"inverse":container.noop,"data":data})) != null ? stack1 : ""); +},"main_d": function(fn, props, container, depth0, data, blockParams, depths) { + + var decorators = container.decorators; + + fn = decorators.inline(fn,props,container,{"name":"inline","hash":{},"fn":container.program(1, data, 0, blockParams, depths),"inverse":container.noop,"args":["term"],"data":data}) || fn; + fn = decorators.inline(fn,props,container,{"name":"inline","hash":{},"fn":container.program(8, data, 0, blockParams, depths),"inverse":container.noop,"args":["part"],"data":data}) || fn; + return fn; + } + +,"useDecorators":true,"usePartial":true,"useData":true,"useDepths":true}); templates['terms.html'] = template({"1":function(container,depth0,helpers,partials,data) { var stack1, helper, options, alias1=depth0 != null ? depth0 : (container.nullContext || {}), buffer = "<div class=\"dict-"; diff --git a/ext/bg/search.html b/ext/bg/search.html index 54c5fb6c..e819ebe6 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -47,6 +47,13 @@ <img src="/mixed/img/spinner.gif"> </div> + <div class="scan-disable"> + <div id="query-parser-select" class="input-group"></div> + <div id="query-parser"></div> + </div> + + <hr> + <div id="content"></div> </div> @@ -67,6 +74,7 @@ <script src="/mixed/js/japanese.js"></script> <script src="/mixed/js/scroll.js"></script> + <script src="/bg/js/search-query-parser.js"></script> <script src="/bg/js/search.js"></script> <script src="/bg/js/search-frontend.js"></script> </body> diff --git a/ext/bg/settings.html b/ext/bg/settings.html index f95e76a1..262386e9 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -410,6 +410,44 @@ </div> </div> + <div id="text-parsing"> + <h3>Text Parsing Options</h3> + + <p class="help-block"> + Yomichan can attempt to parse entire sentences or longer text blocks on the search page, + adding furigana above words and a small space between words. + </p> + + <p class="help-block"> + Two types of parsers are supported. The first one, enabled by default, works using the built-in + scanning functionality by automatically advancing in the sentence after a matching word. + </p> + + <p class="help-block"> + The second type is an external program called <a href="https://en.wikipedia.org/wiki/MeCab" target="_blank" rel="noopener">MeCab</a> + that uses its own dictionaries and a special parsing algorithm. To get it working, you must first + install it and <a href="https://github.com/siikamiika/yomichan-mecab-installer" target="_blank" rel="noopener">a native messaging component</a> + that acts as a bridge between the program and Yomichan. + </p> + + <div class="checkbox"> + <label><input type="checkbox" id="parsing-scan-enable"> Enable text parsing using installed dictionaries</label> + </div> + + <div class="checkbox"> + <label><input type="checkbox" id="parsing-mecab-enable"> Enable text parsing using MeCab</label> + </div> + + <div class="form-group"> + <label for="parsing-reading-mode">Reading mode</label> + <select class="form-control" id="parsing-reading-mode"> + <option value="hiragana">ひらがな</option> + <option value="katakana">カタカナ</option> + <option value="romaji">Romaji</option> + </select> + </div> + </div> + <div class="ignore-form-changes"> <div> <img src="/mixed/img/spinner.gif" class="pull-right" id="dict-spinner" alt> diff --git a/ext/fg/js/api.js b/ext/fg/js/api.js index 945ba076..92330d9c 100644 --- a/ext/fg/js/api.js +++ b/ext/fg/js/api.js @@ -29,6 +29,14 @@ function apiTermsFind(text, details, optionsContext) { return utilInvoke('termsFind', {text, details, optionsContext}); } +function apiTextParse(text, optionsContext) { + return utilInvoke('textParse', {text, optionsContext}); +} + +function apiTextParseMecab(text, optionsContext) { + return utilInvoke('textParseMecab', {text, optionsContext}); +} + function apiKanjiFind(text, optionsContext) { return utilInvoke('kanjiFind', {text, optionsContext}); } diff --git a/ext/manifest.json b/ext/manifest.json index fabceafd..4d75cd54 100644 --- a/ext/manifest.json +++ b/ext/manifest.json @@ -42,7 +42,8 @@ "<all_urls>", "storage", "clipboardWrite", - "unlimitedStorage" + "unlimitedStorage", + "nativeMessaging" ], "optional_permissions": [ "clipboardRead" diff --git a/ext/mixed/css/display.css b/ext/mixed/css/display.css index 7ee6f5ac..ba2fadb7 100644 --- a/ext/mixed/css/display.css +++ b/ext/mixed/css/display.css @@ -88,6 +88,19 @@ ol, ul { user-select: none; } +#query-parser { + margin-top: 10px; + font-size: 24px; +} + +.query-parser-term { + margin-right: 5px; +} + +html:root[data-yomichan-page=search] body { + overflow-y: scroll; /* always show scroll bar to avoid scanning problems */ +} + /* * Entries diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 8ad3ee1b..4c698ecf 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -98,17 +98,62 @@ class Display { } } - async onTermLookup(e) { + async onTermLookup(e, {disableScroll, selectText, disableHistory}={}) { + const termLookupResults = await this.termLookup(e); + if (!termLookupResults) { + return false; + } + + try { + const {textSource, definitions} = termLookupResults; + + const scannedElement = e.target; + const sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); + + if (!disableScroll) { + this.windowScroll.toY(0); + } + let context; + if (disableHistory) { + const {url, source} = this.context || {}; + context = {sentence, url, source, disableScroll}; + } else { + context = { + disableScroll, + source: { + definitions: this.definitions, + index: this.entryIndexFind(scannedElement), + scroll: this.windowScroll.y + } + }; + + if (this.context) { + context.sentence = sentence; + context.url = this.context.url; + context.source.source = this.context.source; + } + } + + this.setContentTerms(definitions, context); + + if (selectText) { + textSource.select(); + } + } catch (error) { + this.onError(error); + } + } + + async termLookup(e) { try { e.preventDefault(); - const clickedElement = e.target; const textSource = docRangeFromPoint(e.clientX, e.clientY, this.options); if (textSource === null) { return false; } - let definitions, length, sentence; + let definitions, length; try { textSource.setEndOffset(this.options.scanning.length); @@ -118,28 +163,11 @@ class Display { } textSource.setEndOffset(length); - - sentence = docSentenceExtract(textSource, this.options.anki.sentenceExt); } finally { textSource.cleanup(); } - this.windowScroll.toY(0); - const context = { - source: { - definitions: this.definitions, - index: this.entryIndexFind(clickedElement), - scroll: this.windowScroll.y - } - }; - - if (this.context) { - context.sentence = sentence; - context.url = this.context.url; - context.source.source = this.context.source; - } - - this.setContentTerms(definitions, context); + return {textSource, definitions}; } catch (error) { this.onError(error); } @@ -336,8 +364,10 @@ class Display { const content = await apiTemplateRender('terms.html', params); this.container.innerHTML = content; - const {index, scroll} = context || {}; - this.entryScrollIntoView(index || 0, scroll); + const {index, scroll, disableScroll} = context || {}; + if (!disableScroll) { + this.entryScrollIntoView(index || 0, scroll); + } if (options.audio.enabled && options.audio.autoPlay) { this.autoPlayAudio(); diff --git a/ext/mixed/js/japanese.js b/ext/mixed/js/japanese.js index d24f56a6..a7cd0452 100644 --- a/ext/mixed/js/japanese.js +++ b/ext/mixed/js/japanese.js @@ -48,6 +48,43 @@ function jpKatakanaToHiragana(text) { return result; } +function jpHiraganaToKatakana(text) { + let result = ''; + for (const c of text) { + if (wanakana.isHiragana(c)) { + result += wanakana.toKatakana(c); + } else { + result += c; + } + } + + return result; +} + +function jpToRomaji(text) { + return wanakana.toRomaji(text); +} + +function jpConvertReading(expressionFragment, readingFragment, readingMode) { + switch (readingMode) { + case 'hiragana': + return jpKatakanaToHiragana(readingFragment || ''); + case 'katakana': + return jpHiraganaToKatakana(readingFragment || ''); + case 'romaji': + if (readingFragment) { + return jpToRomaji(readingFragment); + } else { + if (jpIsKana(expressionFragment)) { + return jpToRomaji(expressionFragment); + } + } + return readingFragment; + default: + return readingFragment; + } +} + function jpDistributeFurigana(expression, reading) { const fallback = [{furigana: reading, text: expression}]; if (!reading) { @@ -61,12 +98,11 @@ function jpDistributeFurigana(expression, reading) { const group = groups[0]; if (group.mode === 'kana') { - if (reading.startsWith(group.text)) { - const readingUsed = reading.substring(0, group.text.length); + if (jpKatakanaToHiragana(reading).startsWith(jpKatakanaToHiragana(group.text))) { const readingLeft = reading.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: readingUsed}].concat(segs); + return [{text: group.text}].concat(segs); } } } else { @@ -95,3 +131,30 @@ function jpDistributeFurigana(expression, reading) { return segmentize(reading, groups) || fallback; } + +function jpDistributeFuriganaInflected(expression, reading, source) { + const output = []; + + let stemLength = 0; + const shortest = Math.min(source.length, expression.length); + const sourceHiragana = jpKatakanaToHiragana(source); + const expressionHiragana = jpKatakanaToHiragana(expression); + while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) { + ++stemLength; + } + const offset = source.length - stemLength; + + const stemExpression = source.slice(0, source.length - offset); + const stemReading = reading.slice( + 0, offset === 0 ? reading.length : reading.length - expression.length + stemLength + ); + for (const segment of jpDistributeFurigana(stemExpression, stemReading)) { + output.push(segment); + } + + if (stemLength !== source.length) { + output.push({text: source.slice(stemLength)}); + } + + return output; +} diff --git a/tmpl/query-parser.html b/tmpl/query-parser.html new file mode 100644 index 00000000..db98b5ff --- /dev/null +++ b/tmpl/query-parser.html @@ -0,0 +1,27 @@ +{{~#*inline "term"~}} +{{~#if preview~}} +<span class="query-parser-term-preview"> +{{~else~}} +<span class="query-parser-term"> +{{~/if~}} +{{~#each this~}} +{{> part }} +{{~/each~}} +</span> +{{~/inline~}} + +{{~#*inline "part"~}} +{{~#if raw~}} +{{~#each text~}} +<span class="query-parser-char">{{this}}</span> +{{~/each~}} +{{~else~}} +<ruby>{{~#each text~}} +<span class="query-parser-char">{{this}}</span> +{{~/each~}}<rt>{{reading}}</rt></ruby> +{{~/if~}} +{{~/inline~}} + +{{~#each terms~}} +{{> term preview=../preview }} +{{~/each~}} |