From c613321a735dbf333491ab199b808f30852a8daa Mon Sep 17 00:00:00 2001 From: siikamiika Date: Sun, 12 Apr 2020 03:53:24 +0300 Subject: move QueryParser.parseText to Backend --- ext/bg/js/backend.js | 128 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 52 deletions(-) (limited to 'ext/bg/js/backend.js') diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index be8ea322..65e00f28 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -85,7 +85,6 @@ class Backend { ['kanjiFind', {handler: this._onApiKanjiFind.bind(this), async: true}], ['termsFind', {handler: this._onApiTermsFind.bind(this), async: true}], ['textParse', {handler: this._onApiTextParse.bind(this), async: true}], - ['textParseMecab', {handler: this._onApiTextParseMecab.bind(this), async: true}], ['definitionAdd', {handler: this._onApiDefinitionAdd.bind(this), async: true}], ['definitionsAddable', {handler: this._onApiDefinitionsAddable.bind(this), async: true}], ['noteView', {handler: this._onApiNoteView.bind(this), async: true}], @@ -315,6 +314,65 @@ class Backend { return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details); } + async _textParseScanning(text, options) { + const results = []; + while (text.length > 0) { + const term = []; + const [definitions, sourceLength] = await this.translator.findTerms( + 'simple', + text.substring(0, options.scanning.length), + {}, + options + ); + if (definitions.length > 0) { + dictTermsSort(definitions); + const {expression, reading} = definitions[0]; + const source = text.substring(0, sourceLength); + for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + term.push({text: text2, reading: reading2}); + } + text = text.substring(source.length); + } else { + const reading = jp.convertReading(text[0], null, options.parsing.readingMode); + term.push({text: text[0], reading}); + text = text.substring(1); + } + results.push(term); + } + return results; + } + + async _textParseMecab(text, options) { + const results = []; + const rawResults = await this.mecab.parseText(text); + for (const [mecabName, parsedLines] of Object.entries(rawResults)) { + const result = []; + for (const parsedLine of parsedLines) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + if (expression !== null && reading !== null) { + for (const {text: text2, furigana} of jp.distributeFuriganaInflected( + expression, + jp.convertKatakanaToHiragana(reading), + source + )) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + term.push({text: text2, reading: reading2}); + } + } else { + const reading2 = jp.convertReading(source, null, options.parsing.readingMode); + term.push({text: source, reading: reading2}); + } + result.push(term); + } + result.push([{text: '\n'}]); + } + results.push([mecabName, result]); + } + return results; + } + // Message handlers _onApiYomichanCoreReady(_params, sender) { @@ -406,61 +464,27 @@ class Backend { async _onApiTextParse({text, optionsContext}) { const options = this.getOptions(optionsContext); const results = []; - while (text.length > 0) { - const term = []; - const [definitions, sourceLength] = await this.translator.findTerms( - 'simple', - text.substring(0, options.scanning.length), - {}, - options - ); - if (definitions.length > 0) { - dictTermsSort(definitions); - const {expression, reading} = definitions[0]; - const source = text.substring(0, sourceLength); - for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); - term.push({text: text2, reading: reading2}); - } - text = text.substring(source.length); - } else { - const reading = jp.convertReading(text[0], null, options.parsing.readingMode); - term.push({text: text[0], reading}); - text = text.substring(1); - } - results.push(term); + + if (options.parsing.enableScanningParser) { + results.push({ + source: 'scanning-parser', + id: 'scan', + content: await this._textParseScanning(text, options) + }); } - return results; - } - async _onApiTextParseMecab({text, optionsContext}) { - const options = this.getOptions(optionsContext); - const results = []; - const rawResults = await this.mecab.parseText(text); - for (const [mecabName, parsedLines] of Object.entries(rawResults)) { - const result = []; - for (const parsedLine of parsedLines) { - for (const {expression, reading, source} of parsedLine) { - const term = []; - if (expression !== null && reading !== null) { - for (const {text: text2, furigana} of jp.distributeFuriganaInflected( - expression, - jp.convertKatakanaToHiragana(reading), - source - )) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); - term.push({text: text2, reading: reading2}); - } - } else { - const reading2 = jp.convertReading(source, null, options.parsing.readingMode); - term.push({text: source, reading: reading2}); - } - result.push(term); - } - result.push([{text: '\n'}]); + if (options.parsing.enableMecabParser) { + const mecabResults = await this._textParseMecab(text, options); + for (const [mecabDictName, mecabDictResults] of mecabResults) { + results.push({ + source: 'mecab', + dictionary: mecabDictName, + id: `mecab-${mecabDictName}`, + content: mecabDictResults + }); } - results.push([mecabName, result]); } + return results; } -- cgit v1.2.3 From f93dc857107e9b23ec06f1b568aad2c6f870ba4c Mon Sep 17 00:00:00 2001 From: siikamiika Date: Mon, 13 Apr 2020 22:55:33 +0300 Subject: assume and propagate strings for text parsing --- ext/bg/js/backend.js | 26 ++++++------- ext/bg/js/japanese.js | 10 ++--- ext/bg/js/search-query-parser-generator.js | 2 +- ext/bg/js/search-query-parser.js | 2 +- test/test-japanese.js | 60 +++++++++--------------------- 5 files changed, 36 insertions(+), 64 deletions(-) (limited to 'ext/bg/js/backend.js') diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 65e00f28..d5086af2 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -334,7 +334,7 @@ class Backend { } text = text.substring(source.length); } else { - const reading = jp.convertReading(text[0], null, options.parsing.readingMode); + const reading = jp.convertReading(text[0], '', options.parsing.readingMode); term.push({text: text[0], reading}); text = text.substring(1); } @@ -349,24 +349,20 @@ class Backend { for (const [mecabName, parsedLines] of Object.entries(rawResults)) { const result = []; for (const parsedLine of parsedLines) { - for (const {expression, reading, source} of parsedLine) { + for (let {expression, reading, source} of parsedLine) { const term = []; - if (expression !== null && reading !== null) { - for (const {text: text2, furigana} of jp.distributeFuriganaInflected( - expression, - jp.convertKatakanaToHiragana(reading), - source - )) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); - term.push({text: text2, reading: reading2}); - } - } else { - const reading2 = jp.convertReading(source, null, options.parsing.readingMode); - term.push({text: source, reading: reading2}); + if (expression === '') { expression = source; } + for (const {text: text2, furigana} of jp.distributeFuriganaInflected( + expression, + jp.convertKatakanaToHiragana(reading), + source + )) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + term.push({text: text2, reading: reading2}); } result.push(term); } - result.push([{text: '\n'}]); + result.push([{text: '\n', reading: ''}]); } results.push([mecabName, result]); } diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 5fef27a7..c74e4553 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -127,9 +127,9 @@ function convertReading(expressionFragment, readingFragment, readingMode) { switch (readingMode) { case 'hiragana': - return convertKatakanaToHiragana(readingFragment || ''); + return convertKatakanaToHiragana(readingFragment); case 'katakana': - return convertHiraganaToKatakana(readingFragment || ''); + return convertHiraganaToKatakana(readingFragment); case 'romaji': if (readingFragment) { return convertToRomaji(readingFragment); @@ -140,7 +140,7 @@ } return readingFragment; case 'none': - return null; + return ''; default: return readingFragment; } @@ -300,7 +300,7 @@ const readingLeft = reading2.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: group.text}].concat(segs); + return [{text: group.text, furigana: ''}].concat(segs); } } } else { @@ -368,7 +368,7 @@ } if (stemLength !== source.length) { - output.push({text: source.substring(stemLength)}); + output.push({text: source.substring(stemLength), furigana: ''}); } return output; diff --git a/ext/bg/js/search-query-parser-generator.js b/ext/bg/js/search-query-parser-generator.js index d44829f7..527302ed 100644 --- a/ext/bg/js/search-query-parser-generator.js +++ b/ext/bg/js/search-query-parser-generator.js @@ -36,7 +36,7 @@ class QueryParserGenerator { const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term'); for (const segment of term) { if (!segment.text.trim()) { continue; } - if (!segment.reading || !segment.reading.trim()) { + if (!segment.reading.trim()) { termContainer.appendChild(this.createSegmentText(segment.text)); } else { termContainer.appendChild(this.createSegment(segment)); diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index da61b045..692fb1a8 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -140,7 +140,7 @@ class QueryParser extends TextScanner { const previewTerms = []; for (let i = 0, ii = text.length; i < ii; i += 2) { const tempText = text.substring(i, i + 2); - previewTerms.push([{text: tempText}]); + previewTerms.push([{text: tempText, reading: ''}]); } this.queryParser.textContent = ''; this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true)); diff --git a/test/test-japanese.js b/test/test-japanese.js index 89e41c36..7fd71ba8 100644 --- a/test/test-japanese.js +++ b/test/test-japanese.js @@ -176,19 +176,19 @@ function testConvertReading() { [['アリガトウ', 'アリガトウ', 'hiragana'], 'ありがとう'], [['アリガトウ', 'アリガトウ', 'katakana'], 'アリガトウ'], [['アリガトウ', 'アリガトウ', 'romaji'], 'arigatou'], - [['アリガトウ', 'アリガトウ', 'none'], null], + [['アリガトウ', 'アリガトウ', 'none'], ''], [['アリガトウ', 'アリガトウ', 'default'], 'アリガトウ'], [['ありがとう', 'ありがとう', 'hiragana'], 'ありがとう'], [['ありがとう', 'ありがとう', 'katakana'], 'アリガトウ'], [['ありがとう', 'ありがとう', 'romaji'], 'arigatou'], - [['ありがとう', 'ありがとう', 'none'], null], + [['ありがとう', 'ありがとう', 'none'], ''], [['ありがとう', 'ありがとう', 'default'], 'ありがとう'], [['有り難う', 'ありがとう', 'hiragana'], 'ありがとう'], [['有り難う', 'ありがとう', 'katakana'], 'アリガトウ'], [['有り難う', 'ありがとう', 'romaji'], 'arigatou'], - [['有り難う', 'ありがとう', 'none'], null], + [['有り難う', 'ありがとう', 'none'], ''], [['有り難う', 'ありがとう', 'default'], 'ありがとう'], // Cases with falsy readings @@ -196,40 +196,16 @@ function testConvertReading() { [['ありがとう', '', 'hiragana'], ''], [['ありがとう', '', 'katakana'], ''], [['ありがとう', '', 'romaji'], 'arigatou'], - [['ありがとう', '', 'none'], null], + [['ありがとう', '', 'none'], ''], [['ありがとう', '', 'default'], ''], - [['ありがとう', null, 'hiragana'], ''], - [['ありがとう', null, 'katakana'], ''], - [['ありがとう', null, 'romaji'], 'arigatou'], - [['ありがとう', null, 'none'], null], - [['ありがとう', null, 'default'], null], - - [['ありがとう', void 0, 'hiragana'], ''], - [['ありがとう', void 0, 'katakana'], ''], - [['ありがとう', void 0, 'romaji'], 'arigatou'], - [['ありがとう', void 0, 'none'], null], - [['ありがとう', void 0, 'default'], void 0], - // Cases with falsy readings and kanji expressions [['有り難う', '', 'hiragana'], ''], [['有り難う', '', 'katakana'], ''], [['有り難う', '', 'romaji'], ''], - [['有り難う', '', 'none'], null], - [['有り難う', '', 'default'], ''], - - [['有り難う', null, 'hiragana'], ''], - [['有り難う', null, 'katakana'], ''], - [['有り難う', null, 'romaji'], null], - [['有り難う', null, 'none'], null], - [['有り難う', null, 'default'], null], - - [['有り難う', void 0, 'hiragana'], ''], - [['有り難う', void 0, 'katakana'], ''], - [['有り難う', void 0, 'romaji'], void 0], - [['有り難う', void 0, 'none'], null], - [['有り難う', void 0, 'default'], void 0] + [['有り難う', '', 'none'], ''], + [['有り難う', '', 'default'], ''] ]; for (const [[expressionFragment, readingFragment, readingMode], expected] of data) { @@ -303,9 +279,9 @@ function testDistributeFurigana() { ['有り難う', 'ありがとう'], [ {text: '有', furigana: 'あ'}, - {text: 'り'}, + {text: 'り', furigana: ''}, {text: '難', furigana: 'がと'}, - {text: 'う'} + {text: 'う', furigana: ''} ] ], [ @@ -317,23 +293,23 @@ function testDistributeFurigana() { [ ['お祝い', 'おいわい'], [ - {text: 'お'}, + {text: 'お', furigana: ''}, {text: '祝', furigana: 'いわ'}, - {text: 'い'} + {text: 'い', furigana: ''} ] ], [ ['美味しい', 'おいしい'], [ {text: '美味', furigana: 'おい'}, - {text: 'しい'} + {text: 'しい', furigana: ''} ] ], [ ['食べ物', 'たべもの'], [ {text: '食', furigana: 'た'}, - {text: 'べ'}, + {text: 'べ', furigana: ''}, {text: '物', furigana: 'もの'} ] ], @@ -341,9 +317,9 @@ function testDistributeFurigana() { ['試し切り', 'ためしぎり'], [ {text: '試', furigana: 'ため'}, - {text: 'し'}, + {text: 'し', furigana: ''}, {text: '切', furigana: 'ぎ'}, - {text: 'り'} + {text: 'り', furigana: ''} ] ], // Ambiguous @@ -373,16 +349,16 @@ function testDistributeFuriganaInflected() { ['美味しい', 'おいしい', '美味しかた'], [ {text: '美味', furigana: 'おい'}, - {text: 'し'}, - {text: 'かた'} + {text: 'し', furigana: ''}, + {text: 'かた', furigana: ''} ] ], [ ['食べる', 'たべる', '食べた'], [ {text: '食', furigana: 'た'}, - {text: 'べ'}, - {text: 'た'} + {text: 'べ', furigana: ''}, + {text: 'た', furigana: ''} ] ] ]; -- cgit v1.2.3 From 3fa8b8e1866933fdfccd0598a11e443fc1693840 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 14 Apr 2020 21:21:52 +0300 Subject: prevent infinite loop if source length is 0 --- ext/bg/js/backend.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ext/bg/js/backend.js') diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index d5086af2..960c65bf 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -324,7 +324,7 @@ class Backend { {}, options ); - if (definitions.length > 0) { + if (definitions.length > 0 && sourceLength > 0) { dictTermsSort(definitions); const {expression, reading} = definitions[0]; const source = text.substring(0, sourceLength); -- cgit v1.2.3 From 619df42aedaa8da97d0a16d539b7211349143a0a Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 14 Apr 2020 21:25:02 +0300 Subject: simplify to enable constant usage --- ext/bg/js/backend.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'ext/bg/js/backend.js') diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 960c65bf..f8b0fc96 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -349,11 +349,10 @@ class Backend { for (const [mecabName, parsedLines] of Object.entries(rawResults)) { const result = []; for (const parsedLine of parsedLines) { - for (let {expression, reading, source} of parsedLine) { + for (const {expression, reading, source} of parsedLine) { const term = []; - if (expression === '') { expression = source; } for (const {text: text2, furigana} of jp.distributeFuriganaInflected( - expression, + expression.length > 0 ? expression : source, jp.convertKatakanaToHiragana(reading), source )) { -- cgit v1.2.3