diff options
Diffstat (limited to 'ext/bg/js')
-rw-r--r-- | ext/bg/js/backend.js | 124 | ||||
-rw-r--r-- | ext/bg/js/database.js | 3 | ||||
-rw-r--r-- | ext/bg/js/japanese.js | 64 | ||||
-rw-r--r-- | ext/bg/js/mecab.js | 31 | ||||
-rw-r--r-- | ext/bg/js/options.js | 3 | ||||
-rw-r--r-- | ext/bg/js/search-frontend.js | 40 | ||||
-rw-r--r-- | ext/bg/js/search-query-parser-generator.js | 14 | ||||
-rw-r--r-- | ext/bg/js/search-query-parser.js | 31 | ||||
-rw-r--r-- | ext/bg/js/search.js | 2 | ||||
-rw-r--r-- | ext/bg/js/settings/main.js | 2 | ||||
-rw-r--r-- | ext/bg/js/translator.js | 17 |
11 files changed, 215 insertions, 116 deletions
diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index bc687a24..04a3b932 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -30,7 +30,6 @@ * Translator * conditionsTestValue * dictConfigured - * dictEnabledSet * dictTermsSort * handlebarsRenderDynamic * jp @@ -85,7 +84,6 @@ class Backend { ['kanjiFind', {handler: this._onApiKanjiFind.bind(this), async: true}], ['termsFind', {handler: this._onApiTermsFind.bind(this), async: true}], ['textParse', {handler: this._onApiTextParse.bind(this), async: true}], - ['textParseMecab', {handler: this._onApiTextParseMecab.bind(this), async: true}], ['definitionAdd', {handler: this._onApiDefinitionAdd.bind(this), async: true}], ['definitionsAddable', {handler: this._onApiDefinitionsAddable.bind(this), async: true}], ['noteView', {handler: this._onApiNoteView.bind(this), async: true}], @@ -321,6 +319,60 @@ class Backend { return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details); } + async _textParseScanning(text, options) { + const results = []; + while (text.length > 0) { + const term = []; + const [definitions, sourceLength] = await this.translator.findTerms( + 'simple', + text.substring(0, options.scanning.length), + {}, + options + ); + if (definitions.length > 0 && sourceLength > 0) { + dictTermsSort(definitions); + const {expression, reading} = definitions[0]; + const source = text.substring(0, sourceLength); + for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + term.push({text: text2, reading: reading2}); + } + text = text.substring(source.length); + } else { + const reading = jp.convertReading(text[0], '', options.parsing.readingMode); + term.push({text: text[0], reading}); + text = text.substring(1); + } + results.push(term); + } + return results; + } + + async _textParseMecab(text, options) { + const results = []; + const rawResults = await this.mecab.parseText(text); + for (const [mecabName, parsedLines] of Object.entries(rawResults)) { + const result = []; + for (const parsedLine of parsedLines) { + for (const {expression, reading, source} of parsedLine) { + const term = []; + for (const {text: text2, furigana} of jp.distributeFuriganaInflected( + expression.length > 0 ? expression : source, + jp.convertKatakanaToHiragana(reading), + source + )) { + const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + term.push({text: text2, reading: reading2}); + } + result.push(term); + } + result.push([{text: '\n', reading: ''}]); + } + results.push([mecabName, result]); + } + return results; + } + // Message handlers _onApiYomichanCoreReady(_params, sender) { @@ -412,61 +464,27 @@ class Backend { async _onApiTextParse({text, optionsContext}) { const options = this.getOptions(optionsContext); const results = []; - while (text.length > 0) { - const term = []; - const [definitions, sourceLength] = await this.translator.findTerms( - 'simple', - text.substring(0, options.scanning.length), - {}, - options - ); - if (definitions.length > 0) { - dictTermsSort(definitions); - const {expression, reading} = definitions[0]; - const source = text.substring(0, sourceLength); - for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); - term.push({text: text2, reading: reading2}); - } - text = text.substring(source.length); - } else { - const reading = jp.convertReading(text[0], null, options.parsing.readingMode); - term.push({text: text[0], reading}); - text = text.substring(1); - } - results.push(term); + + if (options.parsing.enableScanningParser) { + results.push({ + source: 'scanning-parser', + id: 'scan', + content: await this._textParseScanning(text, options) + }); } - return results; - } - async _onApiTextParseMecab({text, optionsContext}) { - const options = this.getOptions(optionsContext); - const results = []; - const rawResults = await this.mecab.parseText(text); - for (const [mecabName, parsedLines] of Object.entries(rawResults)) { - const result = []; - for (const parsedLine of parsedLines) { - for (const {expression, reading, source} of parsedLine) { - const term = []; - if (expression !== null && reading !== null) { - for (const {text: text2, furigana} of jp.distributeFuriganaInflected( - expression, - jp.convertKatakanaToHiragana(reading), - source - )) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); - term.push({text: text2, reading: reading2}); - } - } else { - const reading2 = jp.convertReading(source, null, options.parsing.readingMode); - term.push({text: source, reading: reading2}); - } - result.push(term); - } - result.push([{text: '\n'}]); + if (options.parsing.enableMecabParser) { + const mecabResults = await this._textParseMecab(text, options); + for (const [mecabDictName, mecabDictResults] of mecabResults) { + results.push({ + source: 'mecab', + dictionary: mecabDictName, + id: `mecab-${mecabDictName}`, + content: mecabDictResults + }); } - results.push([mecabName, result]); } + return results; } diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js index ad4e3bad..260c815a 100644 --- a/ext/bg/js/database.js +++ b/ext/bg/js/database.js @@ -16,10 +16,7 @@ */ /* global - * JSZip - * JsonSchema * dictFieldSplit - * requestJson */ class Database { diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js index 5c49cca7..ac81acb5 100644 --- a/ext/bg/js/japanese.js +++ b/ext/bg/js/japanese.js @@ -82,6 +82,9 @@ const ITERATION_MARK_CODE_POINT = 0x3005; + const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; + const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; + const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // Existing functions @@ -121,25 +124,25 @@ return wanakana.toRomaji(text); } - function convertReading(expressionFragment, readingFragment, readingMode) { + function convertReading(expression, reading, readingMode) { switch (readingMode) { case 'hiragana': - return convertKatakanaToHiragana(readingFragment || ''); + return convertKatakanaToHiragana(reading); case 'katakana': - return convertHiraganaToKatakana(readingFragment || ''); + return convertHiraganaToKatakana(reading); case 'romaji': - if (readingFragment) { - return convertToRomaji(readingFragment); + if (reading) { + return convertToRomaji(reading); } else { - if (isStringEntirelyKana(expressionFragment)) { - return convertToRomaji(expressionFragment); + if (isStringEntirelyKana(expression)) { + return convertToRomaji(expression); } } - return readingFragment; + return reading; case 'none': - return null; + return ''; default: - return readingFragment; + return reading; } } @@ -297,7 +300,7 @@ const readingLeft = reading2.substring(group.text.length); const segs = segmentize(readingLeft, groups.splice(1)); if (segs) { - return [{text: group.text}].concat(segs); + return [{text: group.text, furigana: ''}].concat(segs); } } } else { @@ -365,13 +368,47 @@ } if (stemLength !== source.length) { - output.push({text: source.substring(stemLength)}); + output.push({text: source.substring(stemLength), furigana: ''}); } return output; } + // Miscellaneous + + function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) { + let result = ''; + let collapseCodePoint = -1; + const hasSourceMap = (sourceMap !== null); + for (const char of text) { + const c = char.codePointAt(0); + if ( + c === HIRAGANA_SMALL_TSU_CODE_POINT || + c === KATAKANA_SMALL_TSU_CODE_POINT || + c === KANA_PROLONGED_SOUND_MARK_CODE_POINT + ) { + if (collapseCodePoint !== c) { + collapseCodePoint = c; + if (!fullCollapse) { + result += char; + continue; + } + } + } else { + collapseCodePoint = -1; + result += char; + continue; + } + + if (hasSourceMap) { + sourceMap.combine(Math.max(0, result.length - 1), 1); + } + } + return result; + } + + // Exports Object.assign(jp, { @@ -383,6 +420,7 @@ convertHalfWidthKanaToFullWidth, convertAlphabeticToKana, distributeFurigana, - distributeFuriganaInflected + distributeFuriganaInflected, + collapseEmphaticSequences }); })(); diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js index cd6e6c57..597dceae 100644 --- a/ext/bg/js/mecab.js +++ b/ext/bg/js/mecab.js @@ -40,7 +40,36 @@ class Mecab { } async parseText(text) { - return await this.invoke('parse_text', {text}); + const rawResults = await this.invoke('parse_text', {text}); + // { + // 'mecab-name': [ + // // line1 + // [ + // {str expression: 'expression', str reading: 'reading', str source: 'source'}, + // {str expression: 'expression2', str reading: 'reading2', str source: 'source2'} + // ], + // line2, + // ... + // ], + // 'mecab-name2': [...] + // } + const results = {}; + for (const [mecabName, parsedLines] of Object.entries(rawResults)) { + const result = []; + for (const parsedLine of parsedLines) { + const line = []; + for (const {expression, reading, source} of parsedLine) { + line.push({ + expression: expression || '', + reading: reading || '', + source: source || '' + }); + } + result.push(line); + } + results[mecabName] = result; + } + return results; } startListener() { diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index d36b2bab..da26b628 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -179,7 +179,8 @@ function profileOptionsCreateDefaults() { convertNumericCharacters: 'false', convertAlphabeticCharacters: 'false', convertHiraganaToKatakana: 'false', - convertKatakanaToHiragana: 'variant' + convertKatakanaToHiragana: 'variant', + collapseEmphaticSequences: 'false' }, dictionaries: {}, diff --git a/ext/bg/js/search-frontend.js b/ext/bg/js/search-frontend.js index 9cc1436f..e534e771 100644 --- a/ext/bg/js/search-frontend.js +++ b/ext/bg/js/search-frontend.js @@ -19,18 +19,7 @@ * apiOptionsGet */ -async function searchFrontendSetup() { - await yomichan.prepare(); - - const optionsContext = { - depth: 0, - url: window.location.href - }; - const options = await apiOptionsGet(optionsContext); - if (!options.scanning.enableOnSearchPage) { return; } - - window.frontendInitializationData = {depth: 1, proxy: false}; - +function injectSearchFrontend() { const scriptSrcs = [ '/mixed/js/text-scanner.js', '/fg/js/frontend-api-receiver.js', @@ -62,4 +51,29 @@ async function searchFrontendSetup() { } } -searchFrontendSetup(); +async function main() { + await yomichan.prepare(); + + let optionsApplied = false; + + const applyOptions = async () => { + const optionsContext = { + depth: 0, + url: window.location.href + }; + const options = await apiOptionsGet(optionsContext); + if (!options.scanning.enableOnSearchPage || optionsApplied) { return; } + optionsApplied = true; + + window.frontendInitializationData = {depth: 1, proxy: false, isSearchPage: true}; + injectSearchFrontend(); + + yomichan.off('optionsUpdated', applyOptions); + }; + + yomichan.on('optionsUpdated', applyOptions); + + await applyOptions(); +} + +main(); diff --git a/ext/bg/js/search-query-parser-generator.js b/ext/bg/js/search-query-parser-generator.js index 390841c1..9e7ff8aa 100644 --- a/ext/bg/js/search-query-parser-generator.js +++ b/ext/bg/js/search-query-parser-generator.js @@ -36,7 +36,7 @@ class QueryParserGenerator { const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term'); for (const segment of term) { if (!segment.text.trim()) { continue; } - if (!segment.reading || !segment.reading.trim()) { + if (!segment.reading.trim()) { termContainer.appendChild(this.createSegmentText(segment.text)); } else { termContainer.appendChild(this.createSegment(segment)); @@ -71,7 +71,17 @@ class QueryParserGenerator { for (const parseResult of parseResults) { const optionContainer = this._templateHandler.instantiate('select-option'); optionContainer.value = parseResult.id; - optionContainer.textContent = parseResult.name; + switch (parseResult.source) { + case 'scanning-parser': + optionContainer.textContent = 'Scanning parser'; + break; + case 'mecab': + optionContainer.textContent = `MeCab: ${parseResult.dictionary}`; + break; + default: + optionContainer.textContent = 'Unrecognized dictionary'; + break; + } optionContainer.defaultSelected = selectedParser === parseResult.id; selectContainer.appendChild(optionContainer); } diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js index 01a0ace5..eb3b681c 100644 --- a/ext/bg/js/search-query-parser.js +++ b/ext/bg/js/search-query-parser.js @@ -21,13 +21,12 @@ * apiOptionsSet * apiTermsFind * apiTextParse - * apiTextParseMecab * docSentenceExtract */ class QueryParser extends TextScanner { constructor({getOptionsContext, setContent, setSpinnerVisible}) { - super(document.querySelector('#query-parser-content'), [], []); + super(document.querySelector('#query-parser-content'), () => [], []); this.getOptionsContext = getOptionsContext; this.setContent = setContent; @@ -128,7 +127,7 @@ class QueryParser extends TextScanner { this.setPreview(text); - this.parseResults = await this.parseText(text); + this.parseResults = await apiTextParse(text, this.getOptionsContext()); this.refreshSelectedParser(); this.renderParserSelect(); @@ -137,33 +136,11 @@ class QueryParser extends TextScanner { this.setSpinnerVisible(false); } - async parseText(text) { - const results = []; - if (this.options.parsing.enableScanningParser) { - results.push({ - name: 'Scanning parser', - id: 'scan', - parsedText: await apiTextParse(text, this.getOptionsContext()) - }); - } - if (this.options.parsing.enableMecabParser) { - const mecabResults = await apiTextParseMecab(text, this.getOptionsContext()); - for (const [mecabDictName, mecabDictResults] of mecabResults) { - results.push({ - name: `MeCab: ${mecabDictName}`, - id: `mecab-${mecabDictName}`, - parsedText: mecabDictResults - }); - } - } - return results; - } - setPreview(text) { const previewTerms = []; for (let i = 0, ii = text.length; i < ii; i += 2) { const tempText = text.substring(i, i + 2); - previewTerms.push([{text: tempText}]); + previewTerms.push([{text: tempText, reading: ''}]); } this.queryParser.textContent = ''; this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true)); @@ -183,6 +160,6 @@ class QueryParser extends TextScanner { const parseResult = this.getParseResult(); this.queryParser.textContent = ''; if (!parseResult) { return; } - this.queryParser.appendChild(this.queryParserGenerator.createParseResult(parseResult.parsedText)); + this.queryParser.appendChild(this.queryParserGenerator.createParseResult(parseResult.content)); } } diff --git a/ext/bg/js/search.js b/ext/bg/js/search.js index 2ba3e468..871c576b 100644 --- a/ext/bg/js/search.js +++ b/ext/bg/js/search.js @@ -208,7 +208,7 @@ class DisplaySearch extends Display { onCopy() { // ignore copy from search page - this.clipboardMonitor.setPreviousText(document.getSelection().toString().trim()); + this.clipboardMonitor.setPreviousText(window.getSelection().toString().trim()); } onExternalSearchUpdate({text}) { diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js index 8fd94562..308e92eb 100644 --- a/ext/bg/js/settings/main.js +++ b/ext/bg/js/settings/main.js @@ -118,6 +118,7 @@ async function formRead(options) { options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val(); options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val(); options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val(); + options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val(); options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked'); options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked'); @@ -199,6 +200,7 @@ async function formWrite(options) { $('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters); $('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana); $('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana); + $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences); $('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser); $('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser); diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index e4441384..aaa1a0ec 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -347,17 +347,27 @@ class Translator { getAllDeinflections(text, options) { const translationOptions = options.translation; + const collapseEmphaticOptions = [[false, false]]; + switch (translationOptions.collapseEmphaticSequences) { + case 'true': + collapseEmphaticOptions.push([true, false]); + break; + case 'full': + collapseEmphaticOptions.push([true, false], [true, true]); + break; + } const textOptionVariantArray = [ Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), - Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana) + Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), + collapseEmphaticOptions ]; const deinflections = []; const used = new Set(); - for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) { + for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) { let text2 = text; const sourceMap = new TextSourceMap(text2); if (halfWidth) { @@ -375,6 +385,9 @@ class Translator { if (hiragana) { text2 = jp.convertKatakanaToHiragana(text2); } + if (collapseEmphatic) { + text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap); + } for (let i = text2.length; i > 0; --i) { const text2Substring = text2.substring(0, i); |