summaryrefslogtreecommitdiff
path: root/ext/bg/js
diff options
context:
space:
mode:
Diffstat (limited to 'ext/bg/js')
-rw-r--r--ext/bg/js/backend.js124
-rw-r--r--ext/bg/js/database.js3
-rw-r--r--ext/bg/js/japanese.js64
-rw-r--r--ext/bg/js/mecab.js31
-rw-r--r--ext/bg/js/options.js3
-rw-r--r--ext/bg/js/search-frontend.js40
-rw-r--r--ext/bg/js/search-query-parser-generator.js14
-rw-r--r--ext/bg/js/search-query-parser.js31
-rw-r--r--ext/bg/js/search.js2
-rw-r--r--ext/bg/js/settings/main.js2
-rw-r--r--ext/bg/js/translator.js17
11 files changed, 215 insertions, 116 deletions
diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js
index bc687a24..04a3b932 100644
--- a/ext/bg/js/backend.js
+++ b/ext/bg/js/backend.js
@@ -30,7 +30,6 @@
* Translator
* conditionsTestValue
* dictConfigured
- * dictEnabledSet
* dictTermsSort
* handlebarsRenderDynamic
* jp
@@ -85,7 +84,6 @@ class Backend {
['kanjiFind', {handler: this._onApiKanjiFind.bind(this), async: true}],
['termsFind', {handler: this._onApiTermsFind.bind(this), async: true}],
['textParse', {handler: this._onApiTextParse.bind(this), async: true}],
- ['textParseMecab', {handler: this._onApiTextParseMecab.bind(this), async: true}],
['definitionAdd', {handler: this._onApiDefinitionAdd.bind(this), async: true}],
['definitionsAddable', {handler: this._onApiDefinitionsAddable.bind(this), async: true}],
['noteView', {handler: this._onApiNoteView.bind(this), async: true}],
@@ -321,6 +319,60 @@ class Backend {
return await this.dictionaryImporter.import(this.database, archiveSource, onProgress, details);
}
+ async _textParseScanning(text, options) {
+ const results = [];
+ while (text.length > 0) {
+ const term = [];
+ const [definitions, sourceLength] = await this.translator.findTerms(
+ 'simple',
+ text.substring(0, options.scanning.length),
+ {},
+ options
+ );
+ if (definitions.length > 0 && sourceLength > 0) {
+ dictTermsSort(definitions);
+ const {expression, reading} = definitions[0];
+ const source = text.substring(0, sourceLength);
+ for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
+ const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
+ term.push({text: text2, reading: reading2});
+ }
+ text = text.substring(source.length);
+ } else {
+ const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
+ term.push({text: text[0], reading});
+ text = text.substring(1);
+ }
+ results.push(term);
+ }
+ return results;
+ }
+
+ async _textParseMecab(text, options) {
+ const results = [];
+ const rawResults = await this.mecab.parseText(text);
+ for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
+ const result = [];
+ for (const parsedLine of parsedLines) {
+ for (const {expression, reading, source} of parsedLine) {
+ const term = [];
+ for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
+ expression.length > 0 ? expression : source,
+ jp.convertKatakanaToHiragana(reading),
+ source
+ )) {
+ const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
+ term.push({text: text2, reading: reading2});
+ }
+ result.push(term);
+ }
+ result.push([{text: '\n', reading: ''}]);
+ }
+ results.push([mecabName, result]);
+ }
+ return results;
+ }
+
// Message handlers
_onApiYomichanCoreReady(_params, sender) {
@@ -412,61 +464,27 @@ class Backend {
async _onApiTextParse({text, optionsContext}) {
const options = this.getOptions(optionsContext);
const results = [];
- while (text.length > 0) {
- const term = [];
- const [definitions, sourceLength] = await this.translator.findTerms(
- 'simple',
- text.substring(0, options.scanning.length),
- {},
- options
- );
- if (definitions.length > 0) {
- dictTermsSort(definitions);
- const {expression, reading} = definitions[0];
- const source = text.substring(0, sourceLength);
- for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
- const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
- term.push({text: text2, reading: reading2});
- }
- text = text.substring(source.length);
- } else {
- const reading = jp.convertReading(text[0], null, options.parsing.readingMode);
- term.push({text: text[0], reading});
- text = text.substring(1);
- }
- results.push(term);
+
+ if (options.parsing.enableScanningParser) {
+ results.push({
+ source: 'scanning-parser',
+ id: 'scan',
+ content: await this._textParseScanning(text, options)
+ });
}
- return results;
- }
- async _onApiTextParseMecab({text, optionsContext}) {
- const options = this.getOptions(optionsContext);
- const results = [];
- const rawResults = await this.mecab.parseText(text);
- for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
- const result = [];
- for (const parsedLine of parsedLines) {
- for (const {expression, reading, source} of parsedLine) {
- const term = [];
- if (expression !== null && reading !== null) {
- for (const {text: text2, furigana} of jp.distributeFuriganaInflected(
- expression,
- jp.convertKatakanaToHiragana(reading),
- source
- )) {
- const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
- term.push({text: text2, reading: reading2});
- }
- } else {
- const reading2 = jp.convertReading(source, null, options.parsing.readingMode);
- term.push({text: source, reading: reading2});
- }
- result.push(term);
- }
- result.push([{text: '\n'}]);
+ if (options.parsing.enableMecabParser) {
+ const mecabResults = await this._textParseMecab(text, options);
+ for (const [mecabDictName, mecabDictResults] of mecabResults) {
+ results.push({
+ source: 'mecab',
+ dictionary: mecabDictName,
+ id: `mecab-${mecabDictName}`,
+ content: mecabDictResults
+ });
}
- results.push([mecabName, result]);
}
+
return results;
}
diff --git a/ext/bg/js/database.js b/ext/bg/js/database.js
index ad4e3bad..260c815a 100644
--- a/ext/bg/js/database.js
+++ b/ext/bg/js/database.js
@@ -16,10 +16,7 @@
*/
/* global
- * JSZip
- * JsonSchema
* dictFieldSplit
- * requestJson
*/
class Database {
diff --git a/ext/bg/js/japanese.js b/ext/bg/js/japanese.js
index 5c49cca7..ac81acb5 100644
--- a/ext/bg/js/japanese.js
+++ b/ext/bg/js/japanese.js
@@ -82,6 +82,9 @@
const ITERATION_MARK_CODE_POINT = 0x3005;
+ const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
+ const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
+ const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
// Existing functions
@@ -121,25 +124,25 @@
return wanakana.toRomaji(text);
}
- function convertReading(expressionFragment, readingFragment, readingMode) {
+ function convertReading(expression, reading, readingMode) {
switch (readingMode) {
case 'hiragana':
- return convertKatakanaToHiragana(readingFragment || '');
+ return convertKatakanaToHiragana(reading);
case 'katakana':
- return convertHiraganaToKatakana(readingFragment || '');
+ return convertHiraganaToKatakana(reading);
case 'romaji':
- if (readingFragment) {
- return convertToRomaji(readingFragment);
+ if (reading) {
+ return convertToRomaji(reading);
} else {
- if (isStringEntirelyKana(expressionFragment)) {
- return convertToRomaji(expressionFragment);
+ if (isStringEntirelyKana(expression)) {
+ return convertToRomaji(expression);
}
}
- return readingFragment;
+ return reading;
case 'none':
- return null;
+ return '';
default:
- return readingFragment;
+ return reading;
}
}
@@ -297,7 +300,7 @@
const readingLeft = reading2.substring(group.text.length);
const segs = segmentize(readingLeft, groups.splice(1));
if (segs) {
- return [{text: group.text}].concat(segs);
+ return [{text: group.text, furigana: ''}].concat(segs);
}
}
} else {
@@ -365,13 +368,47 @@
}
if (stemLength !== source.length) {
- output.push({text: source.substring(stemLength)});
+ output.push({text: source.substring(stemLength), furigana: ''});
}
return output;
}
+ // Miscellaneous
+
+ function collapseEmphaticSequences(text, fullCollapse, sourceMap=null) {
+ let result = '';
+ let collapseCodePoint = -1;
+ const hasSourceMap = (sourceMap !== null);
+ for (const char of text) {
+ const c = char.codePointAt(0);
+ if (
+ c === HIRAGANA_SMALL_TSU_CODE_POINT ||
+ c === KATAKANA_SMALL_TSU_CODE_POINT ||
+ c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
+ ) {
+ if (collapseCodePoint !== c) {
+ collapseCodePoint = c;
+ if (!fullCollapse) {
+ result += char;
+ continue;
+ }
+ }
+ } else {
+ collapseCodePoint = -1;
+ result += char;
+ continue;
+ }
+
+ if (hasSourceMap) {
+ sourceMap.combine(Math.max(0, result.length - 1), 1);
+ }
+ }
+ return result;
+ }
+
+
// Exports
Object.assign(jp, {
@@ -383,6 +420,7 @@
convertHalfWidthKanaToFullWidth,
convertAlphabeticToKana,
distributeFurigana,
- distributeFuriganaInflected
+ distributeFuriganaInflected,
+ collapseEmphaticSequences
});
})();
diff --git a/ext/bg/js/mecab.js b/ext/bg/js/mecab.js
index cd6e6c57..597dceae 100644
--- a/ext/bg/js/mecab.js
+++ b/ext/bg/js/mecab.js
@@ -40,7 +40,36 @@ class Mecab {
}
async parseText(text) {
- return await this.invoke('parse_text', {text});
+ const rawResults = await this.invoke('parse_text', {text});
+ // {
+ // 'mecab-name': [
+ // // line1
+ // [
+ // {str expression: 'expression', str reading: 'reading', str source: 'source'},
+ // {str expression: 'expression2', str reading: 'reading2', str source: 'source2'}
+ // ],
+ // line2,
+ // ...
+ // ],
+ // 'mecab-name2': [...]
+ // }
+ const results = {};
+ for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
+ const result = [];
+ for (const parsedLine of parsedLines) {
+ const line = [];
+ for (const {expression, reading, source} of parsedLine) {
+ line.push({
+ expression: expression || '',
+ reading: reading || '',
+ source: source || ''
+ });
+ }
+ result.push(line);
+ }
+ results[mecabName] = result;
+ }
+ return results;
}
startListener() {
diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js
index d36b2bab..da26b628 100644
--- a/ext/bg/js/options.js
+++ b/ext/bg/js/options.js
@@ -179,7 +179,8 @@ function profileOptionsCreateDefaults() {
convertNumericCharacters: 'false',
convertAlphabeticCharacters: 'false',
convertHiraganaToKatakana: 'false',
- convertKatakanaToHiragana: 'variant'
+ convertKatakanaToHiragana: 'variant',
+ collapseEmphaticSequences: 'false'
},
dictionaries: {},
diff --git a/ext/bg/js/search-frontend.js b/ext/bg/js/search-frontend.js
index 9cc1436f..e534e771 100644
--- a/ext/bg/js/search-frontend.js
+++ b/ext/bg/js/search-frontend.js
@@ -19,18 +19,7 @@
* apiOptionsGet
*/
-async function searchFrontendSetup() {
- await yomichan.prepare();
-
- const optionsContext = {
- depth: 0,
- url: window.location.href
- };
- const options = await apiOptionsGet(optionsContext);
- if (!options.scanning.enableOnSearchPage) { return; }
-
- window.frontendInitializationData = {depth: 1, proxy: false};
-
+function injectSearchFrontend() {
const scriptSrcs = [
'/mixed/js/text-scanner.js',
'/fg/js/frontend-api-receiver.js',
@@ -62,4 +51,29 @@ async function searchFrontendSetup() {
}
}
-searchFrontendSetup();
+async function main() {
+ await yomichan.prepare();
+
+ let optionsApplied = false;
+
+ const applyOptions = async () => {
+ const optionsContext = {
+ depth: 0,
+ url: window.location.href
+ };
+ const options = await apiOptionsGet(optionsContext);
+ if (!options.scanning.enableOnSearchPage || optionsApplied) { return; }
+ optionsApplied = true;
+
+ window.frontendInitializationData = {depth: 1, proxy: false, isSearchPage: true};
+ injectSearchFrontend();
+
+ yomichan.off('optionsUpdated', applyOptions);
+ };
+
+ yomichan.on('optionsUpdated', applyOptions);
+
+ await applyOptions();
+}
+
+main();
diff --git a/ext/bg/js/search-query-parser-generator.js b/ext/bg/js/search-query-parser-generator.js
index 390841c1..9e7ff8aa 100644
--- a/ext/bg/js/search-query-parser-generator.js
+++ b/ext/bg/js/search-query-parser-generator.js
@@ -36,7 +36,7 @@ class QueryParserGenerator {
const termContainer = this._templateHandler.instantiate(preview ? 'term-preview' : 'term');
for (const segment of term) {
if (!segment.text.trim()) { continue; }
- if (!segment.reading || !segment.reading.trim()) {
+ if (!segment.reading.trim()) {
termContainer.appendChild(this.createSegmentText(segment.text));
} else {
termContainer.appendChild(this.createSegment(segment));
@@ -71,7 +71,17 @@ class QueryParserGenerator {
for (const parseResult of parseResults) {
const optionContainer = this._templateHandler.instantiate('select-option');
optionContainer.value = parseResult.id;
- optionContainer.textContent = parseResult.name;
+ switch (parseResult.source) {
+ case 'scanning-parser':
+ optionContainer.textContent = 'Scanning parser';
+ break;
+ case 'mecab':
+ optionContainer.textContent = `MeCab: ${parseResult.dictionary}`;
+ break;
+ default:
+ optionContainer.textContent = 'Unrecognized dictionary';
+ break;
+ }
optionContainer.defaultSelected = selectedParser === parseResult.id;
selectContainer.appendChild(optionContainer);
}
diff --git a/ext/bg/js/search-query-parser.js b/ext/bg/js/search-query-parser.js
index 01a0ace5..eb3b681c 100644
--- a/ext/bg/js/search-query-parser.js
+++ b/ext/bg/js/search-query-parser.js
@@ -21,13 +21,12 @@
* apiOptionsSet
* apiTermsFind
* apiTextParse
- * apiTextParseMecab
* docSentenceExtract
*/
class QueryParser extends TextScanner {
constructor({getOptionsContext, setContent, setSpinnerVisible}) {
- super(document.querySelector('#query-parser-content'), [], []);
+ super(document.querySelector('#query-parser-content'), () => [], []);
this.getOptionsContext = getOptionsContext;
this.setContent = setContent;
@@ -128,7 +127,7 @@ class QueryParser extends TextScanner {
this.setPreview(text);
- this.parseResults = await this.parseText(text);
+ this.parseResults = await apiTextParse(text, this.getOptionsContext());
this.refreshSelectedParser();
this.renderParserSelect();
@@ -137,33 +136,11 @@ class QueryParser extends TextScanner {
this.setSpinnerVisible(false);
}
- async parseText(text) {
- const results = [];
- if (this.options.parsing.enableScanningParser) {
- results.push({
- name: 'Scanning parser',
- id: 'scan',
- parsedText: await apiTextParse(text, this.getOptionsContext())
- });
- }
- if (this.options.parsing.enableMecabParser) {
- const mecabResults = await apiTextParseMecab(text, this.getOptionsContext());
- for (const [mecabDictName, mecabDictResults] of mecabResults) {
- results.push({
- name: `MeCab: ${mecabDictName}`,
- id: `mecab-${mecabDictName}`,
- parsedText: mecabDictResults
- });
- }
- }
- return results;
- }
-
setPreview(text) {
const previewTerms = [];
for (let i = 0, ii = text.length; i < ii; i += 2) {
const tempText = text.substring(i, i + 2);
- previewTerms.push([{text: tempText}]);
+ previewTerms.push([{text: tempText, reading: ''}]);
}
this.queryParser.textContent = '';
this.queryParser.appendChild(this.queryParserGenerator.createParseResult(previewTerms, true));
@@ -183,6 +160,6 @@ class QueryParser extends TextScanner {
const parseResult = this.getParseResult();
this.queryParser.textContent = '';
if (!parseResult) { return; }
- this.queryParser.appendChild(this.queryParserGenerator.createParseResult(parseResult.parsedText));
+ this.queryParser.appendChild(this.queryParserGenerator.createParseResult(parseResult.content));
}
}
diff --git a/ext/bg/js/search.js b/ext/bg/js/search.js
index 2ba3e468..871c576b 100644
--- a/ext/bg/js/search.js
+++ b/ext/bg/js/search.js
@@ -208,7 +208,7 @@ class DisplaySearch extends Display {
onCopy() {
// ignore copy from search page
- this.clipboardMonitor.setPreviousText(document.getSelection().toString().trim());
+ this.clipboardMonitor.setPreviousText(window.getSelection().toString().trim());
}
onExternalSearchUpdate({text}) {
diff --git a/ext/bg/js/settings/main.js b/ext/bg/js/settings/main.js
index 8fd94562..308e92eb 100644
--- a/ext/bg/js/settings/main.js
+++ b/ext/bg/js/settings/main.js
@@ -118,6 +118,7 @@ async function formRead(options) {
options.translation.convertAlphabeticCharacters = $('#translation-convert-alphabetic-characters').val();
options.translation.convertHiraganaToKatakana = $('#translation-convert-hiragana-to-katakana').val();
options.translation.convertKatakanaToHiragana = $('#translation-convert-katakana-to-hiragana').val();
+ options.translation.collapseEmphaticSequences = $('#translation-collapse-emphatic-sequences').val();
options.parsing.enableScanningParser = $('#parsing-scan-enable').prop('checked');
options.parsing.enableMecabParser = $('#parsing-mecab-enable').prop('checked');
@@ -199,6 +200,7 @@ async function formWrite(options) {
$('#translation-convert-alphabetic-characters').val(options.translation.convertAlphabeticCharacters);
$('#translation-convert-hiragana-to-katakana').val(options.translation.convertHiraganaToKatakana);
$('#translation-convert-katakana-to-hiragana').val(options.translation.convertKatakanaToHiragana);
+ $('#translation-collapse-emphatic-sequences').val(options.translation.collapseEmphaticSequences);
$('#parsing-scan-enable').prop('checked', options.parsing.enableScanningParser);
$('#parsing-mecab-enable').prop('checked', options.parsing.enableMecabParser);
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index e4441384..aaa1a0ec 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -347,17 +347,27 @@ class Translator {
getAllDeinflections(text, options) {
const translationOptions = options.translation;
+ const collapseEmphaticOptions = [[false, false]];
+ switch (translationOptions.collapseEmphaticSequences) {
+ case 'true':
+ collapseEmphaticOptions.push([true, false]);
+ break;
+ case 'full':
+ collapseEmphaticOptions.push([true, false], [true, true]);
+ break;
+ }
const textOptionVariantArray = [
Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
- Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana)
+ Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
+ collapseEmphaticOptions
];
const deinflections = [];
const used = new Set();
- for (const [halfWidth, numeric, alphabetic, katakana, hiragana] of Translator.getArrayVariants(textOptionVariantArray)) {
+ for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {
let text2 = text;
const sourceMap = new TextSourceMap(text2);
if (halfWidth) {
@@ -375,6 +385,9 @@ class Translator {
if (hiragana) {
text2 = jp.convertKatakanaToHiragana(text2);
}
+ if (collapseEmphatic) {
+ text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
+ }
for (let i = text2.length; i > 0; --i) {
const text2Substring = text2.substring(0, i);