about summary refs log tree commit diff
diff options
context:
space:
mode:
author toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2024-01-28 07:22:47 -0500
committer GitHub <noreply@github.com> 2024-01-28 12:22:47 +0000
commit acc013a1a8051d34322f0f5f91d7bdecc0a18843 (patch)
tree 601344b2f047f395548ddfb16a83319af10464f9
parent a51f1ab2dc675a49bfeeb08cc24b97eb8d888e4a (diff)
JapaneseUtil refactor (#555)
* Copy functions from JapaneseUtil
* Remove JapaneseUtil
* Update usages of JapaneseUtil functions
-rw-r--r-- .eslintrc.json 4
-rw-r--r-- ext/js/background/backend.js 21
-rw-r--r-- ext/js/background/offscreen.js 5
-rw-r--r-- ext/js/comm/clipboard-monitor.js 9
-rw-r--r-- ext/js/data/anki-note-builder.js 12
-rw-r--r-- ext/js/data/sandbox/anki-note-data-creator.js 12
-rw-r--r-- ext/js/display/display-anki.js 5
-rw-r--r-- ext/js/display/display-generator.js 26
-rw-r--r-- ext/js/display/display.js 14
-rw-r--r-- ext/js/display/popup-main.js 7
-rw-r--r-- ext/js/display/query-parser.js 40
-rw-r--r-- ext/js/display/sandbox/pronunciation-generator.js 24
-rw-r--r-- ext/js/display/sandbox/structured-content-generator.js 9
-rw-r--r-- ext/js/display/search-display-controller.js 4
-rw-r--r-- ext/js/display/search-main.js 10
-rw-r--r-- ext/js/language/japanese-wanakana.js 122
-rw-r--r-- ext/js/language/japanese.js 740
-rw-r--r-- ext/js/language/sandbox/japanese-util.js 885
-rw-r--r-- ext/js/language/translator.js 22
-rw-r--r-- ext/js/media/audio-downloader.js 9
-rw-r--r-- ext/js/pages/settings/anki-templates-controller.js 3
-rw-r--r-- ext/js/templates/sandbox/anki-template-renderer.js 26
-rw-r--r-- test/fixtures/translator-test.js 6
-rw-r--r-- test/japanese-util.test.js 12
-rw-r--r-- test/utilities/anki.js 4
-rw-r--r-- types/ext/display.d.ts 3
-rw-r--r-- types/ext/translator.d.ts 3
27 files changed, 980 insertions, 1057 deletions
diff --git a/.eslintrc.json b/.eslintrc.json
index d4bb3d23..eec25b5c 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -613,7 +613,9 @@
"files": [
"ext/js/core.js",
"ext/js/core/extension-error.js",
- "ext/js/**/sandbox/**/*.js"
+ "ext/js/**/sandbox/**/*.js",
+ "ext/js/language/japanese.js",
+ "ext/js/language/japanese-wanakana.js"
],
"env": {
"webextensions": false
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index b61f27b1..74c1370c 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -16,7 +16,6 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import * as wanakana from '../../lib/wanakana.js';
import {AccessibilityController} from '../accessibility/accessibility-controller.js';
import {AnkiConnect} from '../comm/anki-connect.js';
import {ClipboardMonitor} from '../comm/clipboard-monitor.js';
@@ -34,7 +33,7 @@ import {ArrayBufferUtil} from '../data/sandbox/array-buffer-util.js';
import {DictionaryDatabase} from '../dictionary/dictionary-database.js';
import {Environment} from '../extension/environment.js';
import {ObjectPropertyAccessor} from '../general/object-property-accessor.js';
-import {JapaneseUtil} from '../language/sandbox/japanese-util.js';
+import {distributeFuriganaInflected, isCodePointJapanese, isStringPartiallyJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/japanese.js';
import {Translator} from '../language/translator.js';
import {AudioDownloader} from '../media/audio-downloader.js';
import {MediaUtil} from '../media/media-util.js';
@@ -54,8 +53,6 @@ export class Backend {
constructor(webExtension) {
/** @type {import('../extension/web-extension.js').WebExtension} */
this._webExtension = webExtension;
- /** @type {JapaneseUtil} */
- this._japaneseUtil = new JapaneseUtil(wanakana);
/** @type {Environment} */
this._environment = new Environment();
/** @type {AnkiConnect} */
@@ -70,7 +67,6 @@ export class Backend {
this._dictionaryDatabase = new DictionaryDatabase();
/** @type {Translator|TranslatorProxy} */
this._translator = new Translator({
- japaneseUtil: this._japaneseUtil,
database: this._dictionaryDatabase
});
/** @type {ClipboardReader|ClipboardReaderProxy} */
@@ -93,7 +89,6 @@ export class Backend {
/** @type {ClipboardMonitor} */
this._clipboardMonitor = new ClipboardMonitor({
- japaneseUtil: this._japaneseUtil,
clipboardReader: this._clipboardReader
});
/** @type {?import('settings').Options} */
@@ -108,7 +103,6 @@ export class Backend {
this._requestBuilder = new RequestBuilder();
/** @type {AudioDownloader} */
this._audioDownloader = new AudioDownloader({
- japaneseUtil: this._japaneseUtil,
requestBuilder: this._requestBuilder
});
/** @type {OptionsUtil} */
@@ -852,7 +846,7 @@ export class Backend {
/** @type {import('api').ApiHandler<'textHasJapaneseCharacters'>} */
_onApiTextHasJapaneseCharacters({text}) {
- return this._japaneseUtil.isStringPartiallyJapanese(text);
+ return isStringPartiallyJapanese(text);
}
/** @type {import('api').ApiHandler<'getTermFrequencies'>} */
@@ -1376,7 +1370,6 @@ export class Backend {
* @returns {Promise<import('api').ParseTextLine[]>}
*/
async _textParseScanning(text, scanLength, optionsContext) {
- const jp = this._japaneseUtil;
/** @type {import('translator').FindTermsMode} */
const mode = 'simple';
const options = this._getProfileOptions(optionsContext, false);
@@ -1398,13 +1391,13 @@ export class Backend {
if (
dictionaryEntries.length > 0 &&
originalTextLength > 0 &&
- (originalTextLength !== character.length || jp.isCodePointJapanese(codePoint))
+ (originalTextLength !== character.length || isCodePointJapanese(codePoint))
) {
previousUngroupedSegment = null;
const {headwords: [{term, reading}]} = dictionaryEntries[0];
const source = text.substring(i, i + originalTextLength);
const textSegments = [];
- for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(term, reading, source)) {
+ for (const {text: text2, reading: reading2} of distributeFuriganaInflected(term, reading, source)) {
textSegments.push({text: text2, reading: reading2});
}
results.push(textSegments);
@@ -1427,8 +1420,6 @@ export class Backend {
* @returns {Promise<import('backend').MecabParseResults>}
*/
async _textParseMecab(text) {
- const jp = this._japaneseUtil;
-
let parseTextResults;
try {
parseTextResults = await this._mecab.parseText(text);
@@ -1444,9 +1435,9 @@ export class Backend {
for (const line of lines) {
for (const {term, reading, source} of line) {
const termParts = [];
- for (const {text: text2, reading: reading2} of jp.distributeFuriganaInflected(
+ for (const {text: text2, reading: reading2} of distributeFuriganaInflected(
term.length > 0 ? term : source,
- jp.convertKatakanaToHiragana(reading),
+ jpConvertKatakanaToHiragana(reading),
source
)) {
termParts.push({text: text2, reading: reading2});
diff --git a/ext/js/background/offscreen.js b/ext/js/background/offscreen.js
index 470ea0e2..a0f5592e 100644
--- a/ext/js/background/offscreen.js
+++ b/ext/js/background/offscreen.js
@@ -16,12 +16,10 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import * as wanakana from '../../lib/wanakana.js';
import {ClipboardReader} from '../comm/clipboard-reader.js';
import {createApiMap, invokeApiMapHandler} from '../core/api-map.js';
import {ArrayBufferUtil} from '../data/sandbox/array-buffer-util.js';
import {DictionaryDatabase} from '../dictionary/dictionary-database.js';
-import {JapaneseUtil} from '../language/sandbox/japanese-util.js';
import {Translator} from '../language/translator.js';
/**
@@ -33,13 +31,10 @@ export class Offscreen {
* Creates a new instance.
*/
constructor() {
- /** @type {JapaneseUtil} */
- this._japaneseUtil = new JapaneseUtil(wanakana);
/** @type {DictionaryDatabase} */
this._dictionaryDatabase = new DictionaryDatabase();
/** @type {Translator} */
this._translator = new Translator({
- japaneseUtil: this._japaneseUtil,
database: this._dictionaryDatabase
});
/** @type {ClipboardReader} */
diff --git a/ext/js/comm/clipboard-monitor.js b/ext/js/comm/clipboard-monitor.js
index a1ea3362..a8e79a1b 100644
--- a/ext/js/comm/clipboard-monitor.js
+++ b/ext/js/comm/clipboard-monitor.js
@@ -17,18 +17,17 @@
*/
import {EventDispatcher} from '../core/event-dispatcher.js';
+import {isStringPartiallyJapanese} from '../language/japanese.js';
/**
* @augments EventDispatcher<import('clipboard-monitor').Events>
*/
export class ClipboardMonitor extends EventDispatcher {
/**
- * @param {{japaneseUtil: import('../language/sandbox/japanese-util.js').JapaneseUtil, clipboardReader: import('clipboard-monitor').ClipboardReaderLike}} details
+ * @param {{clipboardReader: import('clipboard-monitor').ClipboardReaderLike}} details
*/
- constructor({japaneseUtil, clipboardReader}) {
+ constructor({clipboardReader}) {
super();
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
/** @type {import('clipboard-monitor').ClipboardReaderLike} */
this._clipboardReader = clipboardReader;
/** @type {?import('core').Timeout} */
@@ -72,7 +71,7 @@ export class ClipboardMonitor extends EventDispatcher {
text !== this._previousText
) {
this._previousText = text;
- if (canChange && this._japaneseUtil.isStringPartiallyJapanese(text)) {
+ if (canChange && isStringPartiallyJapanese(text)) {
this.trigger('change', {text});
}
}
diff --git a/ext/js/data/anki-note-builder.js b/ext/js/data/anki-note-builder.js
index 48564d54..815e7f3f 100644
--- a/ext/js/data/anki-note-builder.js
+++ b/ext/js/data/anki-note-builder.js
@@ -16,20 +16,18 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {deferPromise} from '../core/utilities.js';
import {ExtensionError} from '../core/extension-error.js';
+import {deferPromise} from '../core/utilities.js';
+import {convertHiraganaToKatakana, convertKatakanaToHiragana} from '../language/japanese.js';
import {yomitan} from '../yomitan.js';
import {AnkiUtil} from './anki-util.js';
export class AnkiNoteBuilder {
/**
* Initiate an instance of AnkiNoteBuilder.
- * @param {import('../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil
* @param {import('../templates/template-renderer-proxy.js').TemplateRendererProxy|import('../templates/sandbox/template-renderer.js').TemplateRenderer} templateRenderer
*/
- constructor(japaneseUtil, templateRenderer) {
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
+ constructor(templateRenderer) {
/** @type {RegExp} */
this._markerPattern = AnkiUtil.cloneFieldMarkerPattern(true);
/** @type {import('../templates/template-renderer-proxy.js').TemplateRendererProxy|import('../templates/sandbox/template-renderer.js').TemplateRenderer} */
@@ -530,9 +528,9 @@ export class AnkiNoteBuilder {
_convertReading(reading, readingMode) {
switch (readingMode) {
case 'hiragana':
- return this._japaneseUtil.convertKatakanaToHiragana(reading);
+ return convertKatakanaToHiragana(reading);
case 'katakana':
- return this._japaneseUtil.convertHiraganaToKatakana(reading);
+ return convertHiraganaToKatakana(reading);
default:
return reading;
}
diff --git a/ext/js/data/sandbox/anki-note-data-creator.js b/ext/js/data/sandbox/anki-note-data-creator.js
index 5a608cd2..fc787a66 100644
--- a/ext/js/data/sandbox/anki-note-data-creator.js
+++ b/ext/js/data/sandbox/anki-note-data-creator.js
@@ -17,6 +17,7 @@
*/
import {DictionaryDataUtil} from '../../dictionary/dictionary-data-util.js';
+import {distributeFurigana} from '../../language/japanese.js';
/**
* This class is used to convert the internal dictionary entry format to the
@@ -24,15 +25,6 @@ import {DictionaryDataUtil} from '../../dictionary/dictionary-data-util.js';
*/
export class AnkiNoteDataCreator {
/**
- * Creates a new instance.
- * @param {import('../../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil An instance of `JapaneseUtil`.
- */
- constructor(japaneseUtil) {
- /** @type {import('../../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
- }
-
- /**
* Creates a compatibility representation of the specified data.
* @param {string} marker The marker that is being used for template rendering.
* @param {import('anki-templates-internal').CreateDetails} details Information which is used to generate the data.
@@ -860,7 +852,7 @@ export class AnkiNoteDataCreator {
_getTermHeadwordFuriganaSegments(term, reading) {
/** @type {import('anki-templates').FuriganaSegment[]} */
const result = [];
- for (const {text, reading: reading2} of this._japaneseUtil.distributeFurigana(term, reading)) {
+ for (const {text, reading: reading2} of distributeFurigana(term, reading)) {
result.push({text, furigana: reading2});
}
return result;
diff --git a/ext/js/display/display-anki.js b/ext/js/display/display-anki.js
index c51ddfa2..5433142d 100644
--- a/ext/js/display/display-anki.js
+++ b/ext/js/display/display-anki.js
@@ -30,9 +30,8 @@ export class DisplayAnki {
/**
* @param {import('./display.js').Display} display
* @param {import('./display-audio.js').DisplayAudio} displayAudio
- * @param {import('../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil
*/
- constructor(display, displayAudio, japaneseUtil) {
+ constructor(display, displayAudio) {
/** @type {import('./display.js').Display} */
this._display = display;
/** @type {import('./display-audio.js').DisplayAudio} */
@@ -42,7 +41,7 @@ export class DisplayAnki {
/** @type {?string} */
this._ankiFieldTemplatesDefault = null;
/** @type {AnkiNoteBuilder} */
- this._ankiNoteBuilder = new AnkiNoteBuilder(japaneseUtil, new TemplateRendererProxy());
+ this._ankiNoteBuilder = new AnkiNoteBuilder(new TemplateRendererProxy());
/** @type {?import('./display-notification.js').DisplayNotification} */
this._errorNotification = null;
/** @type {?EventListenerCollection} */
diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js
index 7bf13b77..eef58bb0 100644
--- a/ext/js/display/display-generator.js
+++ b/ext/js/display/display-generator.js
@@ -16,10 +16,11 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {isObject} from '../core/utilities.js';
import {ExtensionError} from '../core/extension-error.js';
+import {isObject} from '../core/utilities.js';
import {DictionaryDataUtil} from '../dictionary/dictionary-data-util.js';
import {HtmlTemplateCollection} from '../dom/html-template-collection.js';
+import {distributeFurigana, getKanaMorae, getPitchCategory, isCodePointKanji, isStringPartiallyJapanese} from '../language/japanese.js';
import {yomitan} from '../yomitan.js';
import {PronunciationGenerator} from './sandbox/pronunciation-generator.js';
import {StructuredContentGenerator} from './sandbox/structured-content-generator.js';
@@ -28,9 +29,7 @@ export class DisplayGenerator {
/**
* @param {import('display').DisplayGeneratorConstructorDetails} details
*/
- constructor({japaneseUtil, contentManager, hotkeyHelpController = null}) {
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
+ constructor({contentManager, hotkeyHelpController = null}) {
/** @type {import('./display-content-manager.js').DisplayContentManager} */
this._contentManager = contentManager;
/** @type {?import('../input/hotkey-help-controller.js').HotkeyHelpController} */
@@ -38,9 +37,9 @@ export class DisplayGenerator {
/** @type {HtmlTemplateCollection} */
this._templates = new HtmlTemplateCollection();
/** @type {StructuredContentGenerator} */
- this._structuredContentGenerator = new StructuredContentGenerator(this._contentManager, japaneseUtil, document);
+ this._structuredContentGenerator = new StructuredContentGenerator(this._contentManager, document);
/** @type {PronunciationGenerator} */
- this._pronunciationGenerator = new PronunciationGenerator(japaneseUtil);
+ this._pronunciationGenerator = new PronunciationGenerator();
}
/** */
@@ -725,11 +724,9 @@ export class DisplayGenerator {
* @returns {HTMLElement}
*/
_createPronunciationPitchAccent(pitchAccent, details) {
- const jp = this._japaneseUtil;
-
const {position, nasalPositions, devoicePositions, tags} = pitchAccent;
const {reading, exclusiveTerms, exclusiveReadings} = details;
- const morae = jp.getKanaMorae(reading);
+ const morae = getKanaMorae(reading);
const node = this._instantiate('pronunciation');
@@ -912,10 +909,9 @@ export class DisplayGenerator {
* @param {string} text
*/
_appendKanjiLinks(container, text) {
- const jp = this._japaneseUtil;
let part = '';
for (const c of text) {
- if (jp.isCodePointKanji(/** @type {number} */ (c.codePointAt(0)))) {
+ if (isCodePointKanji(/** @type {number} */ (c.codePointAt(0)))) {
if (part.length > 0) {
container.appendChild(document.createTextNode(part));
part = '';
@@ -969,7 +965,7 @@ export class DisplayGenerator {
*/
_appendFurigana(container, term, reading, addText) {
container.lang = 'ja';
- const segments = this._japaneseUtil.distributeFurigana(term, reading);
+ const segments = distributeFurigana(term, reading);
for (const {text, reading: furigana} of segments) {
if (furigana) {
const ruby = document.createElement('ruby');
@@ -1000,7 +996,7 @@ export class DisplayGenerator {
_setTextContent(node, value, language) {
if (typeof language === 'string') {
node.lang = language;
- } else if (this._japaneseUtil.isStringPartiallyJapanese(value)) {
+ } else if (isStringPartiallyJapanese(value)) {
node.lang = 'ja';
}
@@ -1017,7 +1013,7 @@ export class DisplayGenerator {
// cause the text to not copy correctly.
if (typeof language === 'string') {
node.lang = language;
- } else if (this._japaneseUtil.isStringPartiallyJapanese(value)) {
+ } else if (isStringPartiallyJapanese(value)) {
node.lang = 'ja';
}
@@ -1051,7 +1047,7 @@ export class DisplayGenerator {
if (termPronunciation.headwordIndex !== headwordIndex) { continue; }
for (const pronunciation of termPronunciation.pronunciations) {
if (pronunciation.type !== 'pitch-accent') { continue; }
- const category = this._japaneseUtil.getPitchCategory(reading, pronunciation.position, isVerbOrAdjective);
+ const category = getPitchCategory(reading, pronunciation.position, isVerbOrAdjective);
if (category !== null) {
categories.add(category);
}
diff --git a/ext/js/display/display.js b/ext/js/display/display.js
index 689481f4..cff87309 100644
--- a/ext/js/display/display.js
+++ b/ext/js/display/display.js
@@ -48,11 +48,10 @@ export class Display extends EventDispatcher {
* @param {number|undefined} tabId
* @param {number|undefined} frameId
* @param {import('display').DisplayPageType} pageType
- * @param {import('../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil
* @param {import('../dom/document-focus-controller.js').DocumentFocusController} documentFocusController
* @param {import('../input/hotkey-handler.js').HotkeyHandler} hotkeyHandler
*/
- constructor(tabId, frameId, pageType, japaneseUtil, documentFocusController, hotkeyHandler) {
+ constructor(tabId, frameId, pageType, documentFocusController, hotkeyHandler) {
super();
/** @type {number|undefined} */
this._tabId = tabId;
@@ -60,8 +59,6 @@ export class Display extends EventDispatcher {
this._frameId = frameId;
/** @type {import('display').DisplayPageType} */
this._pageType = pageType;
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
/** @type {import('../dom/document-focus-controller.js').DocumentFocusController} */
this._documentFocusController = documentFocusController;
/** @type {import('../input/hotkey-handler.js').HotkeyHandler} */
@@ -90,7 +87,6 @@ export class Display extends EventDispatcher {
this._hotkeyHelpController = new HotkeyHelpController();
/** @type {DisplayGenerator} */
this._displayGenerator = new DisplayGenerator({
- japaneseUtil,
contentManager: this._contentManager,
hotkeyHelpController: this._hotkeyHelpController
});
@@ -132,8 +128,7 @@ export class Display extends EventDispatcher {
this._queryParserContainer = querySelectorNotNull(document, '#query-parser-container');
/** @type {QueryParser} */
this._queryParser = new QueryParser({
- getSearchContext: this._getSearchContext.bind(this),
- japaneseUtil
+ getSearchContext: this._getSearchContext.bind(this)
});
/** @type {HTMLElement} */
this._contentScrollElement = querySelectorNotNull(document, '#content-scroll');
@@ -240,11 +235,6 @@ export class Display extends EventDispatcher {
this._updateQueryParser();
}
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- get japaneseUtil() {
- return this._japaneseUtil;
- }
-
/** @type {number} */
get depth() {
return this._depth;
diff --git a/ext/js/display/popup-main.js b/ext/js/display/popup-main.js
index d4f622f2..870e039e 100644
--- a/ext/js/display/popup-main.js
+++ b/ext/js/display/popup-main.js
@@ -19,7 +19,6 @@
import {log} from '../core/logger.js';
import {DocumentFocusController} from '../dom/document-focus-controller.js';
import {HotkeyHandler} from '../input/hotkey-handler.js';
-import {JapaneseUtil} from '../language/sandbox/japanese-util.js';
import {yomitan} from '../yomitan.js';
import {DisplayAnki} from './display-anki.js';
import {DisplayAudio} from './display-audio.js';
@@ -37,18 +36,16 @@ async function main() {
const {tabId, frameId} = await yomitan.api.frameInformationGet();
- const japaneseUtil = new JapaneseUtil(null);
-
const hotkeyHandler = new HotkeyHandler();
hotkeyHandler.prepare();
- const display = new Display(tabId, frameId, 'popup', japaneseUtil, documentFocusController, hotkeyHandler);
+ const display = new Display(tabId, frameId, 'popup', documentFocusController, hotkeyHandler);
await display.prepare();
const displayAudio = new DisplayAudio(display);
displayAudio.prepare();
- const displayAnki = new DisplayAnki(display, displayAudio, japaneseUtil);
+ const displayAnki = new DisplayAnki(display, displayAudio);
displayAnki.prepare();
const displayProfileSelection = new DisplayProfileSelection(display);
diff --git a/ext/js/display/query-parser.js b/ext/js/display/query-parser.js
index e129e1be..eb053f38 100644
--- a/ext/js/display/query-parser.js
+++ b/ext/js/display/query-parser.js
@@ -19,6 +19,7 @@
import {EventDispatcher} from '../core/event-dispatcher.js';
import {log} from '../core/logger.js';
import {querySelectorNotNull} from '../dom/query-selector.js';
+import {convertHiraganaToKatakana, convertKatakanaToHiragana, isStringEntirelyKana} from '../language/japanese.js';
import {TextScanner} from '../language/text-scanner.js';
import {yomitan} from '../yomitan.js';
@@ -29,12 +30,10 @@ export class QueryParser extends EventDispatcher {
/**
* @param {import('display').QueryParserConstructorDetails} details
*/
- constructor({getSearchContext, japaneseUtil}) {
+ constructor({getSearchContext}) {
super();
/** @type {import('display').GetSearchContextCallback} */
this._getSearchContext = getSearchContext;
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
/** @type {string} */
this._text = '';
/** @type {?import('core').TokenObject} */
@@ -65,6 +64,10 @@ export class QueryParser extends EventDispatcher {
searchKanji: false,
searchOnClick: true
});
+ /** @type {?(import('../language/japanese-wanakana.js'))} */
+ this._japaneseWanakanaModule = null;
+ /** @type {?Promise<import('../language/japanese-wanakana.js')>} */
+ this._japaneseWanakanaModuleImport = null;
}
/** @type {string} */
@@ -93,7 +96,7 @@ export class QueryParser extends EventDispatcher {
this._queryParser.dataset.termSpacing = `${termSpacing}`;
}
if (typeof readingMode === 'string') {
- this._readingMode = readingMode;
+ this._setReadingMode(readingMode);
}
if (typeof useInternalParser === 'boolean') {
this._useInternalParser = useInternalParser;
@@ -346,15 +349,15 @@ export class QueryParser extends EventDispatcher {
_convertReading(term, reading) {
switch (this._readingMode) {
case 'hiragana':
- return this._japaneseUtil.convertKatakanaToHiragana(reading);
+ return convertKatakanaToHiragana(reading);
case 'katakana':
- return this._japaneseUtil.convertHiraganaToKatakana(reading);
+ return convertHiraganaToKatakana(reading);
case 'romaji':
- if (this._japaneseUtil.convertToRomajiSupported()) {
+ if (this._japaneseWanakanaModule !== null) {
if (reading.length > 0) {
- return this._japaneseUtil.convertToRomaji(reading);
- } else if (this._japaneseUtil.isStringEntirelyKana(term)) {
- return this._japaneseUtil.convertToRomaji(term);
+ return this._japaneseWanakanaModule.convertToRomaji(reading);
+ } else if (isStringEntirelyKana(term)) {
+ return this._japaneseWanakanaModule.convertToRomaji(term);
}
}
return reading;
@@ -398,4 +401,21 @@ export class QueryParser extends EventDispatcher {
node = node.parentNode;
}
}
+
+ /**
+ * @param {import('settings').ParsingReadingMode} value
+ */
+ _setReadingMode(value) {
+ this._readingMode = value;
+ if (value === 'romaji') {
+ this._loadJapaneseWanakanaModule();
+ }
+ }
+
+ /** */
+ _loadJapaneseWanakanaModule() {
+ if (this._japaneseWanakanaModuleImport !== null) { return; }
+ this._japaneseWanakanaModuleImport = import('../language/japanese-wanakana.js');
+ this._japaneseWanakanaModuleImport.then((value) => { this._japaneseWanakanaModule = value; });
+ }
}
diff --git a/ext/js/display/sandbox/pronunciation-generator.js b/ext/js/display/sandbox/pronunciation-generator.js
index cfcf82a1..45631e74 100644
--- a/ext/js/display/sandbox/pronunciation-generator.js
+++ b/ext/js/display/sandbox/pronunciation-generator.js
@@ -16,15 +16,9 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-export class PronunciationGenerator {
- /**
- * @param {import('../../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil
- */
- constructor(japaneseUtil) {
- /** @type {import('../../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
- }
+import {getKanaDiacriticInfo, isMoraPitchHigh} from '../../language/japanese.js';
+export class PronunciationGenerator {
/**
* @param {string[]} morae
* @param {number} downstepPosition
@@ -33,7 +27,6 @@ export class PronunciationGenerator {
* @returns {HTMLSpanElement}
*/
createPronunciationText(morae, downstepPosition, nasalPositions, devoicePositions) {
- const jp = this._japaneseUtil;
const nasalPositionsSet = nasalPositions.length > 0 ? new Set(nasalPositions) : null;
const devoicePositionsSet = devoicePositions.length > 0 ? new Set(devoicePositions) : null;
const container = document.createElement('span');
@@ -41,8 +34,8 @@ export class PronunciationGenerator {
for (let i = 0, ii = morae.length; i < ii; ++i) {
const i1 = i + 1;
const mora = morae[i];
- const highPitch = jp.isMoraPitchHigh(i, downstepPosition);
- const highPitchNext = jp.isMoraPitchHigh(i1, downstepPosition);
+ const highPitch = isMoraPitchHigh(i, downstepPosition);
+ const highPitchNext = isMoraPitchHigh(i1, downstepPosition);
const nasal = nasalPositionsSet !== null && nasalPositionsSet.has(i1);
const devoice = devoicePositionsSet !== null && devoicePositionsSet.has(i1);
@@ -76,7 +69,7 @@ export class PronunciationGenerator {
const n2 = characterNodes[0];
const character = /** @type {string} */ (n2.textContent);
- const characterInfo = jp.getKanaDiacriticInfo(character);
+ const characterInfo = getKanaDiacriticInfo(character);
if (characterInfo !== null) {
n1.dataset.originalText = mora;
n2.dataset.originalText = character;
@@ -111,7 +104,6 @@ export class PronunciationGenerator {
* @returns {SVGSVGElement}
*/
createPronunciationGraph(morae, downstepPosition) {
- const jp = this._japaneseUtil;
const ii = morae.length;
const svgns = 'http://www.w3.org/2000/svg';
@@ -131,8 +123,8 @@ export class PronunciationGenerator {
const pathPoints = [];
for (let i = 0; i < ii; ++i) {
- const highPitch = jp.isMoraPitchHigh(i, downstepPosition);
- const highPitchNext = jp.isMoraPitchHigh(i + 1, downstepPosition);
+ const highPitch = isMoraPitchHigh(i, downstepPosition);
+ const highPitchNext = isMoraPitchHigh(i + 1, downstepPosition);
const x = i * 50 + 25;
const y = highPitch ? 25 : 75;
if (highPitch && !highPitchNext) {
@@ -148,7 +140,7 @@ export class PronunciationGenerator {
pathPoints.splice(0, ii - 1);
{
- const highPitch = jp.isMoraPitchHigh(ii, downstepPosition);
+ const highPitch = isMoraPitchHigh(ii, downstepPosition);
const x = ii * 50 + 25;
const y = highPitch ? 25 : 75;
this._addGraphTriangle(svg, svgns, x, y);
diff --git a/ext/js/display/sandbox/structured-content-generator.js b/ext/js/display/sandbox/structured-content-generator.js
index ee86a7f4..60bf0ee5 100644
--- a/ext/js/display/sandbox/structured-content-generator.js
+++ b/ext/js/display/sandbox/structured-content-generator.js
@@ -16,17 +16,16 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+import {isStringPartiallyJapanese} from '../../language/japanese.js';
+
export class StructuredContentGenerator {
/**
* @param {import('../../display/display-content-manager.js').DisplayContentManager|import('../../templates/sandbox/anki-template-renderer-content-manager.js').AnkiTemplateRendererContentManager} contentManager
- * @param {import('../../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil
* @param {Document} document
*/
- constructor(contentManager, japaneseUtil, document) {
+ constructor(contentManager, document) {
/** @type {import('../../display/display-content-manager.js').DisplayContentManager|import('../../templates/sandbox/anki-template-renderer-content-manager.js').AnkiTemplateRendererContentManager} */
this._contentManager = contentManager;
- /** @type {import('../../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
/** @type {Document} */
this._document = document;
}
@@ -164,7 +163,7 @@ export class StructuredContentGenerator {
if (typeof content === 'string') {
if (content.length > 0) {
container.appendChild(this._createTextNode(content));
- if (language === null && this._japaneseUtil.isStringPartiallyJapanese(content)) {
+ if (language === null && isStringPartiallyJapanese(content)) {
container.lang = 'ja';
}
}
diff --git a/ext/js/display/search-display-controller.js b/ext/js/display/search-display-controller.js
index 594a80aa..ff4340c1 100644
--- a/ext/js/display/search-display-controller.js
+++ b/ext/js/display/search-display-controller.js
@@ -29,10 +29,9 @@ export class SearchDisplayController {
* @param {number|undefined} frameId
* @param {import('./display.js').Display} display
* @param {import('./display-audio.js').DisplayAudio} displayAudio
- * @param {import('../language/sandbox/japanese-util.js').JapaneseUtil} japaneseUtil
* @param {import('./search-persistent-state-controller.js').SearchPersistentStateController} searchPersistentStateController
*/
- constructor(tabId, frameId, display, displayAudio, japaneseUtil, searchPersistentStateController) {
+ constructor(tabId, frameId, display, displayAudio, searchPersistentStateController) {
/** @type {number|undefined} */
this._tabId = tabId;
/** @type {number|undefined} */
@@ -71,7 +70,6 @@ export class SearchDisplayController {
this._clipboardMonitorEnabled = false;
/** @type {ClipboardMonitor} */
this._clipboardMonitor = new ClipboardMonitor({
- japaneseUtil,
clipboardReader: {
getText: yomitan.api.clipboardGet.bind(yomitan.api)
}
diff --git a/ext/js/display/search-main.js b/ext/js/display/search-main.js
index 3cdd1f25..dedad163 100644
--- a/ext/js/display/search-main.js
+++ b/ext/js/display/search-main.js
@@ -16,11 +16,9 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import * as wanakana from '../../lib/wanakana.js';
import {log} from '../core/logger.js';
import {DocumentFocusController} from '../dom/document-focus-controller.js';
import {HotkeyHandler} from '../input/hotkey-handler.js';
-import {JapaneseUtil} from '../language/sandbox/japanese-util.js';
import {yomitan} from '../yomitan.js';
import {DisplayAnki} from './display-anki.js';
import {DisplayAudio} from './display-audio.js';
@@ -45,21 +43,19 @@ async function main() {
const {tabId, frameId} = await yomitan.api.frameInformationGet();
- const japaneseUtil = new JapaneseUtil(wanakana);
-
const hotkeyHandler = new HotkeyHandler();
hotkeyHandler.prepare();
- const display = new Display(tabId, frameId, 'search', japaneseUtil, documentFocusController, hotkeyHandler);
+ const display = new Display(tabId, frameId, 'search', documentFocusController, hotkeyHandler);
await display.prepare();
const displayAudio = new DisplayAudio(display);
displayAudio.prepare();
- const displayAnki = new DisplayAnki(display, displayAudio, japaneseUtil);
+ const displayAnki = new DisplayAnki(display, displayAudio);
displayAnki.prepare();
- const searchDisplayController = new SearchDisplayController(tabId, frameId, display, displayAudio, japaneseUtil, searchPersistentStateController);
+ const searchDisplayController = new SearchDisplayController(tabId, frameId, display, displayAudio, searchPersistentStateController);
await searchDisplayController.prepare();
display.initializeState();
diff --git a/ext/js/language/japanese-wanakana.js b/ext/js/language/japanese-wanakana.js
new file mode 100644
index 00000000..b48ab6d6
--- /dev/null
+++ b/ext/js/language/japanese-wanakana.js
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import * as wanakana from '../../lib/wanakana.js';
+
+/**
+ * Converts `text` to hiragana with `wanakana.toHiragana` and, when a source map is
+ * supplied, updates that source map in place to follow the conversion.
+ * NOTE(review): the semantics of `sourceMap.combine` and `sourceMap.insert` are defined in
+ * text-source-map.js, which is not visible here; confirm them before changing this logic.
+ * @param {string} text Text to convert; the caller in this file passes runs of `a`-`z` and `-`.
+ * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap Source map to update, or null to skip mapping.
+ * @param {number} sourceMapStart Source map index that corresponds to the first character of `text`.
+ * @returns {string} The converted text.
+ */
+function convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
+ const result = wanakana.toHiragana(text);
+
+ // Generate source mapping
+ if (sourceMap !== null) {
+ let i = 0;
+ let resultPos = 0;
+ const ii = text.length;
+ while (i < ii) {
+ // Find smallest matching substring
+ // (the shortest prefix of text whose own conversion is a prefix of the full result).
+ // If none is found before the end of text, the rest of the input maps to the rest of the result.
+ let iNext = i + 1;
+ let resultPosNext = result.length;
+ while (iNext < ii) {
+ const t = wanakana.toHiragana(text.substring(0, iNext));
+ if (t === result.substring(0, t.length)) {
+ resultPosNext = t.length;
+ break;
+ }
+ ++iNext;
+ }
+
+ // Merge characters
+ // removals = number of input characters consumed in this step beyond the first.
+ const removals = iNext - i - 1;
+ if (removals > 0) {
+ sourceMap.combine(sourceMapStart, removals);
+ }
+ ++sourceMapStart;
+
+ // Empty elements
+ // additions = number of output characters produced in this step beyond the first.
+ const additions = resultPosNext - resultPos - 1;
+ for (let j = 0; j < additions; ++j) {
+ sourceMap.insert(sourceMapStart, 0);
+ ++sourceMapStart;
+ }
+
+ i = iNext;
+ resultPos = resultPosNext;
+ }
+ }
+
+ return result;
+}
+
+/**
+ * Converts text to kana by delegating to `wanakana.toKana`.
+ * @param {string} text Text to convert.
+ * @returns {string} The value returned by `wanakana.toKana`.
+ */
+export function convertToKana(text) {
+ return wanakana.toKana(text);
+}
+
+/**
+ * Converts text to romaji by delegating to `wanakana.toRomaji`.
+ * @param {string} text Text to convert.
+ * @returns {string} The value returned by `wanakana.toRomaji`.
+ */
+export function convertToRomaji(text) {
+ return wanakana.toRomaji(text);
+}
+
+/**
+ * Converts the Latin letters in `text` to hiragana and leaves every other character as is.
+ * ASCII and fullwidth letters are first normalized to lowercase ASCII, and both hyphen forms
+ * to '-'. Each consecutive run of such characters is then converted as a unit by
+ * `convertAlphabeticPartToKana`.
+ * @param {string} text Text to convert.
+ * @param {?import('../general/text-source-map.js').TextSourceMap} sourceMap Optional source map, updated in place; defaults to null.
+ * @returns {string} The converted text.
+ */
+export function convertAlphabeticToKana(text, sourceMap = null) {
+ // part accumulates the current run of normalized letters/hyphens; result is the output so far.
+ let part = '';
+ let result = '';
+
+ for (const char of text) {
+ // Note: 0x61 is the character code for 'a'
+ let c = /** @type {number} */ (char.codePointAt(0));
+ if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
+ c += (0x61 - 0x41);
+ } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
+ // NOP; c += (0x61 - 0x61);
+ } else if (c >= 0xff21 && c <= 0xff3a) { // ['Ａ', 'Ｚ'] (fullwidth, U+FF21..U+FF3A)
+ c += (0x61 - 0xff21);
+ } else if (c >= 0xff41 && c <= 0xff5a) { // ['ａ', 'ｚ'] (fullwidth, U+FF41..U+FF5A)
+ c += (0x61 - 0xff41);
+ } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
+ c = 0x2d; // '-'
+ } else {
+ // Any other character ends the current run: flush it, then copy the character unchanged.
+ if (part.length > 0) {
+ result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+ part = '';
+ }
+ result += char;
+ continue;
+ }
+ part += String.fromCodePoint(c);
+ }
+
+ // Flush a run that extends to the end of the text.
+ if (part.length > 0) {
+ result += convertAlphabeticPartToKana(part, sourceMap, result.length);
+ }
+ return result;
+}
diff --git a/ext/js/language/japanese.js b/ext/js/language/japanese.js
new file mode 100644
index 00000000..88eb5af5
--- /dev/null
+++ b/ext/js/language/japanese.js
@@ -0,0 +1,740 @@
+/*
+ * Copyright (C) 2024 Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+// Code points of individual kana that are handled specially by functions in this file.
+const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; // っ
+const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // ッ
+const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; // ヵ
+const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6; // ヶ
+const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // ー
+
+// All ranges in this file are [min, max] code point pairs and are tested inclusively
+// (see isCodePointInRange / isCodePointInRanges).
+
+// Unicode Hiragana block.
+/** @type {import('japanese-util').CodepointRange} */
+const HIRAGANA_RANGE = [0x3040, 0x309f];
+// Unicode Katakana block.
+/** @type {import('japanese-util').CodepointRange} */
+const KATAKANA_RANGE = [0x30a0, 0x30ff];
+
+// Sub-ranges (ぁ..ゖ and ァ..ヶ) used for hiragana <-> katakana conversion: both have the same
+// length, so a character is converted by adding the difference between the range starts.
+/** @type {import('japanese-util').CodepointRange} */
+const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
+/** @type {import('japanese-util').CodepointRange} */
+const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
+
+// Ranges tested by isCodePointKana and isStringEntirelyKana.
+/** @type {import('japanese-util').CodepointRange[]} */
+const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
+
+// Inclusive code point ranges of the CJK ideograph blocks. Extensions B-F and the
+// compatibility supplement lie above U+FFFF, so callers must test code points, not UTF-16 units.
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];
+/** @type {import('japanese-util').CodepointRange} */
+const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
+// Ranges tested by isCodePointKanji.
+/** @type {import('japanese-util').CodepointRange[]} */
+const CJK_IDEOGRAPH_RANGES = [
+ CJK_UNIFIED_IDEOGRAPHS_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
+ CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
+ CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,
+ CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
+];
+
+/**
+ * Japanese character ranges, roughly ordered in order of expected frequency.
+ * Tested by isCodePointJapanese and isStringPartiallyJapanese. The ranges are scanned
+ * linearly, so the ordering only affects how quickly a match is found, not the result.
+ * @type {import('japanese-util').CodepointRange[]}
+ */
+const JAPANESE_RANGES = [
+ HIRAGANA_RANGE,
+ KATAKANA_RANGE,
+
+ ...CJK_IDEOGRAPH_RANGES,
+
+ [0xff66, 0xff9f], // Halfwidth katakana
+
+ [0x30fb, 0x30fc], // Katakana punctuation (already covered by KATAKANA_RANGE)
+ [0xff61, 0xff65], // Kana punctuation
+ [0x3000, 0x303f], // CJK punctuation
+
+ [0xff10, 0xff19], // Fullwidth numbers
+ [0xff21, 0xff3a], // Fullwidth upper case Latin letters
+ [0xff41, 0xff5a], // Fullwidth lower case Latin letters
+
+ [0xff01, 0xff0f], // Fullwidth punctuation 1
+ [0xff1a, 0xff1f], // Fullwidth punctuation 2
+ [0xff3b, 0xff3f], // Fullwidth punctuation 3
+ [0xff5b, 0xff60], // Fullwidth punctuation 4
+ [0xffe0, 0xffee] // Currency markers
+];
+
+// Small kana that getKanaMorae / getKanaMoraCount attach to the preceding kana instead of
+// counting as a separate mora. The small tsu (っ/ッ) is not in this set.
+const SMALL_KANA_SET = new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ'));
+
+// Maps each halfwidth katakana character (U+FF66..U+FF9D) to a 3-character string:
+//   [0] the fullwidth form,
+//   [1] the fullwidth form with dakuten, or '-' if there is none,
+//   [2] the fullwidth form with handakuten, or '-' if there is none.
+// convertHalfWidthKanaToFullWidth looks up input characters in this map, so the keys must be
+// the halfwidth characters themselves. They are written as \u escapes so that Unicode
+// compatibility normalization of this file cannot turn them into their fullwidth forms.
+const HALFWIDTH_KATAKANA_MAPPING = new Map([
+    ['\uff66', 'ヲヺ-'],
+    ['\uff67', 'ァ--'],
+    ['\uff68', 'ィ--'],
+    ['\uff69', 'ゥ--'],
+    ['\uff6a', 'ェ--'],
+    ['\uff6b', 'ォ--'],
+    ['\uff6c', 'ャ--'],
+    ['\uff6d', 'ュ--'],
+    ['\uff6e', 'ョ--'],
+    ['\uff6f', 'ッ--'],
+    ['\uff70', 'ー--'],
+    ['\uff71', 'ア--'],
+    ['\uff72', 'イ--'],
+    ['\uff73', 'ウヴ-'],
+    ['\uff74', 'エ--'],
+    ['\uff75', 'オ--'],
+    ['\uff76', 'カガ-'],
+    ['\uff77', 'キギ-'],
+    ['\uff78', 'クグ-'],
+    ['\uff79', 'ケゲ-'],
+    ['\uff7a', 'コゴ-'],
+    ['\uff7b', 'サザ-'],
+    ['\uff7c', 'シジ-'],
+    ['\uff7d', 'スズ-'],
+    ['\uff7e', 'セゼ-'],
+    ['\uff7f', 'ソゾ-'],
+    ['\uff80', 'タダ-'],
+    ['\uff81', 'チヂ-'],
+    ['\uff82', 'ツヅ-'],
+    ['\uff83', 'テデ-'],
+    ['\uff84', 'トド-'],
+    ['\uff85', 'ナ--'],
+    ['\uff86', 'ニ--'],
+    ['\uff87', 'ヌ--'],
+    ['\uff88', 'ネ--'],
+    ['\uff89', 'ノ--'],
+    ['\uff8a', 'ハバパ'],
+    ['\uff8b', 'ヒビピ'],
+    ['\uff8c', 'フブプ'],
+    ['\uff8d', 'ヘベペ'],
+    ['\uff8e', 'ホボポ'],
+    ['\uff8f', 'マ--'],
+    ['\uff90', 'ミ--'],
+    ['\uff91', 'ム--'],
+    ['\uff92', 'メ--'],
+    ['\uff93', 'モ--'],
+    ['\uff94', 'ヤ--'],
+    ['\uff95', 'ユ--'],
+    ['\uff96', 'ヨ--'],
+    ['\uff97', 'ラ--'],
+    ['\uff98', 'リ--'],
+    ['\uff99', 'ル--'],
+    ['\uff9a', 'レ--'],
+    ['\uff9b', 'ロ--'],
+    ['\uff9c', 'ワ--'],
+    ['\uff9d', 'ン--']
+]);
+
+// Maps a vowel to the kana grouped under that vowel.
+// The final entry lists 'の' and 'ノ' under the empty string. KANA_TO_VOWEL_MAPPING below is
+// filled in iteration order, so that entry overwrites their earlier 'o' entry, and
+// getProlongedHiragana returns null for them.
+const VOWEL_TO_KANA_MAPPING = new Map([
+ ['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'],
+ ['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
+ ['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
+ ['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'],
+ ['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
+ ['', 'のノ']
+]);
+
+// Inverse lookup built once at module load: kana character -> vowel ('' for の/ノ).
+/** @type {Map<string, string>} */
+const KANA_TO_VOWEL_MAPPING = new Map();
+for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
+ for (const character of characters) {
+ KANA_TO_VOWEL_MAPPING.set(character, vowel);
+ }
+}
+
+// Flat table of 3-character groups: base kana, its dakuten form, and its handakuten form
+// ('-' when the kana has no handakuten form).
+const kana = 'うゔ-かが-きぎ-くぐ-けげ-こご-さざ-しじ-すず-せぜ-そぞ-ただ-ちぢ-つづ-てで-とど-はばぱひびぴふぶぷへべぺほぼぽワヷ-ヰヸ-ウヴ-ヱヹ-ヲヺ-カガ-キギ-クグ-ケゲ-コゴ-サザ-シジ-スズ-セゼ-ソゾ-タダ-チヂ-ツヅ-テデ-トド-ハバパヒビピフブプヘベペホボポ';
+// Maps a kana carrying a diacritic to its base character and the diacritic type.
+// Base characters themselves are not keys; see getKanaDiacriticInfo.
+/** @type {Map<string, {character: string, type: import('japanese-util').DiacriticType}>} */
+const DIACRITIC_MAPPING = new Map();
+for (let i = 0, ii = kana.length; i < ii; i += 3) {
+ const character = kana[i];
+ const dakuten = kana[i + 1];
+ const handakuten = kana[i + 2];
+ DIACRITIC_MAPPING.set(dakuten, {character, type: 'dakuten'});
+ if (handakuten !== '-') {
+ DIACRITIC_MAPPING.set(handakuten, {character, type: 'handakuten'});
+ }
+}
+
+
+/**
+ * Tests whether a code point lies within a single range, both bounds inclusive.
+ * @param {number} codePoint Code point to test.
+ * @param {import('japanese-util').CodepointRange} range `[min, max]` pair.
+ * @returns {boolean} True if `min <= codePoint <= max`.
+ */
+function isCodePointInRange(codePoint, [min, max]) {
+ return (codePoint >= min && codePoint <= max);
+}
+
+/**
+ * Tests whether a code point lies within any of the given ranges, bounds inclusive.
+ * The ranges are checked in array order and the search stops at the first match.
+ * @param {number} codePoint Code point to test.
+ * @param {import('japanese-util').CodepointRange[]} ranges List of `[min, max]` pairs.
+ * @returns {boolean} True if at least one range contains the code point.
+ */
+function isCodePointInRanges(codePoint, ranges) {
+ for (const [min, max] of ranges) {
+ if (codePoint >= min && codePoint <= max) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * Returns the hiragana that replaces a prolonged sound mark following the given character,
+ * based on the vowel recorded for that character in KANA_TO_VOWEL_MAPPING.
+ * @param {string} previousCharacter The character preceding the prolonged sound mark.
+ * @returns {?string} The replacement hiragana, or null if the character has no usable vowel entry.
+ */
+function getProlongedHiragana(previousCharacter) {
+ switch (KANA_TO_VOWEL_MAPPING.get(previousCharacter)) {
+ case 'a': return 'あ';
+ case 'i': return 'い';
+ case 'u': return 'う';
+ case 'e': return 'え';
+ // Note that a prolonged 'o' is written with う, not お.
+ case 'o': return 'う';
+ // Unknown characters (undefined) and characters mapped to '' fall through to here.
+ default: return null;
+ }
+}
+
+/**
+ * Creates a furigana segment object from a piece of text and its reading.
+ * @param {string} text The segment's text.
+ * @param {string} reading The segment's reading; callers in this file pass '' when no furigana is needed.
+ * @returns {import('japanese-util').FuriganaSegment} A new `{text, reading}` object.
+ */
+function createFuriganaSegment(text, reading) {
+ return {text, reading};
+}
+
+/**
+ * Recursively assigns portions of `reading` to the groups from `groupsStart` onwards.
+ * Kana groups must match the start of the remaining normalized reading. Non-kana groups try
+ * every possible reading length and succeed only if exactly one length lets the rest match.
+ * @param {string} reading The part of the reading that is still unassigned.
+ * @param {string} readingNormalized `reading` with katakana converted to hiragana.
+ * @param {import('japanese-util').FuriganaGroup[]} groups Kana/non-kana groups of the term.
+ * @param {number} groupsStart Index of the first group to process.
+ * @returns {?(import('japanese-util').FuriganaSegment[])} The segments, or null if there is no
+ *   valid segmentation or more than one.
+ */
+function segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
+ const groupCount = groups.length - groupsStart;
+ if (groupCount <= 0) {
+ // No groups left: succeed only if the whole reading has been consumed.
+ return reading.length === 0 ? [] : null;
+ }
+
+ const group = groups[groupsStart];
+ const {isKana, text} = group;
+ const textLength = text.length;
+ if (isKana) {
+ const {textNormalized} = group;
+ if (textNormalized !== null && readingNormalized.startsWith(textNormalized)) {
+ const segments = segmentizeFurigana(
+ reading.substring(textLength),
+ readingNormalized.substring(textLength),
+ groups,
+ groupsStart + 1
+ );
+ if (segments !== null) {
+ if (reading.startsWith(text)) {
+ segments.unshift(createFuriganaSegment(text, ''));
+ } else {
+ // The text matches the reading only after normalization (e.g. a kana type mismatch),
+ // so split it into runs that match exactly and runs that need a reading.
+ segments.unshift(...getFuriganaKanaSegments(text, reading));
+ }
+ return segments;
+ }
+ }
+ return null;
+ } else {
+ let result = null;
+ // Try reading lengths from the longest down to the length of the group's text.
+ for (let i = reading.length; i >= textLength; --i) {
+ const segments = segmentizeFurigana(
+ reading.substring(i),
+ readingNormalized.substring(i),
+ groups,
+ groupsStart + 1
+ );
+ if (segments !== null) {
+ if (result !== null) {
+ // More than one way to segmentize the tail; mark as ambiguous
+ return null;
+ }
+ const segmentReading = reading.substring(0, i);
+ segments.unshift(createFuriganaSegment(text, segmentReading));
+ result = segments;
+ }
+ // There is only one way to segmentize the last non-kana group
+ if (groupCount === 1) {
+ break;
+ }
+ }
+ return result;
+ }
+}
+
+/**
+ * Splits a kana `text` into alternating runs, comparing it index by index with `reading`:
+ * runs where the characters are identical get an empty reading, and runs where they differ
+ * get the corresponding substring of `reading`.
+ * @param {string} text The kana text of the group.
+ * @param {string} reading The remaining reading; only its first `text.length` characters are used.
+ * @returns {import('japanese-util').FuriganaSegment[]} One segment per run, in order.
+ */
+function getFuriganaKanaSegments(text, reading) {
+ const textLength = text.length;
+ const newSegments = [];
+ let start = 0;
+ // state is true while the current run consists of characters identical in text and reading.
+ let state = (reading[0] === text[0]);
+ for (let i = 1; i < textLength; ++i) {
+ const newState = (reading[i] === text[i]);
+ if (state === newState) { continue; }
+ newSegments.push(createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i)));
+ state = newState;
+ start = i;
+ }
+ // Emit the final run.
+ newSegments.push(createFuriganaSegment(text.substring(start, textLength), state ? '' : reading.substring(start, textLength)));
+ return newSegments;
+}
+
+/**
+ * Computes the length of the common prefix of two strings. Characters are compared by code
+ * point, and the result is expressed in UTF-16 code units.
+ * @param {string} text1 First string.
+ * @param {string} text2 Second string.
+ * @returns {number} Length of the shared prefix; 0 if either string is empty.
+ */
+function getStemLength(text1, text2) {
+ const minLength = Math.min(text1.length, text2.length);
+ if (minLength === 0) { return 0; }
+
+ let i = 0;
+ while (true) {
+ const char1 = /** @type {number} */ (text1.codePointAt(i));
+ const char2 = /** @type {number} */ (text2.codePointAt(i));
+ if (char1 !== char2) { break; }
+ // 1 for BMP characters, 2 for characters encoded as a surrogate pair.
+ const charLength = String.fromCodePoint(char1).length;
+ i += charLength;
+ if (i >= minLength) {
+ if (i > minLength) {
+ i -= charLength; // Don't consume partial UTF16 surrogate characters
+ }
+ break;
+ }
+ }
+ return i;
+}
+
+
+// Character code testing functions
+
+/**
+ * Tests whether a code point falls in one of the CJK ideograph ranges (CJK_IDEOGRAPH_RANGES).
+ * @param {number} codePoint Code point to test.
+ * @returns {boolean} True if the code point is in a CJK ideograph range.
+ */
+export function isCodePointKanji(codePoint) {
+ return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
+}
+
+/**
+ * Tests whether a code point falls in the hiragana or katakana range (KANA_RANGES).
+ * @param {number} codePoint Code point to test.
+ * @returns {boolean} True if the code point is in a kana range.
+ */
+export function isCodePointKana(codePoint) {
+ return isCodePointInRanges(codePoint, KANA_RANGES);
+}
+
+/**
+ * Tests whether a code point falls in any range listed in JAPANESE_RANGES (kana, CJK
+ * ideographs, halfwidth katakana, CJK/fullwidth punctuation, fullwidth digits and letters).
+ * @param {number} codePoint Code point to test.
+ * @returns {boolean} True if the code point is in one of those ranges.
+ */
+export function isCodePointJapanese(codePoint) {
+ return isCodePointInRanges(codePoint, JAPANESE_RANGES);
+}
+
+
+// String testing functions
+
+/**
+ * Tests whether every character of a string is in a kana range (KANA_RANGES).
+ * @param {string} str String to test.
+ * @returns {boolean} True if the string is non-empty and all of its characters are kana;
+ *   false for the empty string.
+ */
+export function isStringEntirelyKana(str) {
+ if (str.length === 0) { return false; }
+ // for...of iterates by code point, so surrogate pairs are tested as one character.
+ for (const c of str) {
+ if (!isCodePointInRanges(/** @type {number} */ (c.codePointAt(0)), KANA_RANGES)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * Tests whether a string contains at least one character from JAPANESE_RANGES.
+ * @param {string} str String to test.
+ * @returns {boolean} True if any character is in one of those ranges; false for the empty string.
+ */
+export function isStringPartiallyJapanese(str) {
+ if (str.length === 0) { return false; }
+ // for...of iterates by code point, so surrogate pairs are tested as one character.
+ for (const c of str) {
+ if (isCodePointInRanges(/** @type {number} */ (c.codePointAt(0)), JAPANESE_RANGES)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+// Mora functions
+
+/**
+ * Determines whether the mora at a given index is high-pitched for a given downstep position.
+ * @param {number} moraIndex Zero-based index of the mora.
+ * @param {number} pitchAccentDownstepPosition Downstep position of the pitch accent.
+ * @returns {boolean} True if the mora is high-pitched.
+ */
+export function isMoraPitchHigh(moraIndex, pitchAccentDownstepPosition) {
+ switch (pitchAccentDownstepPosition) {
+ // 0: every mora except the first is high.
+ case 0: return (moraIndex > 0);
+ // 1: only the first mora is high.
+ case 1: return (moraIndex < 1);
+ // Otherwise: high from the second mora up to, but not including, the downstep position.
+ default: return (moraIndex > 0 && moraIndex < pitchAccentDownstepPosition);
+ }
+}
+
+/**
+ * Classifies a pitch accent by its downstep position.
+ * @param {string} text Kana text of the word; used only to count morae for odaka/nakadaka.
+ * @param {number} pitchAccentDownstepPosition Downstep position of the pitch accent.
+ * @param {boolean} isVerbOrAdjective Whether the word is a verb or adjective.
+ * @returns {?import('japanese-util').PitchCategory} 'heiban' for position 0; for verbs and
+ *   adjectives 'kifuku' for any positive position; otherwise 'atamadaka' for position 1 and
+ *   'odaka' or 'nakadaka' for larger positions; null for negative positions.
+ */
+export function getPitchCategory(text, pitchAccentDownstepPosition, isVerbOrAdjective) {
+ if (pitchAccentDownstepPosition === 0) {
+ return 'heiban';
+ }
+ if (isVerbOrAdjective) {
+ return pitchAccentDownstepPosition > 0 ? 'kifuku' : null;
+ }
+ if (pitchAccentDownstepPosition === 1) {
+ return 'atamadaka';
+ }
+ if (pitchAccentDownstepPosition > 1) {
+ // 'odaka' when the downstep position is at or past the mora count, 'nakadaka' when before it.
+ return pitchAccentDownstepPosition >= getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
+ }
+ return null;
+}
+
+/**
+ * Splits kana text into morae. A character from SMALL_KANA_SET is appended to the preceding
+ * mora; every other character starts a new mora.
+ * @param {string} text Text to split.
+ * @returns {string[]} The morae in order.
+ */
+export function getKanaMorae(text) {
+ const morae = [];
+ let i;
+ for (const c of text) {
+ // A small kana at the very start has nothing to attach to and becomes its own mora.
+ if (SMALL_KANA_SET.has(c) && (i = morae.length) > 0) {
+ morae[i - 1] += c;
+ } else {
+ morae.push(c);
+ }
+ }
+ return morae;
+}
+
+/**
+ * Counts the morae in kana text using the same rule as getKanaMorae, without building the list:
+ * characters from SMALL_KANA_SET are not counted unless they come first.
+ * @param {string} text Text to count.
+ * @returns {number} The number of morae.
+ */
+export function getKanaMoraCount(text) {
+ let moraCount = 0;
+ for (const c of text) {
+ if (!(SMALL_KANA_SET.has(c) && moraCount > 0)) {
+ ++moraCount;
+ }
+ }
+ return moraCount;
+}
+
+
+// Conversion functions
+
+/**
+ * Converts katakana in `text` to hiragana; characters outside the katakana conversion range
+ * are copied unchanged.
+ * @param {string} text Text to convert.
+ * @param {boolean} [keepProlongedSoundMarks] If true, 'ー' is kept as is. If false (the default),
+ *   it is replaced by the hiragana vowel that prolongs the previously emitted character, when
+ *   one is known.
+ * @returns {string} The converted text.
+ */
+export function convertKatakanaToHiragana(text, keepProlongedSoundMarks = false) {
+ let result = '';
+ const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
+ for (let char of text) {
+ const codePoint = /** @type {number} */ (char.codePointAt(0));
+ switch (codePoint) {
+ case KATAKANA_SMALL_KA_CODE_POINT:
+ case KATAKANA_SMALL_KE_CODE_POINT:
+ // No change
+ break;
+ case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
+ // The lookup uses the already-converted previous output character.
+ // The mark is kept when it comes first or when no vowel is known for that character.
+ if (!keepProlongedSoundMarks && result.length > 0) {
+ const char2 = getProlongedHiragana(result[result.length - 1]);
+ if (char2 !== null) { char = char2; }
+ }
+ break;
+ default:
+ if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
+ char = String.fromCodePoint(codePoint + offset);
+ }
+ break;
+ }
+ result += char;
+ }
+ return result;
+}
+
+/**
+ * Converts hiragana in `text` to katakana by shifting code points in
+ * HIRAGANA_CONVERSION_RANGE by a fixed offset; all other characters are copied unchanged.
+ * @param {string} text Text to convert.
+ * @returns {string} The converted text.
+ */
+export function convertHiraganaToKatakana(text) {
+ let result = '';
+ const offset = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]);
+ for (let char of text) {
+ const codePoint = /** @type {number} */ (char.codePointAt(0));
+ if (isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE)) {
+ char = String.fromCodePoint(codePoint + offset);
+ }
+ result += char;
+ }
+ return result;
+}
+
+/**
+ * Replaces the ASCII digits '0'-'9' in `text` with their fullwidth forms (U+FF10..U+FF19);
+ * all other characters are copied unchanged.
+ * @param {string} text Text to convert.
+ * @returns {string} The converted text.
+ */
+export function convertNumericToFullWidth(text) {
+ let result = '';
+ for (const char of text) {
+ let c = /** @type {number} */ (char.codePointAt(0));
+ if (c >= 0x30 && c <= 0x39) { // ['0', '9']
+ c += 0xff10 - 0x30; // 0xff10 is the fullwidth digit zero
+ result += String.fromCodePoint(c);
+ } else {
+ result += char;
+ }
+ }
+ return result;
+}
+
+/**
+ * Replaces each character found in HALFWIDTH_KATAKANA_MAPPING with its fullwidth form. A
+ * following halfwidth dakuten (U+FF9E) or handakuten (U+FF9F) is merged into the character
+ * when the mapping has such a form; otherwise the mark is left in place as a separate character.
+ * @param {string} text Text to convert.
+ * @param {?import('../general/text-source-map.js').TextSourceMap} [sourceMap] Optional source map;
+ *   `combine` is called on it whenever a character and its mark are merged.
+ * @returns {string} The converted text.
+ */
+export function convertHalfWidthKanaToFullWidth(text, sourceMap = null) {
+ let result = '';
+
+ // This function is safe to use charCodeAt instead of codePointAt, since all
+ // the relevant characters are represented with a single UTF-16 character code.
+ for (let i = 0, ii = text.length; i < ii; ++i) {
+ const c = text[i];
+ const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c);
+ if (typeof mapping !== 'string') {
+ result += c;
+ continue;
+ }
+
+ // index selects the form within the mapping string: 0 plain, 1 dakuten, 2 handakuten.
+ // At the end of the text charCodeAt returns NaN, which matches neither case.
+ let index = 0;
+ switch (text.charCodeAt(i + 1)) {
+ case 0xff9e: // dakuten
+ index = 1;
+ break;
+ case 0xff9f: // handakuten
+ index = 2;
+ break;
+ }
+
+ let c2 = mapping[index];
+ if (index > 0) {
+ if (c2 === '-') { // invalid
+ index = 0;
+ c2 = mapping[0];
+ } else {
+ // Consume the diacritic mark as well.
+ ++i;
+ }
+ }
+
+ if (sourceMap !== null && index > 0) {
+ sourceMap.combine(result.length, 1);
+ }
+ result += c2;
+ }
+
+ return result;
+}
+
+/**
+ * Looks up the base character and diacritic type of a kana that carries a dakuten or handakuten.
+ * @param {string} character The kana to look up.
+ * @returns {?{character: string, type: import('japanese-util').DiacriticType}} A fresh object
+ *   holding the base character and diacritic type, or null if the character is not a key of
+ *   DIACRITIC_MAPPING.
+ */
+export function getKanaDiacriticInfo(character) {
+ const info = DIACRITIC_MAPPING.get(character);
+ // Return a copy so callers cannot mutate the shared mapping entry.
+ return typeof info !== 'undefined' ? {character: info.character, type: info.type} : null;
+}
+
+
+// Furigana distribution
+
+/**
+ * Splits a term into segments and assigns each the part of the reading that belongs to it.
+ * The term is divided into alternating kana and non-kana groups, which are then matched
+ * against the reading by segmentizeFurigana.
+ * @param {string} term The term text.
+ * @param {string} reading The reading of the whole term.
+ * @returns {import('japanese-util').FuriganaSegment[]} The segments. If the reading equals the
+ *   term, a single segment with an empty reading. If no unambiguous distribution exists, a
+ *   single segment holding the whole term and the whole reading.
+ */
+export function distributeFurigana(term, reading) {
+ if (reading === term) {
+ // Same
+ return [createFuriganaSegment(term, '')];
+ }
+
+ // Group consecutive characters of the same kind (kana or non-kana).
+ /** @type {import('japanese-util').FuriganaGroup[]} */
+ const groups = [];
+ /** @type {?import('japanese-util').FuriganaGroup} */
+ let groupPre = null;
+ let isKanaPre = null;
+ for (const c of term) {
+ const codePoint = /** @type {number} */ (c.codePointAt(0));
+ const isKana = isCodePointKana(codePoint);
+ if (isKana === isKanaPre) {
+ /** @type {import('japanese-util').FuriganaGroup} */ (groupPre).text += c;
+ } else {
+ groupPre = {isKana, text: c, textNormalized: null};
+ groups.push(groupPre);
+ isKanaPre = isKana;
+ }
+ }
+ // Normalize kana groups to hiragana so they can be compared with the normalized reading.
+ for (const group of groups) {
+ if (group.isKana) {
+ group.textNormalized = convertKatakanaToHiragana(group.text);
+ }
+ }
+
+ const readingNormalized = convertKatakanaToHiragana(reading);
+ const segments = segmentizeFurigana(reading, readingNormalized, groups, 0);
+ if (segments !== null) {
+ return segments;
+ }
+
+ // Fallback
+ return [createFuriganaSegment(term, reading)];
+}
+
+/**
+ * Distributes furigana over `source`, an inflected form of a term. The stem shared between
+ * `source` and the term (or its reading) gets furigana via distributeFurigana; the rest of
+ * `source` is appended with an empty reading.
+ * @param {string} term The dictionary form of the term.
+ * @param {string} reading The reading of the dictionary form.
+ * @param {string} source The inflected text.
+ * @returns {import('japanese-util').FuriganaSegment[]} Segments whose texts together cover `source`.
+ */
+export function distributeFuriganaInflected(term, reading, source) {
+ // Stems are compared on hiragana-normalized text so that kana type differences do not matter.
+ const termNormalized = convertKatakanaToHiragana(term);
+ const readingNormalized = convertKatakanaToHiragana(reading);
+ const sourceNormalized = convertKatakanaToHiragana(source);
+
+ let mainText = term;
+ let stemLength = getStemLength(termNormalized, sourceNormalized);
+
+ // Check if source is derived from the reading instead of the term
+ const readingStemLength = getStemLength(readingNormalized, sourceNormalized);
+ if (readingStemLength > 0 && readingStemLength >= stemLength) {
+ mainText = reading;
+ stemLength = readingStemLength;
+ reading = `${source.substring(0, stemLength)}${reading.substring(stemLength)}`;
+ }
+
+ const segments = [];
+ if (stemLength > 0) {
+ // Use the source's own characters for the stem, then keep only the segments that lie
+ // within the stem.
+ mainText = `${source.substring(0, stemLength)}${mainText.substring(stemLength)}`;
+ const segments2 = distributeFurigana(mainText, reading);
+ let consumed = 0;
+ for (const segment of segments2) {
+ const {text} = segment;
+ const start = consumed;
+ consumed += text.length;
+ if (consumed < stemLength) {
+ segments.push(segment);
+ } else if (consumed === stemLength) {
+ segments.push(segment);
+ break;
+ } else {
+ // This segment straddles the end of the stem: keep only the part inside the stem,
+ // without a reading.
+ if (start < stemLength) {
+ segments.push(createFuriganaSegment(mainText.substring(start, stemLength), ''));
+ }
+ break;
+ }
+ }
+ }
+
+ if (stemLength < source.length) {
+ const remainder = source.substring(stemLength);
+ const segmentCount = segments.length;
+ if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
+ // Append to the last segment if it has an empty reading
+ segments[segmentCount - 1].text += remainder;
+ } else {
+ // Otherwise, create a new segment
+ segments.push(createFuriganaSegment(remainder, ''));
+ }
+ }
+
+ return segments;
+}
+
+
+// Miscellaneous
+
+/**
+ * Collapses runs of the emphatic characters 'っ', 'ッ' and 'ー'. Within a run of the same
+ * character, repeats after the first are always removed.
+ * @param {string} text Text to process.
+ * @param {boolean} fullCollapse If true, the first character of each run is removed as well.
+ * @param {?import('../general/text-source-map.js').TextSourceMap} [sourceMap] Optional source map;
+ *   `combine` is called on it once for every removed character.
+ * @returns {string} The collapsed text.
+ */
+export function collapseEmphaticSequences(text, fullCollapse, sourceMap = null) {
+ let result = '';
+ // Code point of the emphatic character whose run is being collapsed, or -1 outside a run.
+ let collapseCodePoint = -1;
+ const hasSourceMap = (sourceMap !== null);
+ for (const char of text) {
+ const c = char.codePointAt(0);
+ if (
+ c === HIRAGANA_SMALL_TSU_CODE_POINT ||
+ c === KATAKANA_SMALL_TSU_CODE_POINT ||
+ c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
+ ) {
+ if (collapseCodePoint !== c) {
+ // First character of a new run: kept unless fullCollapse is set.
+ collapseCodePoint = c;
+ if (!fullCollapse) {
+ result += char;
+ continue;
+ }
+ }
+ } else {
+ collapseCodePoint = -1;
+ result += char;
+ continue;
+ }
+
+ // Reached only when the current character was dropped from the output.
+ if (hasSourceMap) {
+ sourceMap.combine(Math.max(0, result.length - 1), 1);
+ }
+ }
+ return result;
+}
diff --git a/ext/js/language/sandbox/japanese-util.js b/ext/js/language/sandbox/japanese-util.js
deleted file mode 100644
index f9874cd4..00000000
--- a/ext/js/language/sandbox/japanese-util.js
+++ /dev/null
@@ -1,885 +0,0 @@
-/*
- * Copyright (C) 2023-2024 Yomitan Authors
- * Copyright (C) 2020-2022 Yomichan Authors
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
-const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
-const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
-const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6;
-const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
-
-/** @type {import('japanese-util').CodepointRange} */
-const HIRAGANA_RANGE = [0x3040, 0x309f];
-/** @type {import('japanese-util').CodepointRange} */
-const KATAKANA_RANGE = [0x30a0, 0x30ff];
-
-/** @type {import('japanese-util').CodepointRange} */
-const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
-/** @type {import('japanese-util').CodepointRange} */
-const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
-
-/** @type {import('japanese-util').CodepointRange[]} */
-const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
-
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];
-/** @type {import('japanese-util').CodepointRange} */
-const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
-/** @type {import('japanese-util').CodepointRange[]} */
-const CJK_IDEOGRAPH_RANGES = [
- CJK_UNIFIED_IDEOGRAPHS_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
- CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
- CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,
- CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE
-];
-
-/**
- * Japanese character ranges, roughly ordered in order of expected frequency.
- * @type {import('japanese-util').CodepointRange[]}
- */
-const JAPANESE_RANGES = [
- HIRAGANA_RANGE,
- KATAKANA_RANGE,
-
- ...CJK_IDEOGRAPH_RANGES,
-
- [0xff66, 0xff9f], // Halfwidth katakana
-
- [0x30fb, 0x30fc], // Katakana punctuation
- [0xff61, 0xff65], // Kana punctuation
- [0x3000, 0x303f], // CJK punctuation
-
- [0xff10, 0xff19], // Fullwidth numbers
- [0xff21, 0xff3a], // Fullwidth upper case Latin letters
- [0xff41, 0xff5a], // Fullwidth lower case Latin letters
-
- [0xff01, 0xff0f], // Fullwidth punctuation 1
- [0xff1a, 0xff1f], // Fullwidth punctuation 2
- [0xff3b, 0xff3f], // Fullwidth punctuation 3
- [0xff5b, 0xff60], // Fullwidth punctuation 4
- [0xffe0, 0xffee] // Currency markers
-];
-
-const SMALL_KANA_SET = new Set(Array.from('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ'));
-
-const HALFWIDTH_KATAKANA_MAPPING = new Map([
- ['ヲ', 'ヲヺ-'],
- ['ァ', 'ァ--'],
- ['ィ', 'ィ--'],
- ['ゥ', 'ゥ--'],
- ['ェ', 'ェ--'],
- ['ォ', 'ォ--'],
- ['ャ', 'ャ--'],
- ['ュ', 'ュ--'],
- ['ョ', 'ョ--'],
- ['ッ', 'ッ--'],
- ['ー', 'ー--'],
- ['ア', 'ア--'],
- ['イ', 'イ--'],
- ['ウ', 'ウヴ-'],
- ['エ', 'エ--'],
- ['オ', 'オ--'],
- ['カ', 'カガ-'],
- ['キ', 'キギ-'],
- ['ク', 'クグ-'],
- ['ケ', 'ケゲ-'],
- ['コ', 'コゴ-'],
- ['サ', 'サザ-'],
- ['シ', 'シジ-'],
- ['ス', 'スズ-'],
- ['セ', 'セゼ-'],
- ['ソ', 'ソゾ-'],
- ['タ', 'タダ-'],
- ['チ', 'チヂ-'],
- ['ツ', 'ツヅ-'],
- ['テ', 'テデ-'],
- ['ト', 'トド-'],
- ['ナ', 'ナ--'],
- ['ニ', 'ニ--'],
- ['ヌ', 'ヌ--'],
- ['ネ', 'ネ--'],
- ['ノ', 'ノ--'],
- ['ハ', 'ハバパ'],
- ['ヒ', 'ヒビピ'],
- ['フ', 'フブプ'],
- ['ヘ', 'ヘベペ'],
- ['ホ', 'ホボポ'],
- ['マ', 'マ--'],
- ['ミ', 'ミ--'],
- ['ム', 'ム--'],
- ['メ', 'メ--'],
- ['モ', 'モ--'],
- ['ヤ', 'ヤ--'],
- ['ユ', 'ユ--'],
- ['ヨ', 'ヨ--'],
- ['ラ', 'ラ--'],
- ['リ', 'リ--'],
- ['ル', 'ル--'],
- ['レ', 'レ--'],
- ['ロ', 'ロ--'],
- ['ワ', 'ワ--'],
- ['ン', 'ン--']
-]);
-
-const VOWEL_TO_KANA_MAPPING = new Map([
- ['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'],
- ['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
- ['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
- ['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'],
- ['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
- ['', 'のノ']
-]);
-
-/** @type {Map<string, string>} */
-const KANA_TO_VOWEL_MAPPING = new Map();
-for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
- for (const character of characters) {
- KANA_TO_VOWEL_MAPPING.set(character, vowel);
- }
-}
-
-const kana = 'うゔ-かが-きぎ-くぐ-けげ-こご-さざ-しじ-すず-せぜ-そぞ-ただ-ちぢ-つづ-てで-とど-はばぱひびぴふぶぷへべぺほぼぽワヷ-ヰヸ-ウヴ-ヱヹ-ヲヺ-カガ-キギ-クグ-ケゲ-コゴ-サザ-シジ-スズ-セゼ-ソゾ-タダ-チヂ-ツヅ-テデ-トド-ハバパヒビピフブプヘベペホボポ';
-/** @type {Map<string, {character: string, type: import('japanese-util').DiacriticType}>} */
-const DIACRITIC_MAPPING = new Map();
-for (let i = 0, ii = kana.length; i < ii; i += 3) {
- const character = kana[i];
- const dakuten = kana[i + 1];
- const handakuten = kana[i + 2];
- DIACRITIC_MAPPING.set(dakuten, {character, type: 'dakuten'});
- if (handakuten !== '-') {
- DIACRITIC_MAPPING.set(handakuten, {character, type: 'handakuten'});
- }
-}
-
-
-/**
- * @param {number} codePoint
- * @param {import('japanese-util').CodepointRange} range
- * @returns {boolean}
- */
-function isCodePointInRange(codePoint, [min, max]) {
- return (codePoint >= min && codePoint <= max);
-}
-
-/**
- * @param {number} codePoint
- * @param {import('japanese-util').CodepointRange[]} ranges
- * @returns {boolean}
- */
-function isCodePointInRanges(codePoint, ranges) {
- for (const [min, max] of ranges) {
- if (codePoint >= min && codePoint <= max) {
- return true;
- }
- }
- return false;
-}
-
-/**
- * @param {string} previousCharacter
- * @returns {?string}
- */
-function getProlongedHiragana(previousCharacter) {
- switch (KANA_TO_VOWEL_MAPPING.get(previousCharacter)) {
- case 'a': return 'あ';
- case 'i': return 'い';
- case 'u': return 'う';
- case 'e': return 'え';
- case 'o': return 'う';
- default: return null;
- }
-}
-
-
-export class JapaneseUtil {
- /**
- * @param {?import('wanakana')|import('../../../lib/wanakana.js')} wanakana
- */
- constructor(wanakana = null) {
- /** @type {?import('wanakana')} */
- this._wanakana = /** @type {import('wanakana')} */ (wanakana);
- }
-
- // Character code testing functions
-
- /**
- * @param {number} codePoint
- * @returns {boolean}
- */
- isCodePointKanji(codePoint) {
- return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
- }
-
- /**
- * @param {number} codePoint
- * @returns {boolean}
- */
- isCodePointKana(codePoint) {
- return isCodePointInRanges(codePoint, KANA_RANGES);
- }
-
- /**
- * @param {number} codePoint
- * @returns {boolean}
- */
- isCodePointJapanese(codePoint) {
- return isCodePointInRanges(codePoint, JAPANESE_RANGES);
- }
-
- // String testing functions
-
- /**
- * @param {string} str
- * @returns {boolean}
- */
- isStringEntirelyKana(str) {
- if (str.length === 0) { return false; }
- for (const c of str) {
- if (!isCodePointInRanges(/** @type {number} */ (c.codePointAt(0)), KANA_RANGES)) {
- return false;
- }
- }
- return true;
- }
-
- /**
- * @param {string} str
- * @returns {boolean}
- */
- isStringPartiallyJapanese(str) {
- if (str.length === 0) { return false; }
- for (const c of str) {
- if (isCodePointInRanges(/** @type {number} */ (c.codePointAt(0)), JAPANESE_RANGES)) {
- return true;
- }
- }
- return false;
- }
-
- // Mora functions
-
- /**
- * @param {number} moraIndex
- * @param {number} pitchAccentDownstepPosition
- * @returns {boolean}
- */
- isMoraPitchHigh(moraIndex, pitchAccentDownstepPosition) {
- switch (pitchAccentDownstepPosition) {
- case 0: return (moraIndex > 0);
- case 1: return (moraIndex < 1);
- default: return (moraIndex > 0 && moraIndex < pitchAccentDownstepPosition);
- }
- }
-
- /**
- * @param {string} text
- * @param {number} pitchAccentDownstepPosition
- * @param {boolean} isVerbOrAdjective
- * @returns {?import('japanese-util').PitchCategory}
- */
- getPitchCategory(text, pitchAccentDownstepPosition, isVerbOrAdjective) {
- if (pitchAccentDownstepPosition === 0) {
- return 'heiban';
- }
- if (isVerbOrAdjective) {
- return pitchAccentDownstepPosition > 0 ? 'kifuku' : null;
- }
- if (pitchAccentDownstepPosition === 1) {
- return 'atamadaka';
- }
- if (pitchAccentDownstepPosition > 1) {
- return pitchAccentDownstepPosition >= this.getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
- }
- return null;
- }
-
- /**
- * @param {string} text
- * @returns {string[]}
- */
- getKanaMorae(text) {
- const morae = [];
- let i;
- for (const c of text) {
- if (SMALL_KANA_SET.has(c) && (i = morae.length) > 0) {
- morae[i - 1] += c;
- } else {
- morae.push(c);
- }
- }
- return morae;
- }
-
- /**
- * @param {string} text
- * @returns {number}
- */
- getKanaMoraCount(text) {
- let moraCount = 0;
- for (const c of text) {
- if (!(SMALL_KANA_SET.has(c) && moraCount > 0)) {
- ++moraCount;
- }
- }
- return moraCount;
- }
-
- // Conversion functions
-
- /**
- * @param {string} text
- * @returns {string}
- */
- convertToKana(text) {
- return this._getWanakana().toKana(text);
- }
-
- /**
- * @returns {boolean}
- */
- convertToKanaSupported() {
- return this._wanakana !== null;
- }
-
- /**
- * @param {string} text
- * @param {boolean} [keepProlongedSoundMarks]
- * @returns {string}
- */
- convertKatakanaToHiragana(text, keepProlongedSoundMarks = false) {
- let result = '';
- const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
- for (let char of text) {
- const codePoint = /** @type {number} */ (char.codePointAt(0));
- switch (codePoint) {
- case KATAKANA_SMALL_KA_CODE_POINT:
- case KATAKANA_SMALL_KE_CODE_POINT:
- // No change
- break;
- case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
- if (!keepProlongedSoundMarks && result.length > 0) {
- const char2 = getProlongedHiragana(result[result.length - 1]);
- if (char2 !== null) { char = char2; }
- }
- break;
- default:
- if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
- char = String.fromCodePoint(codePoint + offset);
- }
- break;
- }
- result += char;
- }
- return result;
- }
-
- /**
- * @param {string} text
- * @returns {string}
- */
- convertHiraganaToKatakana(text) {
- let result = '';
- const offset = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]);
- for (let char of text) {
- const codePoint = /** @type {number} */ (char.codePointAt(0));
- if (isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE)) {
- char = String.fromCodePoint(codePoint + offset);
- }
- result += char;
- }
- return result;
- }
-
- /**
- * @param {string} text
- * @returns {string}
- */
- convertToRomaji(text) {
- const wanakana = this._getWanakana();
- return wanakana.toRomaji(text);
- }
-
- /**
- * @returns {boolean}
- */
- convertToRomajiSupported() {
- return this._wanakana !== null;
- }
-
- /**
- * @param {string} text
- * @returns {string}
- */
- convertNumericToFullWidth(text) {
- let result = '';
- for (const char of text) {
- let c = /** @type {number} */ (char.codePointAt(0));
- if (c >= 0x30 && c <= 0x39) { // ['0', '9']
- c += 0xff10 - 0x30; // 0xff10 = '0' full width
- result += String.fromCodePoint(c);
- } else {
- result += char;
- }
- }
- return result;
- }
-
- /**
- * @param {string} text
- * @param {?import('../../general/text-source-map.js').TextSourceMap} [sourceMap]
- * @returns {string}
- */
- convertHalfWidthKanaToFullWidth(text, sourceMap = null) {
- let result = '';
-
- // This function is safe to use charCodeAt instead of codePointAt, since all
- // the relevant characters are represented with a single UTF-16 character code.
- for (let i = 0, ii = text.length; i < ii; ++i) {
- const c = text[i];
- const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c);
- if (typeof mapping !== 'string') {
- result += c;
- continue;
- }
-
- let index = 0;
- switch (text.charCodeAt(i + 1)) {
- case 0xff9e: // dakuten
- index = 1;
- break;
- case 0xff9f: // handakuten
- index = 2;
- break;
- }
-
- let c2 = mapping[index];
- if (index > 0) {
- if (c2 === '-') { // invalid
- index = 0;
- c2 = mapping[0];
- } else {
- ++i;
- }
- }
-
- if (sourceMap !== null && index > 0) {
- sourceMap.combine(result.length, 1);
- }
- result += c2;
- }
-
- return result;
- }
-
- /**
- * @param {string} text
- * @param {?import('../../general/text-source-map.js').TextSourceMap} sourceMap
- * @returns {string}
- */
- convertAlphabeticToKana(text, sourceMap = null) {
- let part = '';
- let result = '';
-
- for (const char of text) {
- // Note: 0x61 is the character code for 'a'
- let c = /** @type {number} */ (char.codePointAt(0));
- if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
- c += (0x61 - 0x41);
- } else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
- // NOP; c += (0x61 - 0x61);
- } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth
- c += (0x61 - 0xff21);
- } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth
- c += (0x61 - 0xff41);
- } else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
- c = 0x2d; // '-'
- } else {
- if (part.length > 0) {
- result += this._convertAlphabeticPartToKana(part, sourceMap, result.length);
- part = '';
- }
- result += char;
- continue;
- }
- part += String.fromCodePoint(c);
- }
-
- if (part.length > 0) {
- result += this._convertAlphabeticPartToKana(part, sourceMap, result.length);
- }
- return result;
- }
-
- /**
- * @returns {boolean}
- */
- convertAlphabeticToKanaSupported() {
- return this._wanakana !== null;
- }
-
- /**
- * @param {string} character
- * @returns {?{character: string, type: import('japanese-util').DiacriticType}}
- */
- getKanaDiacriticInfo(character) {
- const info = DIACRITIC_MAPPING.get(character);
- return typeof info !== 'undefined' ? {character: info.character, type: info.type} : null;
- }
-
- // Furigana distribution
-
- /**
- * @param {string} term
- * @param {string} reading
- * @returns {import('japanese-util').FuriganaSegment[]}
- */
- distributeFurigana(term, reading) {
- if (reading === term) {
- // Same
- return [this._createFuriganaSegment(term, '')];
- }
-
- /** @type {import('japanese-util').FuriganaGroup[]} */
- const groups = [];
- /** @type {?import('japanese-util').FuriganaGroup} */
- let groupPre = null;
- let isKanaPre = null;
- for (const c of term) {
- const codePoint = /** @type {number} */ (c.codePointAt(0));
- const isKana = this.isCodePointKana(codePoint);
- if (isKana === isKanaPre) {
- /** @type {import('japanese-util').FuriganaGroup} */ (groupPre).text += c;
- } else {
- groupPre = {isKana, text: c, textNormalized: null};
- groups.push(groupPre);
- isKanaPre = isKana;
- }
- }
- for (const group of groups) {
- if (group.isKana) {
- group.textNormalized = this.convertKatakanaToHiragana(group.text);
- }
- }
-
- const readingNormalized = this.convertKatakanaToHiragana(reading);
- const segments = this._segmentizeFurigana(reading, readingNormalized, groups, 0);
- if (segments !== null) {
- return segments;
- }
-
- // Fallback
- return [this._createFuriganaSegment(term, reading)];
- }
-
- /**
- * @param {string} term
- * @param {string} reading
- * @param {string} source
- * @returns {import('japanese-util').FuriganaSegment[]}
- */
- distributeFuriganaInflected(term, reading, source) {
- const termNormalized = this.convertKatakanaToHiragana(term);
- const readingNormalized = this.convertKatakanaToHiragana(reading);
- const sourceNormalized = this.convertKatakanaToHiragana(source);
-
- let mainText = term;
- let stemLength = this._getStemLength(termNormalized, sourceNormalized);
-
- // Check if source is derived from the reading instead of the term
- const readingStemLength = this._getStemLength(readingNormalized, sourceNormalized);
- if (readingStemLength > 0 && readingStemLength >= stemLength) {
- mainText = reading;
- stemLength = readingStemLength;
- reading = `${source.substring(0, stemLength)}${reading.substring(stemLength)}`;
- }
-
- const segments = [];
- if (stemLength > 0) {
- mainText = `${source.substring(0, stemLength)}${mainText.substring(stemLength)}`;
- const segments2 = this.distributeFurigana(mainText, reading);
- let consumed = 0;
- for (const segment of segments2) {
- const {text} = segment;
- const start = consumed;
- consumed += text.length;
- if (consumed < stemLength) {
- segments.push(segment);
- } else if (consumed === stemLength) {
- segments.push(segment);
- break;
- } else {
- if (start < stemLength) {
- segments.push(this._createFuriganaSegment(mainText.substring(start, stemLength), ''));
- }
- break;
- }
- }
- }
-
- if (stemLength < source.length) {
- const remainder = source.substring(stemLength);
- const segmentCount = segments.length;
- if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
- // Append to the last segment if it has an empty reading
- segments[segmentCount - 1].text += remainder;
- } else {
- // Otherwise, create a new segment
- segments.push(this._createFuriganaSegment(remainder, ''));
- }
- }
-
- return segments;
- }
-
- // Miscellaneous
-
- /**
- * @param {string} text
- * @param {boolean} fullCollapse
- * @param {?import('../../general/text-source-map.js').TextSourceMap} [sourceMap]
- * @returns {string}
- */
- collapseEmphaticSequences(text, fullCollapse, sourceMap = null) {
- let result = '';
- let collapseCodePoint = -1;
- const hasSourceMap = (sourceMap !== null);
- for (const char of text) {
- const c = char.codePointAt(0);
- if (
- c === HIRAGANA_SMALL_TSU_CODE_POINT ||
- c === KATAKANA_SMALL_TSU_CODE_POINT ||
- c === KANA_PROLONGED_SOUND_MARK_CODE_POINT
- ) {
- if (collapseCodePoint !== c) {
- collapseCodePoint = c;
- if (!fullCollapse) {
- result += char;
- continue;
- }
- }
- } else {
- collapseCodePoint = -1;
- result += char;
- continue;
- }
-
- if (hasSourceMap) {
- sourceMap.combine(Math.max(0, result.length - 1), 1);
- }
- }
- return result;
- }
-
- // Private
-
- /**
- * @param {string} text
- * @param {string} reading
- * @returns {import('japanese-util').FuriganaSegment}
- */
- _createFuriganaSegment(text, reading) {
- return {text, reading};
- }
-
- /**
- * @param {string} reading
- * @param {string} readingNormalized
- * @param {import('japanese-util').FuriganaGroup[]} groups
- * @param {number} groupsStart
- * @returns {?(import('japanese-util').FuriganaSegment[])}
- */
- _segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
- const groupCount = groups.length - groupsStart;
- if (groupCount <= 0) {
- return reading.length === 0 ? [] : null;
- }
-
- const group = groups[groupsStart];
- const {isKana, text} = group;
- const textLength = text.length;
- if (isKana) {
- const {textNormalized} = group;
- if (textNormalized !== null && readingNormalized.startsWith(textNormalized)) {
- const segments = this._segmentizeFurigana(
- reading.substring(textLength),
- readingNormalized.substring(textLength),
- groups,
- groupsStart + 1
- );
- if (segments !== null) {
- if (reading.startsWith(text)) {
- segments.unshift(this._createFuriganaSegment(text, ''));
- } else {
- segments.unshift(...this._getFuriganaKanaSegments(text, reading));
- }
- return segments;
- }
- }
- return null;
- } else {
- let result = null;
- for (let i = reading.length; i >= textLength; --i) {
- const segments = this._segmentizeFurigana(
- reading.substring(i),
- readingNormalized.substring(i),
- groups,
- groupsStart + 1
- );
- if (segments !== null) {
- if (result !== null) {
- // More than one way to segmentize the tail; mark as ambiguous
- return null;
- }
- const segmentReading = reading.substring(0, i);
- segments.unshift(this._createFuriganaSegment(text, segmentReading));
- result = segments;
- }
- // There is only one way to segmentize the last non-kana group
- if (groupCount === 1) {
- break;
- }
- }
- return result;
- }
- }
-
- /**
- * @param {string} text
- * @param {string} reading
- * @returns {import('japanese-util').FuriganaSegment[]}
- */
- _getFuriganaKanaSegments(text, reading) {
- const textLength = text.length;
- const newSegments = [];
- let start = 0;
- let state = (reading[0] === text[0]);
- for (let i = 1; i < textLength; ++i) {
- const newState = (reading[i] === text[i]);
- if (state === newState) { continue; }
- newSegments.push(this._createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i)));
- state = newState;
- start = i;
- }
- newSegments.push(this._createFuriganaSegment(text.substring(start, textLength), state ? '' : reading.substring(start, textLength)));
- return newSegments;
- }
-
- /**
- * @returns {import('wanakana')}
- * @throws {Error}
- */
- _getWanakana() {
- const wanakana = this._wanakana;
- if (wanakana === null) { throw new Error('Functions which use WanaKana are not supported in this context'); }
- return wanakana;
- }
-
- /**
- * @param {string} text
- * @param {?import('../../general/text-source-map.js').TextSourceMap} sourceMap
- * @param {number} sourceMapStart
- * @returns {string}
- */
- _convertAlphabeticPartToKana(text, sourceMap, sourceMapStart) {
- const wanakana = this._getWanakana();
- const result = wanakana.toHiragana(text);
-
- // Generate source mapping
- if (sourceMap !== null) {
- let i = 0;
- let resultPos = 0;
- const ii = text.length;
- while (i < ii) {
- // Find smallest matching substring
- let iNext = i + 1;
- let resultPosNext = result.length;
- while (iNext < ii) {
- const t = wanakana.toHiragana(text.substring(0, iNext));
- if (t === result.substring(0, t.length)) {
- resultPosNext = t.length;
- break;
- }
- ++iNext;
- }
-
- // Merge characters
- const removals = iNext - i - 1;
- if (removals > 0) {
- sourceMap.combine(sourceMapStart, removals);
- }
- ++sourceMapStart;
-
- // Empty elements
- const additions = resultPosNext - resultPos - 1;
- for (let j = 0; j < additions; ++j) {
- sourceMap.insert(sourceMapStart, 0);
- ++sourceMapStart;
- }
-
- i = iNext;
- resultPos = resultPosNext;
- }
- }
-
- return result;
- }
-
- /**
- * @param {string} text1
- * @param {string} text2
- * @returns {number}
- */
- _getStemLength(text1, text2) {
- const minLength = Math.min(text1.length, text2.length);
- if (minLength === 0) { return 0; }
-
- let i = 0;
- while (true) {
- const char1 = /** @type {number} */ (text1.codePointAt(i));
- const char2 = /** @type {number} */ (text2.codePointAt(i));
- if (char1 !== char2) { break; }
- const charLength = String.fromCodePoint(char1).length;
- i += charLength;
- if (i >= minLength) {
- if (i > minLength) {
- i -= charLength; // Don't consume partial UTF16 surrogate characters
- }
- break;
- }
- }
- return i;
- }
-}
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index cedc7d3d..66eeb69f 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -19,6 +19,8 @@
import {RegexUtil} from '../general/regex-util.js';
import {TextSourceMap} from '../general/text-source-map.js';
import {Deinflector} from './deinflector.js';
+import {convertAlphabeticToKana} from './japanese-wanakana.js';
+import {collapseEmphaticSequences, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericToFullWidth, isCodePointJapanese} from './japanese.js';
/**
* Class which finds term and kanji dictionary entries for text.
@@ -28,9 +30,7 @@ export class Translator {
* Creates a new Translator instance.
* @param {import('translator').ConstructorDetails} details The details for the class.
*/
- constructor({japaneseUtil, database}) {
- /** @type {import('./sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
+ constructor({database}) {
/** @type {import('../dictionary/dictionary-database.js').DictionaryDatabase} */
this._database = database;
/** @type {?Deinflector} */
@@ -436,7 +436,6 @@ export class Translator {
this._getCollapseEmphaticOptions(options)
];
- const jp = this._japaneseUtil;
/** @type {import('translation-internal').DatabaseDeinflection[]} */
const deinflections = [];
const used = new Set();
@@ -447,22 +446,22 @@ export class Translator {
text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
}
if (halfWidth) {
- text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
+ text2 = convertHalfWidthKanaToFullWidth(text2, sourceMap);
}
if (numeric) {
- text2 = jp.convertNumericToFullWidth(text2);
+ text2 = convertNumericToFullWidth(text2);
}
if (alphabetic) {
- text2 = jp.convertAlphabeticToKana(text2, sourceMap);
+ text2 = convertAlphabeticToKana(text2, sourceMap);
}
if (katakana) {
- text2 = jp.convertHiraganaToKatakana(text2);
+ text2 = convertHiraganaToKatakana(text2);
}
if (hiragana) {
- text2 = jp.convertKatakanaToHiragana(text2);
+ text2 = convertKatakanaToHiragana(text2);
}
if (collapseEmphatic) {
- text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
+ text2 = collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
}
for (
@@ -519,10 +518,9 @@ export class Translator {
* @returns {string}
*/
_getJapaneseOnlyText(text) {
- const jp = this._japaneseUtil;
let length = 0;
for (const c of text) {
- if (!jp.isCodePointJapanese(/** @type {number} */ (c.codePointAt(0)))) {
+ if (!isCodePointJapanese(/** @type {number} */ (c.codePointAt(0)))) {
return text.substring(0, length);
}
length += c.length;
diff --git a/ext/js/media/audio-downloader.js b/ext/js/media/audio-downloader.js
index 3a3b21d0..b4f63b96 100644
--- a/ext/js/media/audio-downloader.js
+++ b/ext/js/media/audio-downloader.js
@@ -23,14 +23,13 @@ import {JsonSchema} from '../data/json-schema.js';
import {ArrayBufferUtil} from '../data/sandbox/array-buffer-util.js';
import {NativeSimpleDOMParser} from '../dom/native-simple-dom-parser.js';
import {SimpleDOMParser} from '../dom/simple-dom-parser.js';
+import {isStringEntirelyKana} from '../language/japanese.js';
export class AudioDownloader {
/**
- * @param {{japaneseUtil: import('../language/sandbox/japanese-util.js').JapaneseUtil, requestBuilder: RequestBuilder}} details
+ * @param {{requestBuilder: RequestBuilder}} details
*/
- constructor({japaneseUtil, requestBuilder}) {
- /** @type {import('../language/sandbox/japanese-util.js').JapaneseUtil} */
- this._japaneseUtil = japaneseUtil;
+ constructor({requestBuilder}) {
/** @type {RequestBuilder} */
this._requestBuilder = requestBuilder;
/** @type {?JsonSchema} */
@@ -111,7 +110,7 @@ export class AudioDownloader {
/** @type {import('audio-downloader').GetInfoHandler} */
async _getInfoJpod101(term, reading) {
- if (reading === term && this._japaneseUtil.isStringEntirelyKana(term)) {
+ if (reading === term && isStringEntirelyKana(term)) {
reading = term;
term = '';
}
diff --git a/ext/js/pages/settings/anki-templates-controller.js b/ext/js/pages/settings/anki-templates-controller.js
index 910e99ac..869c9e16 100644
--- a/ext/js/pages/settings/anki-templates-controller.js
+++ b/ext/js/pages/settings/anki-templates-controller.js
@@ -20,7 +20,6 @@ import {ExtensionError} from '../../core/extension-error.js';
import {toError} from '../../core/to-error.js';
import {AnkiNoteBuilder} from '../../data/anki-note-builder.js';
import {querySelectorNotNull} from '../../dom/query-selector.js';
-import {JapaneseUtil} from '../../language/sandbox/japanese-util.js';
import {TemplateRendererProxy} from '../../templates/template-renderer-proxy.js';
import {yomitan} from '../../yomitan.js';
@@ -56,7 +55,7 @@ export class AnkiTemplatesController {
/** @type {?import('./modal.js').Modal} */
this._fieldTemplateResetModal = null;
/** @type {AnkiNoteBuilder} */
- this._ankiNoteBuilder = new AnkiNoteBuilder(new JapaneseUtil(null), new TemplateRendererProxy());
+ this._ankiNoteBuilder = new AnkiNoteBuilder(new TemplateRendererProxy());
}
/** */
diff --git a/ext/js/templates/sandbox/anki-template-renderer.js b/ext/js/templates/sandbox/anki-template-renderer.js
index ef2c1610..52087336 100644
--- a/ext/js/templates/sandbox/anki-template-renderer.js
+++ b/ext/js/templates/sandbox/anki-template-renderer.js
@@ -22,7 +22,7 @@ import {DictionaryDataUtil} from '../../dictionary/dictionary-data-util.js';
import {PronunciationGenerator} from '../../display/sandbox/pronunciation-generator.js';
import {StructuredContentGenerator} from '../../display/sandbox/structured-content-generator.js';
import {CssStyleApplier} from '../../dom/sandbox/css-style-applier.js';
-import {JapaneseUtil} from '../../language/sandbox/japanese-util.js';
+import {convertHiraganaToKatakana, convertKatakanaToHiragana, distributeFurigana, getKanaMorae, getPitchCategory, isMoraPitchHigh} from '../../language/japanese.js';
import {AnkiTemplateRendererContentManager} from './anki-template-renderer-content-manager.js';
import {TemplateRendererMediaProvider} from './template-renderer-media-provider.js';
import {TemplateRenderer} from './template-renderer.js';
@@ -42,16 +42,14 @@ export class AnkiTemplateRenderer {
this._pronunciationStyleApplier = new CssStyleApplier('/data/pronunciation-style.json');
/** @type {RegExp} */
this._structuredContentDatasetKeyIgnorePattern = /^sc([^a-z]|$)/;
- /** @type {JapaneseUtil} */
- this._japaneseUtil = new JapaneseUtil(null);
/** @type {TemplateRenderer} */
this._templateRenderer = new TemplateRenderer();
/** @type {AnkiNoteDataCreator} */
- this._ankiNoteDataCreator = new AnkiNoteDataCreator(this._japaneseUtil);
+ this._ankiNoteDataCreator = new AnkiNoteDataCreator();
/** @type {TemplateRendererMediaProvider} */
this._mediaProvider = new TemplateRendererMediaProvider();
/** @type {PronunciationGenerator} */
- this._pronunciationGenerator = new PronunciationGenerator(this._japaneseUtil);
+ this._pronunciationGenerator = new PronunciationGenerator();
/** @type {?(Map<string, unknown>[])} */
this._stateStack = null;
/** @type {?import('anki-note-builder').Requirement[]} */
@@ -171,7 +169,7 @@ export class AnkiTemplateRenderer {
/** @type {import('template-renderer').HelperFunction<string>} */
_furigana(args, context, options) {
const {expression, reading} = this._getFuriganaExpressionAndReading(args, context, options);
- const segments = this._japaneseUtil.distributeFurigana(expression, reading);
+ const segments = distributeFurigana(expression, reading);
let result = '';
for (const {text, reading: reading2} of segments) {
@@ -190,7 +188,7 @@ export class AnkiTemplateRenderer {
/** @type {import('template-renderer').HelperFunction<string>} */
_furiganaPlain(args, context, options) {
const {expression, reading} = this._getFuriganaExpressionAndReading(args, context, options);
- const segments = this._japaneseUtil.distributeFurigana(expression, reading);
+ const segments = distributeFurigana(expression, reading);
let result = '';
for (const {text, reading: reading2} of segments) {
@@ -512,13 +510,13 @@ export class AnkiTemplateRenderer {
/** @type {import('template-renderer').HelperFunction<boolean>} */
_isMoraPitchHigh(args) {
const [index, position] = /** @type {[index: number, position: number]} */ (args);
- return this._japaneseUtil.isMoraPitchHigh(index, position);
+ return isMoraPitchHigh(index, position);
}
/** @type {import('template-renderer').HelperFunction<string[]>} */
_getKanaMorae(args) {
const [text] = /** @type {[text: string]} */ (args);
- return this._japaneseUtil.getKanaMorae(`${text}`);
+ return getKanaMorae(`${text}`);
}
/** @type {import('template-renderer').HelperFunction<import('core').TypeofResult>} */
@@ -555,7 +553,7 @@ export class AnkiTemplateRenderer {
const isVerbOrAdjective = DictionaryDataUtil.isNonNounVerbOrAdjective(wordClasses);
const pitches = DictionaryDataUtil.getPronunciationsOfType(pronunciations, 'pitch-accent');
for (const {position} of pitches) {
- const category = this._japaneseUtil.getPitchCategory(reading, position, isVerbOrAdjective);
+ const category = getPitchCategory(reading, position, isVerbOrAdjective);
if (category !== null) {
categories.add(category);
}
@@ -666,7 +664,7 @@ export class AnkiTemplateRenderer {
*/
_createStructuredContentGenerator(data) {
const contentManager = new AnkiTemplateRendererContentManager(this._mediaProvider, data);
- const instance = new StructuredContentGenerator(contentManager, this._japaneseUtil, document);
+ const instance = new StructuredContentGenerator(contentManager, document);
this._cleanupCallbacks.push(() => contentManager.unloadAll());
return instance;
}
@@ -735,7 +733,7 @@ export class AnkiTemplateRenderer {
if (typeof downstepPosition !== 'number') { return ''; }
if (!Array.isArray(nasalPositions)) { nasalPositions = []; }
if (!Array.isArray(devoicePositions)) { devoicePositions = []; }
- const morae = this._japaneseUtil.getKanaMorae(reading);
+ const morae = getKanaMorae(reading);
switch (format) {
case 'text':
@@ -756,7 +754,7 @@ export class AnkiTemplateRenderer {
const ii = args.length;
const {keepProlongedSoundMarks} = options.hash;
const value = (ii > 0 ? args[0] : this._computeValue(options, context));
- return typeof value === 'string' ? this._japaneseUtil.convertKatakanaToHiragana(value, keepProlongedSoundMarks === true) : '';
+ return typeof value === 'string' ? convertKatakanaToHiragana(value, keepProlongedSoundMarks === true) : '';
}
/**
@@ -765,7 +763,7 @@ export class AnkiTemplateRenderer {
_katakana(args, context, options) {
const ii = args.length;
const value = (ii > 0 ? args[0] : this._computeValue(options, context));
- return typeof value === 'string' ? this._japaneseUtil.convertHiraganaToKatakana(value) : '';
+ return typeof value === 'string' ? convertHiraganaToKatakana(value) : '';
}
/**
diff --git a/test/fixtures/translator-test.js b/test/fixtures/translator-test.js
index f162972d..6562931c 100644
--- a/test/fixtures/translator-test.js
+++ b/test/fixtures/translator-test.js
@@ -26,7 +26,6 @@ import {createDictionaryArchive} from '../../dev/util.js';
import {AnkiNoteDataCreator} from '../../ext/js/data/sandbox/anki-note-data-creator.js';
import {DictionaryDatabase} from '../../ext/js/dictionary/dictionary-database.js';
import {DictionaryImporter} from '../../ext/js/dictionary/dictionary-importer.js';
-import {JapaneseUtil} from '../../ext/js/language/sandbox/japanese-util.js';
import {Translator} from '../../ext/js/language/translator.js';
import {chrome, fetch} from '../mocks/common.js';
import {DictionaryImporterMediaLoader} from '../mocks/dictionary-importer-media-loader.js';
@@ -65,14 +64,13 @@ async function createTranslatorContext(dictionaryDirectory, dictionaryName) {
expect(errors.length).toEqual(0);
// Setup translator
- const japaneseUtil = new JapaneseUtil(null);
- const translator = new Translator({japaneseUtil, database: dictionaryDatabase});
+ const translator = new Translator({database: dictionaryDatabase});
/** @type {import('deinflector').ReasonsRaw} */
const deinflectionReasons = parseJson(readFileSync(deinflectionReasonsPath, {encoding: 'utf8'}));
translator.prepare(deinflectionReasons);
// Assign properties
- const ankiNoteDataCreator = new AnkiNoteDataCreator(japaneseUtil);
+ const ankiNoteDataCreator = new AnkiNoteDataCreator();
return {translator, ankiNoteDataCreator};
}
diff --git a/test/japanese-util.test.js b/test/japanese-util.test.js
index ab14f209..d7b05c3e 100644
--- a/test/japanese-util.test.js
+++ b/test/japanese-util.test.js
@@ -18,10 +18,8 @@
import {describe, expect, test} from 'vitest';
import {TextSourceMap} from '../ext/js/general/text-source-map.js';
-import {JapaneseUtil} from '../ext/js/language/sandbox/japanese-util.js';
-import * as wanakana from '../ext/lib/wanakana.js';
-
-const jp = new JapaneseUtil(wanakana);
+import * as jpw from '../ext/js/language/japanese-wanakana.js';
+import * as jp from '../ext/js/language/japanese.js';
/** */
function testIsCodePointKanji() {
@@ -199,7 +197,7 @@ function testConvertToRomaji() {
];
test.each(data)('%s -> %o', (string, expected) => {
- expect(jp.convertToRomaji(string)).toStrictEqual(expected);
+ expect(jpw.convertToRomaji(string)).toStrictEqual(expected);
});
});
}
@@ -268,8 +266,8 @@ function testConvertAlphabeticToKana() {
for (const [string, expected, expectedSourceMapping] of data) {
test(`${string} -> ${string}${typeof expectedSourceMapping !== 'undefined' ? ', ' + JSON.stringify(expectedSourceMapping) : ''}`, () => {
const sourceMap = new TextSourceMap(string);
- const actual1 = jp.convertAlphabeticToKana(string, null);
- const actual2 = jp.convertAlphabeticToKana(string, sourceMap);
+ const actual1 = jpw.convertAlphabeticToKana(string, null);
+ const actual2 = jpw.convertAlphabeticToKana(string, sourceMap);
expect(actual1).toStrictEqual(expected);
expect(actual2).toStrictEqual(expected);
if (typeof expectedSourceMapping !== 'undefined') {
diff --git a/test/utilities/anki.js b/test/utilities/anki.js
index 322acb0d..e30d578f 100644
--- a/test/utilities/anki.js
+++ b/test/utilities/anki.js
@@ -16,7 +16,6 @@
*/
import {AnkiNoteBuilder} from '../../ext/js/data/anki-note-builder.js';
-import {JapaneseUtil} from '../../ext/js/language/sandbox/japanese-util.js';
import {AnkiTemplateRenderer} from '../../ext/js/templates/sandbox/anki-template-renderer.js';
/**
@@ -130,7 +129,6 @@ export async function getTemplateRenderResults(dictionaryEntries, type, mode, te
const ankiTemplateRenderer = new AnkiTemplateRenderer();
await ankiTemplateRenderer.prepare();
- const japaneseUtil = new JapaneseUtil(null);
const clozePrefix = 'cloze-prefix';
const clozeSuffix = 'cloze-suffix';
const results = [];
@@ -146,7 +144,7 @@ export async function getTemplateRenderResults(dictionaryEntries, type, mode, te
}
break;
}
- const ankiNoteBuilder = new AnkiNoteBuilder(japaneseUtil, ankiTemplateRenderer.templateRenderer);
+ const ankiNoteBuilder = new AnkiNoteBuilder(ankiTemplateRenderer.templateRenderer);
const context = {
url: 'url:',
sentence: {
diff --git a/types/ext/display.d.ts b/types/ext/display.d.ts
index 86662659..b11d54e1 100644
--- a/types/ext/display.d.ts
+++ b/types/ext/display.d.ts
@@ -17,7 +17,6 @@
import type {DisplayContentManager} from '../../ext/js/display/display-content-manager';
import type {HotkeyHelpController} from '../../ext/js/input/hotkey-help-controller';
-import type {JapaneseUtil} from '../../ext/js/language/sandbox/japanese-util';
import type * as Dictionary from './dictionary';
import type * as Extension from './extension';
import type * as Settings from './settings';
@@ -128,7 +127,6 @@ export type GetSearchContextCallback = TextScannerTypes.GetSearchContextCallback
export type QueryParserConstructorDetails = {
getSearchContext: GetSearchContextCallback;
- japaneseUtil: JapaneseUtil;
};
export type QueryParserOptions = {
@@ -169,7 +167,6 @@ export type Events = {
export type EventArgument<TName extends EventNames<Events>> = BaseEventArgument<Events, TName>;
export type DisplayGeneratorConstructorDetails = {
- japaneseUtil: JapaneseUtil;
contentManager: DisplayContentManager;
hotkeyHelpController?: HotkeyHelpController | null;
};
diff --git a/types/ext/translator.d.ts b/types/ext/translator.d.ts
index 65a77e90..5d552ca8 100644
--- a/types/ext/translator.d.ts
+++ b/types/ext/translator.d.ts
@@ -16,13 +16,10 @@
*/
import type {DictionaryDatabase} from '../../ext/js/dictionary/dictionary-database';
-import type {JapaneseUtil} from '../../ext/js/language/sandbox/japanese-util';
import type * as Dictionary from './dictionary';
import type * as DictionaryDatabaseTypes from './dictionary-database';
export type ConstructorDetails = {
- /** An instance of JapaneseUtil. */
- japaneseUtil: JapaneseUtil;
/** An instance of DictionaryDatabase. */
database: DictionaryDatabase;
};