/*
 * Copyright (C) 2024  Yomitan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import {escapeRegExp} from '../core/utilities.js';

/**
 * Rewrites the suffix of a text according to rules registered through `addDescriptor`.
 * Every named condition is represented as a bit mask ("condition flags") so that rule
 * applicability can be checked with a single bitwise AND.
 */
export class LanguageTransformer {
    /**
     * Creates a transformer with no transforms and no known conditions.
     */
    constructor() {
        /** @type {number} Index of the next unused bit for a leaf condition. */
        this._nextFlagIndex = 0;
        /** @type {import('language-transformer-internal').Transform[]} */
        this._transforms = [];
        /** @type {Map<string, number>} */
        this._conditionTypeToConditionFlagsMap = new Map();
        /** @type {Map<string, number>} */
        this._partOfSpeechToConditionFlagsMap = new Map();
    }

    /**
     * Resets the instance to its freshly constructed state: the flag counter returns to 0
     * and all registered transforms and flag lookups are discarded.
     */
    clear() {
        this._nextFlagIndex = 0;
        this._transforms = [];
        this._conditionTypeToConditionFlagsMap.clear();
        this._partOfSpeechToConditionFlagsMap.clear();
    }

    /**
     * Registers the conditions and transforms of a descriptor.
     *
     * Note: this function does not currently combine properly with previous descriptors,
     * they are treated as completely separate collections. This should eventually be changed.
     * Concretely, rule conditions are resolved only against the conditions declared in
     * `descriptor` itself, while flag bits continue from where the previous descriptor stopped.
     *
     * Instance state is only modified after every rule has been validated, so a thrown
     * error leaves the transformer unchanged.
     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
     * @throws {Error} If a rule references an unknown condition, or if the condition
     *   declarations cannot be converted to flags (see `_getConditionFlagsMap`).
     */
    addDescriptor(descriptor) {
        const {conditions, transforms} = descriptor;
        const conditionEntries = Object.entries(conditions);
        const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex);

        /** @type {import('language-transformer-internal').Transform[]} */
        const transforms2 = [];
        for (let i = 0, ii = transforms.length; i < ii; ++i) {
            const {name, rules} = transforms[i];
            /** @type {import('language-transformer-internal').Rule[]} */
            const rules2 = [];
            for (let j = 0, jj = rules.length; j < jj; ++j) {
                const {suffixIn, suffixOut, conditionsIn, conditionsOut} = rules[j];
                const conditionFlagsIn = this._getConditionFlags(conditionFlagsMap, conditionsIn);
                if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform[${i}].rules[${j}]`); }
                const conditionFlagsOut = this._getConditionFlags(conditionFlagsMap, conditionsOut);
                if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform[${i}].rules[${j}]`); }
                rules2.push({
                    suffixIn,
                    suffixOut,
                    conditionsIn: conditionFlagsIn,
                    conditionsOut: conditionFlagsOut
                });
            }
            // A single regular expression matching any of the transform's input suffixes;
            // `transform` uses it to skip the whole rule list when no suffix can match.
            const suffixes = rules.map((rule) => rule.suffixIn);
            const suffixHeuristic = new RegExp(`(${suffixes.map((suffix) => escapeRegExp(suffix)).join('|')})$`);
            transforms2.push({name, rules: rules2, suffixHeuristic});
        }

        this._nextFlagIndex = nextFlagIndex;
        for (const transform of transforms2) {
            this._transforms.push(transform);
        }

        for (const [type, condition] of conditionEntries) {
            const flags = conditionFlagsMap.get(type);
            if (typeof flags === 'undefined') { continue; } // This case should never happen
            this._conditionTypeToConditionFlagsMap.set(type, flags);
            for (const partOfSpeech of condition.partsOfSpeech) {
                // Merge with any flags already recorded for the same part of speech.
                this._partOfSpeechToConditionFlagsMap.set(partOfSpeech, this.getConditionFlagsFromPartOfSpeech(partOfSpeech) | flags);
            }
        }
    }

    /**
     * Looks up the condition flags registered for a part of speech.
     * @param {string} partOfSpeech
     * @returns {number} The flags, or `0` if the part of speech is unknown.
     */
    getConditionFlagsFromPartOfSpeech(partOfSpeech) {
        const conditionFlags = this._partOfSpeechToConditionFlagsMap.get(partOfSpeech);
        return typeof conditionFlags !== 'undefined' ? conditionFlags : 0;
    }

    /**
     * Combines (bitwise OR) the condition flags of several parts of speech.
     * Unknown parts of speech contribute `0`.
     * @param {string[]} partsOfSpeech
     * @returns {number}
     */
    getConditionFlagsFromPartsOfSpeech(partsOfSpeech) {
        let result = 0;
        for (const partOfSpeech of partsOfSpeech) {
            result |= this.getConditionFlagsFromPartOfSpeech(partOfSpeech);
        }
        return result;
    }

    /**
     * Looks up the condition flags registered for a condition type.
     * @param {string} conditionType
     * @returns {number} The flags, or `0` if the condition type is unknown.
     */
    getConditionFlagsFromConditionType(conditionType) {
        const conditionFlags = this._conditionTypeToConditionFlagsMap.get(conditionType);
        return typeof conditionFlags !== 'undefined' ? conditionFlags : 0;
    }

    /**
     * Combines (bitwise OR) the condition flags of several condition types.
     * Unknown condition types contribute `0`.
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    getConditionFlagsFromConditionTypes(conditionTypes) {
        let result = 0;
        for (const conditionType of conditionTypes) {
            result |= this.getConditionFlagsFromConditionType(conditionType);
        }
        return result;
    }

    /**
     * Produces every text reachable from `sourceText` by applying the registered rules.
     * The first entry of the result is always the unmodified source text with conditions `0`
     * and an empty trace. The result array doubles as the work queue: each entry appended
     * is itself processed later, so rules are applied repeatedly.
     * @param {string} sourceText
     * @returns {import('language-transformer-internal').TransformedText[]}
     */
    transform(sourceText) {
        const results = [this._createTransformedText(sourceText, 0, [])];
        // `results.length` is re-read on every iteration because the loop body appends to it.
        for (let i = 0; i < results.length; ++i) {
            const {text, conditions, trace} = results[i];
            for (const transform of this._transforms) {
                if (!transform.suffixHeuristic.test(text)) { continue; }

                const {name, rules} = transform;
                for (let j = 0, jj = rules.length; j < jj; ++j) {
                    const rule = rules[j];
                    if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
                    const {suffixIn, suffixOut} = rule;
                    // Skip rules that do not apply or that would produce an empty string.
                    if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; }
                    results.push(this._createTransformedText(
                        text.substring(0, text.length - suffixIn.length) + suffixOut,
                        rule.conditionsOut,
                        this._extendTrace(trace, {transform: name, ruleIndex: j})
                    ));
                }
            }
        }
        return results;
    }

    /**
     * Assigns a bit mask to every declared condition.
     * A condition without `subConditions` receives a fresh single bit; a condition with
     * `subConditions` receives the OR of its sub-conditions' masks. Conditions whose
     * sub-conditions are not resolved yet are retried in a later pass.
     * @param {import('language-transformer').ConditionMapEntries} conditions
     * @param {number} nextFlagIndex Index of the first bit available for allocation.
     * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}} The mask of
     *   every condition and the index of the first bit that is still unused.
     * @throws {Error} If more than 32 single-bit conditions would be required, or if some
     *   `subConditions` can never be resolved (cyclic or undeclared references).
     */
    _getConditionFlagsMap(conditions, nextFlagIndex) {
        /** @type {Map<string, number>} */
        const conditionFlagsMap = new Map();
        let flagIndex = nextFlagIndex;
        /** @type {import('language-transformer').ConditionMapEntries} */
        let targets = conditions;
        while (targets.length > 0) {
            const nextTargets = [];
            for (const target of targets) {
                const [type, condition] = target;
                const {subConditions} = condition;
                let flags = 0;
                if (typeof subConditions === 'undefined') {
                    if (flagIndex >= 32) {
                        // Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations
                        throw new Error('Maximum number of conditions was exceeded');
                    }
                    flags = 1 << flagIndex;
                    ++flagIndex;
                } else {
                    const multiFlags = this._getConditionFlags(conditionFlagsMap, subConditions);
                    if (multiFlags === null) {
                        // At least one sub-condition has no mask yet; retry in the next pass.
                        nextTargets.push(target);
                        continue;
                    } else {
                        flags = multiFlags;
                    }
                }
                conditionFlagsMap.set(type, flags);
            }
            if (nextTargets.length === targets.length) {
                // No progress was made during this pass: the remaining conditions either
                // reference each other cyclically or reference an undeclared condition.
                throw new Error('Cycle or unresolvable reference detected in subConditions declaration');
            }
            targets = nextTargets;
        }
        return {conditionFlagsMap, nextFlagIndex: flagIndex};
    }

    /**
     * Combines (bitwise OR) the masks of the given condition types.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {?number} The combined mask (`0` for an empty list), or `null` if any
     *   condition type is missing from `conditionFlagsMap`.
     */
    _getConditionFlags(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            const flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                return null;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Bundles a text, its condition flags and its trace into a single result object.
     * @param {string} text
     * @param {number} conditions
     * @param {import('language-transformer-internal').Trace} trace
     * @returns {import('language-transformer-internal').TransformedText}
     */
    _createTransformedText(text, conditions, trace) {
        return {text, conditions, trace};
    }

    /**
     * Returns a new trace with `newFrame` placed first, followed by copies of the frames
     * of `trace`. The input trace is not modified.
     * @param {import('language-transformer-internal').Trace} trace
     * @param {import('language-transformer-internal').TraceFrame} newFrame
     * @returns {import('language-transformer-internal').Trace}
     */
    _extendTrace(trace, newFrame) {
        const newTrace = [newFrame];
        for (const {transform, ruleIndex} of trace) {
            newTrace.push({transform, ruleIndex});
        }
        return newTrace;
    }

    /**
     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
     * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
     * @param {number} currentConditions
     * @param {number} nextConditions
     * @returns {boolean}
     */
    static conditionsMatch(currentConditions, nextConditions) {
        return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
    }
}