diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2024-01-31 08:28:05 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-31 13:28:05 +0000 |
commit | 3e419aa562aab03ca20421aaf7e4d1a39194a5b4 (patch) | |
tree | 15e8bfe81fa5e3fae55e54802f14d94a7502a469 /ext/js/language/language-transformer.js | |
parent | 6807b05e9bd41f013364fae0cbcce83cf1ed37b6 (diff) |
Language transformer (#582)
* Set up new deinflection data file
* Define types
* Test
* Add internal types
* Set up loading for transforms
* Add getPartOfSpeechFlags
* Convert static methods
* Add note
* Add transform function
* Update trace structure
* Add a language tag to the language transform descriptor
* Add clear function
* Add function for multiple parts of speech
* Clarify naming
* Add getConditionFlagsFromConditionType
* Add plural function
* Replace usages of Deinflector
* Update tests
* Update config
* Remove old
* Rename
* Rename files
Diffstat (limited to 'ext/js/language/language-transformer.js')
-rw-r--r-- | ext/js/language/language-transformer.js | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js new file mode 100644 index 00000000..c9e261ea --- /dev/null +++ b/ext/js/language/language-transformer.js @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +export class LanguageTransformer { + constructor() { + /** @type {number} */ + this._nextFlagIndex = 0; + /** @type {import('language-transformer-internal').Transform[]} */ + this._transforms = []; + /** @type {Map<string, number>} */ + this._conditionTypeToConditionFlagsMap = new Map(); + /** @type {Map<string, number>} */ + this._partOfSpeechToConditionFlagsMap = new Map(); + } + + /** */ + clear() { + this._nextFlagIndex = 0; + this._transforms = []; + this._conditionTypeToConditionFlagsMap.clear(); + this._partOfSpeechToConditionFlagsMap.clear(); + } + + /** + * Note: this function does not currently combine properly with previous descriptors, + * they are treated as completely separate collections. This should eventually be changed. + * @param {import('language-transformer').LanguageTransformDescriptor} descriptor + * @throws {Error} + */ + addDescriptor(descriptor) { + const {conditions, transforms} = descriptor; + const conditionEntries = Object.entries(conditions); + const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex); + + /** @type {import('language-transformer-internal').Transform[]} */ + const transforms2 = []; + for (let i = 0, ii = transforms.length; i < ii; ++i) { + const {name, rules} = transforms[i]; + /** @type {import('language-transformer-internal').Rule[]} */ + const rules2 = []; + for (let j = 0, jj = rules.length; j < jj; ++j) { + const {suffixIn, suffixOut, conditionsIn, conditionsOut} = rules[j]; + const conditionFlagsIn = this._getConditionFlags(conditionFlagsMap, conditionsIn); + if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform[${i}].rules[${j}]`); } + const conditionFlagsOut = this._getConditionFlags(conditionFlagsMap, conditionsOut); + if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform[${i}].rules[${j}]`); } + rules2.push({ + suffixIn, + suffixOut, + conditionsIn: conditionFlagsIn, + conditionsOut: conditionFlagsOut + }); + } + transforms2.push({name, rules: rules2}); + } + + this._nextFlagIndex = nextFlagIndex; + for (const transform of transforms2) { + this._transforms.push(transform); + } + + for (const [type, condition] of conditionEntries) { + const flags = conditionFlagsMap.get(type); + if (typeof flags === 'undefined') { continue; } // This case should never happen + this._conditionTypeToConditionFlagsMap.set(type, flags); + for (const partOfSpeech of condition.partsOfSpeech) { + this._partOfSpeechToConditionFlagsMap.set(partOfSpeech, this.getConditionFlagsFromPartOfSpeech(partOfSpeech) | flags); + } + } + } + + /** + * @param {string} partOfSpeech + * @returns {number} + */ + getConditionFlagsFromPartOfSpeech(partOfSpeech) { + const conditionFlags = this._partOfSpeechToConditionFlagsMap.get(partOfSpeech); + return typeof conditionFlags !== 'undefined' ? conditionFlags : 0; + } + + /** + * @param {string[]} partsOfSpeech + * @returns {number} + */ + getConditionFlagsFromPartsOfSpeech(partsOfSpeech) { + let result = 0; + for (const partOfSpeech of partsOfSpeech) { + result |= this.getConditionFlagsFromPartOfSpeech(partOfSpeech); + } + return result; + } + + /** + * @param {string} conditionType + * @returns {number} + */ + getConditionFlagsFromConditionType(conditionType) { + const conditionFlags = this._conditionTypeToConditionFlagsMap.get(conditionType); + return typeof conditionFlags !== 'undefined' ? conditionFlags : 0; + } + + /** + * @param {string[]} conditionTypes + * @returns {number} + */ + getConditionFlagsFromConditionTypes(conditionTypes) { + let result = 0; + for (const conditionType of conditionTypes) { + result |= this.getConditionFlagsFromConditionType(conditionType); + } + return result; + } + + /** + * @param {string} sourceText + * @returns {import('language-transformer-internal').TransformedText[]} + */ + transform(sourceText) { + const results = [this._createTransformedText(sourceText, 0, [])]; + for (let i = 0; i < results.length; ++i) { + const {text, conditions, trace} = results[i]; + for (const {name, rules} of this._transforms) { + for (let j = 0, jj = rules.length; j < jj; ++j) { + const rule = rules[j]; + if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; } + const {suffixIn, suffixOut} = rule; + if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; } + results.push(this._createTransformedText( + text.substring(0, text.length - suffixIn.length) + suffixOut, + rule.conditionsOut, + this._extendTrace(trace, {transform: name, ruleIndex: j}) + )); + } + } + } + return results; + } + + /** + * @param {import('language-transformer').ConditionMapEntries} conditions + * @param {number} nextFlagIndex + * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}} + * @throws {Error} + */ + _getConditionFlagsMap(conditions, nextFlagIndex) { + /** @type {Map<string, number>} */ + const conditionFlagsMap = new Map(); + /** @type {import('language-transformer').ConditionMapEntries} */ + let targets = conditions; + while (targets.length > 0) { + const nextTargets = []; + for (const target of targets) { + const [type, condition] = target; + const {subConditions} = condition; + let flags = 0; + if (typeof subConditions === 'undefined') { + if (nextFlagIndex >= 32) { + // Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations + throw new Error('Maximum number of conditions was exceeded'); + } + flags = 1 << nextFlagIndex; + ++nextFlagIndex; + } else { + const multiFlags = this._getConditionFlags(conditionFlagsMap, subConditions); + if (multiFlags === null) { + nextTargets.push(target); + continue; + } else { + flags = multiFlags; + } + } + conditionFlagsMap.set(type, flags); + } + if (nextTargets.length === targets.length) { + // Cycle in subRule declaration + throw new Error('Maximum number of conditions was exceeded'); + } + targets = nextTargets; + } + return {conditionFlagsMap, nextFlagIndex}; + } + + /** + * @param {Map<string, number>} conditionFlagsMap + * @param {string[]} conditionTypes + * @returns {?number} + */ + _getConditionFlags(conditionFlagsMap, conditionTypes) { + let flags = 0; + for (const conditionType of conditionTypes) { + const flags2 = conditionFlagsMap.get(conditionType); + if (typeof flags2 === 'undefined') { return null; } + flags |= flags2; + } + return flags; + } + + /** + * @param {string} text + * @param {number} conditions + * @param {import('language-transformer-internal').Trace} trace + * @returns {import('language-transformer-internal').TransformedText} + */ + _createTransformedText(text, conditions, trace) { + return {text, conditions, trace}; + } + + /** + * @param {import('language-transformer-internal').Trace} trace + * @param {import('language-transformer-internal').TraceFrame} newFrame + * @returns {import('language-transformer-internal').Trace} + */ + _extendTrace(trace, newFrame) { + const newTrace = [newFrame]; + for (const {transform, ruleIndex} of trace) { + newTrace.push({transform, ruleIndex}); + } + return newTrace; + } + + /** + * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned. + * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`. + * @param {number} currentConditions + * @param {number} nextConditions + * @returns {boolean} + */ + static conditionsMatch(currentConditions, nextConditions) { + return currentConditions === 0 || (currentConditions & nextConditions) !== 0; + } +} |