/*
* Copyright (C) 2024 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
import {log} from '../core/log.js';
export class LanguageTransformer {
constructor() {
/** @type {number} */
this._nextFlagIndex = 0;
/** @type {import('language-transformer-internal').Transform[]} */
this._transforms = [];
/** @type {Map} */
this._conditionTypeToConditionFlagsMap = new Map();
/** @type {Map} */
this._partOfSpeechToConditionFlagsMap = new Map();
}
/** */
clear() {
this._nextFlagIndex = 0;
this._transforms = [];
this._conditionTypeToConditionFlagsMap.clear();
this._partOfSpeechToConditionFlagsMap.clear();
}
/**
* Note: this function does not currently combine properly with previous descriptors,
* they are treated as completely separate collections. This should eventually be changed.
* @param {import('language-transformer').LanguageTransformDescriptor} descriptor
* @throws {Error}
*/
addDescriptor(descriptor) {
const {conditions, transforms} = descriptor;
const conditionEntries = Object.entries(conditions);
const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex);
/** @type {import('language-transformer-internal').Transform[]} */
const transforms2 = [];
for (const [transformId, transform] of Object.entries(transforms)) {
const {name, description, rules} = transform;
/** @type {import('language-transformer-internal').Rule[]} */
const rules2 = [];
for (let j = 0, jj = rules.length; j < jj; ++j) {
const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform ${transformId}.rules[${j}]`); }
const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform ${transformId}.rules[${j}]`); }
rules2.push({
type,
isInflected,
deinflect,
conditionsIn: conditionFlagsIn,
conditionsOut: conditionFlagsOut,
});
}
const isInflectedTests = rules.map((rule) => rule.isInflected);
const heuristic = new RegExp(isInflectedTests.map((regExp) => regExp.source).join('|'));
transforms2.push({id: transformId, name, description, rules: rules2, heuristic});
}
this._nextFlagIndex = nextFlagIndex;
for (const transform of transforms2) {
this._transforms.push(transform);
}
for (const [type, {isDictionaryForm}] of conditionEntries) {
const flags = conditionFlagsMap.get(type);
if (typeof flags === 'undefined') { continue; } // This case should never happen
this._conditionTypeToConditionFlagsMap.set(type, flags);
if (isDictionaryForm) {
this._partOfSpeechToConditionFlagsMap.set(type, flags);
}
}
}
/**
* @param {string[]} partsOfSpeech
* @returns {number}
*/
getConditionFlagsFromPartsOfSpeech(partsOfSpeech) {
return this._getConditionFlags(this._partOfSpeechToConditionFlagsMap, partsOfSpeech);
}
/**
* @param {string[]} conditionTypes
* @returns {number}
*/
getConditionFlagsFromConditionTypes(conditionTypes) {
return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, conditionTypes);
}
/**
* @param {string} conditionType
* @returns {number}
*/
getConditionFlagsFromConditionType(conditionType) {
return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, [conditionType]);
}
/**
* @param {string} sourceText
* @returns {import('language-transformer-internal').TransformedText[]}
*/
transform(sourceText) {
const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
for (let i = 0; i < results.length; ++i) {
const {text, conditions, trace} = results[i];
for (const transform of this._transforms) {
if (!transform.heuristic.test(text)) { continue; }
const {id, rules} = transform;
for (let j = 0, jj = rules.length; j < jj; ++j) {
const rule = rules[j];
if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
const {isInflected, deinflect} = rule;
if (!isInflected.test(text)) { continue; }
const isCycle = trace.some((frame) => frame.transform === id && frame.ruleIndex === j && frame.text === text);
if (isCycle) {
log.warn(new Error(`Cycle detected in transform[${name}] rule[${j}] for text: ${text}\nTrace: ${JSON.stringify(trace)}`));
continue;
}
results.push(LanguageTransformer.createTransformedText(
deinflect(text),
rule.conditionsOut,
this._extendTrace(trace, {transform: id, ruleIndex: j, text}),
));
}
}
}
return results;
}
/**
* @param {string[]} inflectionRules
* @returns {import('dictionary').InflectionRuleChain}
*/
getUserFacingInflectionRules(inflectionRules) {
return inflectionRules.map((rule) => {
const fullRule = this._transforms.find((transform) => transform.id === rule);
if (typeof fullRule === 'undefined') { return {name: rule}; }
const {name, description} = fullRule;
return description ? {name, description} : {name};
});
}
/**
* @param {string} text
* @param {number} conditions
* @param {import('language-transformer-internal').Trace} trace
* @returns {import('language-transformer-internal').TransformedText}
*/
static createTransformedText(text, conditions, trace) {
return {text, conditions, trace};
}
/**
* If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
* Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
* @param {number} currentConditions
* @param {number} nextConditions
* @returns {boolean}
*/
static conditionsMatch(currentConditions, nextConditions) {
return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
}
/**
* @param {import('language-transformer').ConditionMapEntries} conditions
* @param {number} nextFlagIndex
* @returns {{conditionFlagsMap: Map, nextFlagIndex: number}}
* @throws {Error}
*/
_getConditionFlagsMap(conditions, nextFlagIndex) {
/** @type {Map} */
const conditionFlagsMap = new Map();
/** @type {import('language-transformer').ConditionMapEntries} */
let targets = conditions;
while (targets.length > 0) {
const nextTargets = [];
for (const target of targets) {
const [type, condition] = target;
const {subConditions} = condition;
let flags = 0;
if (typeof subConditions === 'undefined') {
if (nextFlagIndex >= 32) {
// Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations
throw new Error('Maximum number of conditions was exceeded');
}
flags = 1 << nextFlagIndex;
++nextFlagIndex;
} else {
const multiFlags = this._getConditionFlagsStrict(conditionFlagsMap, subConditions);
if (multiFlags === null) {
nextTargets.push(target);
continue;
} else {
flags = multiFlags;
}
}
conditionFlagsMap.set(type, flags);
}
if (nextTargets.length === targets.length) {
// Cycle in subRule declaration
throw new Error('Maximum number of conditions was exceeded');
}
targets = nextTargets;
}
return {conditionFlagsMap, nextFlagIndex};
}
/**
* @param {Map} conditionFlagsMap
* @param {string[]} conditionTypes
* @returns {?number}
*/
_getConditionFlagsStrict(conditionFlagsMap, conditionTypes) {
let flags = 0;
for (const conditionType of conditionTypes) {
const flags2 = conditionFlagsMap.get(conditionType);
if (typeof flags2 === 'undefined') {
return null;
}
flags |= flags2;
}
return flags;
}
/**
* @param {Map} conditionFlagsMap
* @param {string[]} conditionTypes
* @returns {number}
*/
_getConditionFlags(conditionFlagsMap, conditionTypes) {
let flags = 0;
for (const conditionType of conditionTypes) {
let flags2 = conditionFlagsMap.get(conditionType);
if (typeof flags2 === 'undefined') {
flags2 = 0;
}
flags |= flags2;
}
return flags;
}
/**
* @param {import('language-transformer-internal').Trace} trace
* @param {import('language-transformer-internal').TraceFrame} newFrame
* @returns {import('language-transformer-internal').Trace}
*/
_extendTrace(trace, newFrame) {
const newTrace = [newFrame];
for (const {transform, ruleIndex, text} of trace) {
newTrace.push({transform, ruleIndex, text});
}
return newTrace;
}
}