about | summary | refs | log | tree | commit | diff
path: root/ext/js/language/language-transformer.js
diff options
context:
space:
mode:
author: StefanVukovic99 <stefanvukovic44@gmail.com>, 2024-03-22 15:27:35 +0100
committer: GitHub <noreply@github.com>, 2024-03-22 14:27:35 +0000
commit: d2e9841f96ebff61d4a5c26a322484f6268115f1 (patch)
tree: 3c1351fa3b25994eab8456ea8457ee153bb98aa9 /ext/js/language/language-transformer.js
parent: 7681131782d958997663b1fb443a3e32e8eef550 (diff)
expand deinflection format (#745)
* abstract deinflections
* undo redundant changes
* remove cast
* switch to js
* MultiLanguageTransformer
* comments
* comments
* fix test
* suffixInflection
* fix bench
* substring instead of replace
* without heuristic
* suffixMap
* add other language deinflections
* wip
* catch cycles
* fix tests
* uninflect to deinflect
* use less regex
* add suru masu stem deinflection
Diffstat (limited to 'ext/js/language/language-transformer.js')
-rw-r--r-- ext/js/language/language-transformer.js 36
1 file changed, 22 insertions, 14 deletions
diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js
index 8a82e4d2..47f31b5f 100644
--- a/ext/js/language/language-transformer.js
+++ b/ext/js/language/language-transformer.js
@@ -15,7 +15,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-import {escapeRegExp} from '../core/utilities.js';
+import {log} from '../core/log.js';
export class LanguageTransformer {
constructor() {
@@ -55,21 +55,22 @@ export class LanguageTransformer {
/** @type {import('language-transformer-internal').Rule[]} */
const rules2 = [];
for (let j = 0, jj = rules.length; j < jj; ++j) {
- const {suffixIn, suffixOut, conditionsIn, conditionsOut} = rules[j];
+ const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform[${i}].rules[${j}]`); }
const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform[${i}].rules[${j}]`); }
rules2.push({
- suffixIn,
- suffixOut,
+ type,
+ isInflected,
+ deinflect,
conditionsIn: conditionFlagsIn,
conditionsOut: conditionFlagsOut
});
}
- const suffixes = rules.map((rule) => rule.suffixIn);
- const suffixHeuristic = new RegExp(`(${suffixes.map((suffix) => escapeRegExp(suffix)).join('|')})$`);
- transforms2.push({name, rules: rules2, suffixHeuristic});
+ const isInflectedTests = rules.map((rule) => rule.isInflected);
+ const heuristic = new RegExp(isInflectedTests.map((regExp) => regExp.source).join('|'));
+ transforms2.push({name, rules: rules2, heuristic});
}
this._nextFlagIndex = nextFlagIndex;
@@ -120,18 +121,25 @@ export class LanguageTransformer {
for (let i = 0; i < results.length; ++i) {
const {text, conditions, trace} = results[i];
for (const transform of this._transforms) {
- if (!transform.suffixHeuristic.test(text)) { continue; }
+ if (!transform.heuristic.test(text)) { continue; }
const {name, rules} = transform;
for (let j = 0, jj = rules.length; j < jj; ++j) {
const rule = rules[j];
if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
- const {suffixIn, suffixOut} = rule;
- if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; }
+ const {isInflected, deinflect} = rule;
+ if (!isInflected.test(text)) { continue; }
+
+ const isCycle = trace.some((frame) => frame.transform === name && frame.ruleIndex === j && frame.text === text);
+ if (isCycle) {
+ log.warn(new Error(`Cycle detected in transform[${name}] rule[${j}] for text: ${text}`));
+ continue;
+ }
+
results.push(LanguageTransformer.createTransformedText(
- text.substring(0, text.length - suffixIn.length) + suffixOut,
+ deinflect(text),
rule.conditionsOut,
- this._extendTrace(trace, {transform: name, ruleIndex: j})
+ this._extendTrace(trace, {transform: name, ruleIndex: j, text})
));
}
}
@@ -245,8 +253,8 @@ export class LanguageTransformer {
*/
_extendTrace(trace, newFrame) {
const newTrace = [newFrame];
- for (const {transform, ruleIndex} of trace) {
- newTrace.push({transform, ruleIndex});
+ for (const {transform, ruleIndex, text} of trace) {
+ newTrace.push({transform, ruleIndex, text});
}
return newTrace;
}