diff options
author | StefanVukovic99 <stefanvukovic44@gmail.com> | 2024-03-22 15:27:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-22 14:27:35 +0000 |
commit | d2e9841f96ebff61d4a5c26a322484f6268115f1 (patch) | |
tree | 3c1351fa3b25994eab8456ea8457ee153bb98aa9 /ext/js/language/language-transformer.js | |
parent | 7681131782d958997663b1fb443a3e32e8eef550 (diff) |
expand deinflection format (#745)
* abstract deinflections
* undo redundant changes
* remove cast
* switch to js
* MultiLanguageTransformer
* comments
* comments
* fix test
* suffixInflection
* fix bench
* substring instead of replace
* without heuristic
* suffixMap
* add other language deinflections
* wip
* catch cycles
* fix tests
* uninflect to deinflect
* use less regex
* add suru masu stem deinflection
Diffstat (limited to 'ext/js/language/language-transformer.js')
-rw-r--r-- | ext/js/language/language-transformer.js | 36 |
1 files changed, 22 insertions, 14 deletions
diff --git a/ext/js/language/language-transformer.js b/ext/js/language/language-transformer.js
index 8a82e4d2..47f31b5f 100644
--- a/ext/js/language/language-transformer.js
+++ b/ext/js/language/language-transformer.js
@@ -15,7 +15,7 @@
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  */
 
-import {escapeRegExp} from '../core/utilities.js';
+import {log} from '../core/log.js';
 
 export class LanguageTransformer {
     constructor() {
@@ -55,21 +55,22 @@ export class LanguageTransformer {
                 /** @type {import('language-transformer-internal').Rule[]} */
                 const rules2 = [];
                 for (let j = 0, jj = rules.length; j < jj; ++j) {
-                    const {suffixIn, suffixOut, conditionsIn, conditionsOut} = rules[j];
+                    const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
                     const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
                     if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform[${i}].rules[${j}]`); }
                     const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
                     if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform[${i}].rules[${j}]`); }
                     rules2.push({
-                        suffixIn,
-                        suffixOut,
+                        type,
+                        isInflected,
+                        deinflect,
                         conditionsIn: conditionFlagsIn,
                         conditionsOut: conditionFlagsOut
                     });
                 }
-                const suffixes = rules.map((rule) => rule.suffixIn);
-                const suffixHeuristic = new RegExp(`(${suffixes.map((suffix) => escapeRegExp(suffix)).join('|')})$`);
-                transforms2.push({name, rules: rules2, suffixHeuristic});
+                const isInflectedTests = rules.map((rule) => rule.isInflected);
+                const heuristic = new RegExp(isInflectedTests.map((regExp) => regExp.source).join('|'));
+                transforms2.push({name, rules: rules2, heuristic});
             }
 
             this._nextFlagIndex = nextFlagIndex;
@@ -120,18 +121,25 @@ export class LanguageTransformer {
         for (let i = 0; i < results.length; ++i) {
             const {text, conditions, trace} = results[i];
             for (const transform of this._transforms) {
-                if (!transform.suffixHeuristic.test(text)) { continue; }
+                if (!transform.heuristic.test(text)) { continue; }
 
                 const {name, rules} = transform;
                 for (let j = 0, jj = rules.length; j < jj; ++j) {
                     const rule = rules[j];
                     if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
-                    const {suffixIn, suffixOut} = rule;
-                    if (!text.endsWith(suffixIn) || (text.length - suffixIn.length + suffixOut.length) <= 0) { continue; }
+                    const {isInflected, deinflect} = rule;
+                    if (!isInflected.test(text)) { continue; }
+
+                    const isCycle = trace.some((frame) => frame.transform === name && frame.ruleIndex === j && frame.text === text);
+                    if (isCycle) {
+                        log.warn(new Error(`Cycle detected in transform[${name}] rule[${j}] for text: ${text}`));
+                        continue;
+                    }
+
                     results.push(LanguageTransformer.createTransformedText(
-                        text.substring(0, text.length - suffixIn.length) + suffixOut,
+                        deinflect(text),
                         rule.conditionsOut,
-                        this._extendTrace(trace, {transform: name, ruleIndex: j})
+                        this._extendTrace(trace, {transform: name, ruleIndex: j, text})
                     ));
                 }
             }
@@ -245,8 +253,8 @@ export class LanguageTransformer {
      */
     _extendTrace(trace, newFrame) {
         const newTrace = [newFrame];
-        for (const {transform, ruleIndex} of trace) {
-            newTrace.push({transform, ruleIndex});
+        for (const {transform, ruleIndex, text} of trace) {
+            newTrace.push({transform, ruleIndex, text});
         }
         return newTrace;
     }