summaryrefslogtreecommitdiff
path: root/ext/bg
diff options
context:
space:
mode:
authorAlex Yatskov <FooSoft@users.noreply.github.com>2019-10-05 19:49:54 -0700
committerGitHub <noreply@github.com>2019-10-05 19:49:54 -0700
commit14a5e3ce209cba41000251c460e790bbafe6de03 (patch)
tree8d1576b86c6d644ae14ba5f556ad96220afb2e8b /ext/bg
parent4df8662ea9ae99d6e8a11c49dfc9b702eb360049 (diff)
parent50a47348a7a040d1bcaf0a12a38cca049dc207f7 (diff)
Merge pull request #238 from toasted-nutbread/deinflector-optimization
Deinflector optimization
Diffstat (limited to 'ext/bg')
-rw-r--r--ext/bg/js/deinflector.js73
-rw-r--r--ext/bg/js/translator.js17
-rw-r--r--ext/bg/lang/deinflect.json163
3 files changed, 141 insertions, 112 deletions
diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js
index ad77895c..ce4b2961 100644
--- a/ext/bg/js/deinflector.js
+++ b/ext/bg/js/deinflector.js
@@ -19,51 +19,74 @@
class Deinflector {
constructor(reasons) {
- this.reasons = reasons;
+ this.reasons = Deinflector.normalizeReasons(reasons);
}
deinflect(source) {
const results = [{
source,
term: source,
- rules: [],
+ rules: 0,
definitions: [],
reasons: []
}];
for (let i = 0; i < results.length; ++i) {
- const entry = results[i];
-
- for (const reason in this.reasons) {
- for (const variant of this.reasons[reason]) {
- let accept = entry.rules.length === 0;
- if (!accept) {
- for (const rule of entry.rules) {
- if (variant.rulesIn.includes(rule)) {
- accept = true;
- break;
- }
- }
- }
-
- if (!accept || !entry.term.endsWith(variant.kanaIn)) {
- continue;
- }
-
- const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
- if (term.length === 0) {
+ const {rules, term, reasons} = results[i];
+ for (const [reason, variants] of this.reasons) {
+ for (const [kanaIn, kanaOut, rulesIn, rulesOut] of variants) {
+ if (
+ (rules !== 0 && (rules & rulesIn) === 0) ||
+ !term.endsWith(kanaIn) ||
+ (term.length - kanaIn.length + kanaOut.length) <= 0
+ ) {
continue;
}
results.push({
source,
- term,
- rules: variant.rulesOut,
+ term: term.slice(0, -kanaIn.length) + kanaOut,
+ rules: rulesOut,
definitions: [],
- reasons: [reason, ...entry.reasons]
+ reasons: [reason, ...reasons]
});
}
}
}
return results;
}
+
+ static normalizeReasons(reasons) {
+ const normalizedReasons = [];
+ for (const reason in reasons) {
+ const variants = [];
+ for (const {kanaIn, kanaOut, rulesIn, rulesOut} of reasons[reason]) {
+ variants.push([
+ kanaIn,
+ kanaOut,
+ Deinflector.rulesToRuleFlags(rulesIn),
+ Deinflector.rulesToRuleFlags(rulesOut)
+ ]);
+ }
+ normalizedReasons.push([reason, variants]);
+ }
+ return normalizedReasons;
+ }
+
+ static rulesToRuleFlags(rules) {
+ const ruleTypes = Deinflector.ruleTypes;
+ let value = 0;
+ for (const rule of rules) {
+ value |= ruleTypes[rule];
+ }
+ return value;
+ }
}
+
+Deinflector.ruleTypes = {
+ 'v1': 0b0000001, // Verb ichidan
+ 'v5': 0b0000010, // Verb godan
+ 'vs': 0b0000100, // Verb suru
+ 'vk': 0b0001000, // Verb kuru
+ 'adj-i': 0b0010000, // Adjective i
+ 'iru': 0b0100000, // Intermediate -iru endings for progressive or perfect tense
+};
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 65d746ea..601ee30c 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -238,8 +238,10 @@ class Translator {
const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles);
for (const definition of definitions) {
+ const definitionRules = Deinflector.rulesToRuleFlags(definition.rules);
for (const deinflection of uniqueDeinflectionArrays[definition.index]) {
- if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) {
+ const deinflectionRules = deinflection.rules;
+ if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
deinflection.definitions.push(definition);
}
}
@@ -248,19 +250,6 @@ class Translator {
return deinflections.filter(e => e.definitions.length > 0);
}
- static definitionContainsAnyRule(definition, rules) {
- if (rules.length === 0) {
- return true;
- }
- const definitionRules = definition.rules;
- for (const rule of rules) {
- if (definitionRules.includes(rule)) {
- return true;
- }
- }
- return false;
- }
-
getDeinflections(text) {
const deinflections = [];
diff --git a/ext/bg/lang/deinflect.json b/ext/bg/lang/deinflect.json
index c7977c88..682093e1 100644
--- a/ext/bg/lang/deinflect.json
+++ b/ext/bg/lang/deinflect.json
@@ -1186,7 +1186,7 @@
"kanaIn": "て",
"kanaOut": "る",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v1",
@@ -1197,7 +1197,7 @@
"kanaIn": "いて",
"kanaOut": "く",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1207,7 +1207,7 @@
"kanaIn": "いで",
"kanaOut": "ぐ",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1217,7 +1217,7 @@
"kanaIn": "きて",
"kanaOut": "くる",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"vk"
@@ -1227,7 +1227,7 @@
"kanaIn": "くて",
"kanaOut": "い",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"adj-i"
@@ -1237,7 +1237,7 @@
"kanaIn": "して",
"kanaOut": "す",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1247,7 +1247,7 @@
"kanaIn": "して",
"kanaOut": "する",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"vs"
@@ -1257,7 +1257,7 @@
"kanaIn": "って",
"kanaOut": "う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1267,7 +1267,7 @@
"kanaIn": "って",
"kanaOut": "つ",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1277,7 +1277,7 @@
"kanaIn": "って",
"kanaOut": "る",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1287,7 +1287,7 @@
"kanaIn": "んで",
"kanaOut": "ぬ",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1297,7 +1297,7 @@
"kanaIn": "んで",
"kanaOut": "ぶ",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1307,7 +1307,7 @@
"kanaIn": "んで",
"kanaOut": "む",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1317,7 +1317,7 @@
"kanaIn": "のたもうて",
"kanaOut": "のたまう",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1327,7 +1327,7 @@
"kanaIn": "いって",
"kanaOut": "いく",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1337,7 +1337,7 @@
"kanaIn": "おうて",
"kanaOut": "おう",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1347,7 +1347,7 @@
"kanaIn": "こうて",
"kanaOut": "こう",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1357,7 +1357,7 @@
"kanaIn": "そうて",
"kanaOut": "そう",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1367,7 +1367,7 @@
"kanaIn": "とうて",
"kanaOut": "とう",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1377,7 +1377,7 @@
"kanaIn": "行って",
"kanaOut": "行く",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1387,7 +1387,7 @@
"kanaIn": "逝って",
"kanaOut": "逝く",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1397,7 +1397,7 @@
"kanaIn": "往って",
"kanaOut": "往く",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1407,7 +1407,7 @@
"kanaIn": "請うて",
"kanaOut": "請う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1417,7 +1417,7 @@
"kanaIn": "乞うて",
"kanaOut": "乞う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1427,7 +1427,7 @@
"kanaIn": "恋うて",
"kanaOut": "恋う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1437,7 +1437,7 @@
"kanaIn": "問うて",
"kanaOut": "問う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1447,7 +1447,7 @@
"kanaIn": "負うて",
"kanaOut": "負う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1457,7 +1457,7 @@
"kanaIn": "沿うて",
"kanaOut": "沿う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1467,7 +1467,7 @@
"kanaIn": "添うて",
"kanaOut": "添う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1477,7 +1477,7 @@
"kanaIn": "副うて",
"kanaOut": "副う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
@@ -1487,21 +1487,11 @@
"kanaIn": "厭うて",
"kanaOut": "厭う",
"rulesIn": [
- "iru"
+ "iru"
],
"rulesOut": [
"v5"
]
- },
- {
- "kanaIn": "で",
- "kanaOut": "",
- "rulesIn": [
- "iru"
- ],
- "rulesOut": [
- "neg-de"
- ]
}
],
"-zu": [
@@ -2233,8 +2223,7 @@
"kanaIn": "ない",
"kanaOut": "る",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v1",
@@ -2245,8 +2234,7 @@
"kanaIn": "かない",
"kanaOut": "く",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2256,8 +2244,7 @@
"kanaIn": "がない",
"kanaOut": "ぐ",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2267,8 +2254,7 @@
"kanaIn": "くない",
"kanaOut": "い",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"adj-i"
@@ -2278,8 +2264,7 @@
"kanaIn": "こない",
"kanaOut": "くる",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"vk"
@@ -2289,8 +2274,7 @@
"kanaIn": "さない",
"kanaOut": "す",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2300,8 +2284,7 @@
"kanaIn": "しない",
"kanaOut": "する",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"vs"
@@ -2311,8 +2294,7 @@
"kanaIn": "たない",
"kanaOut": "つ",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2322,8 +2304,7 @@
"kanaIn": "なない",
"kanaOut": "ぬ",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2333,8 +2314,7 @@
"kanaIn": "ばない",
"kanaOut": "ぶ",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2344,8 +2324,7 @@
"kanaIn": "まない",
"kanaOut": "む",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2355,8 +2334,7 @@
"kanaIn": "らない",
"kanaOut": "る",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -2366,8 +2344,7 @@
"kanaIn": "わない",
"kanaOut": "う",
"rulesIn": [
- "adj-i",
- "neg-de"
+ "adj-i"
],
"rulesOut": [
"v5"
@@ -3681,8 +3658,8 @@
],
"progressive or perfect": [
{
- "kanaIn": "いる",
- "kanaOut": "",
+ "kanaIn": "ている",
+ "kanaOut": "て",
"rulesIn": [
"v1"
],
@@ -3691,8 +3668,8 @@
]
},
{
- "kanaIn": "る",
- "kanaOut": "",
+ "kanaIn": "ておる",
+ "kanaOut": "て",
"rulesIn": [
"v1"
],
@@ -3701,14 +3678,54 @@
]
},
{
- "kanaIn": "おる",
- "kanaOut": "",
+ "kanaIn": "てる",
+ "kanaOut": "て",
+ "rulesIn": [
+ "v1"
+ ],
+ "rulesOut": [
+ "iru"
+ ]
+ },
+ {
+ "kanaIn": "でいる",
+ "kanaOut": "で",
+ "rulesIn": [
+ "v1"
+ ],
+ "rulesOut": [
+ "iru"
+ ]
+ },
+ {
+ "kanaIn": "でおる",
+ "kanaOut": "で",
+ "rulesIn": [
+ "v1"
+ ],
+ "rulesOut": [
+ "iru"
+ ]
+ },
+ {
+ "kanaIn": "とる",
+ "kanaOut": "て",
"rulesIn": [
"v1"
],
"rulesOut": [
"iru"
]
+ },
+ {
+ "kanaIn": "ないでいる",
+ "kanaOut": "ない",
+ "rulesIn": [
+ "v1"
+ ],
+ "rulesOut": [
+ "adj-i"
+ ]
}
]
}