about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Yatskov <alex@foosoft.net> 2016-04-16 20:11:27 -0700
committer Alex Yatskov <alex@foosoft.net> 2016-04-16 20:11:27 -0700
commit 9ceef4649f07211079017d2cfcbc3756dd9a509b (patch)
tree 8bc506f337a19e165e1391229c12e32480accd24
parent ccf545919054be4dc2aa9efdc7cd914c867775a8 (diff)
Fixes to deinflection logic
-rw-r--r-- ext/bg/js/deinflector.js 19
-rw-r--r-- ext/bg/js/translator.js 36
-rwxr-xr-x util/compile.py 41
3 files changed, 52 insertions, 44 deletions
diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js
index 03f9d40a..3c24ae3a 100644
--- a/ext/bg/js/deinflector.js
+++ b/ext/bg/js/deinflector.js
@@ -32,7 +32,7 @@ class Deinflection {
}
for (const tag of this.tags) {
- if (this.searchTags(tag, tags)) {
+ if (tags.indexOf(tag) !== -1) {
return true;
}
}
@@ -43,7 +43,7 @@ class Deinflection {
deinflect(validator, rules) {
if (this.validate(validator)) {
- const child = new Deinflection(this.term);
+ const child = new Deinflection(this.term, this.tags);
this.children.push(child);
}
@@ -52,7 +52,7 @@ class Deinflection {
for (const v of variants) {
let allowed = this.tags.length === 0;
for (const tag of this.tags) {
- if (this.searchTags(tag, v.tagsIn)) {
+ if (v.tagsIn.indexOf(tag) !== -1) {
allowed = true;
break;
}
@@ -73,20 +73,9 @@ class Deinflection {
return this.children.length > 0;
}
- searchTags(tag, tags) {
- for (const t of tags) {
- const re = new RegExp(tag);
- if (re.test(t)) {
- return true;
- }
- }
-
- return false;
- }
-
gather() {
if (this.children.length === 0) {
- return [{root: this.term, rules: []}];
+ return [{root: this.term, tags: this.tags, rules: []}];
}
const paths = [];
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 101083e8..46d63ed0 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -77,11 +77,9 @@ class Translator {
return tags;
});
- if (dfs === null) {
- this.processTerm(groups, term);
- } else {
+ if (dfs !== null) {
for (const df of dfs) {
- this.processTerm(groups, df.source, df.rules, df.root);
+ this.processTerm(groups, df.source, df.tags, df.rules, df.root);
}
}
}
@@ -141,20 +139,30 @@ class Translator {
return results;
}
- processTerm(groups, source, rules=[], root='') {
- for (const entry of this.dictionary.findTerm(root || source)) {
+ processTerm(groups, source, tags, rules=[], root='') {
+ for (const entry of this.dictionary.findTerm(root)) {
if (entry.id in groups) {
continue;
}
- groups[entry.id] = {
- expression: entry.expression,
- reading: entry.reading,
- glossary: entry.glossary,
- tags: entry.tags,
- source: source,
- rules: rules
- };
+ let matched = tags.length == 0;
+ for (const tag of tags) {
+ if (entry.tags.indexOf(tag) !== -1) {
+ matched = true;
+ break;
+ }
+ }
+
+ if (matched) {
+ groups[entry.id] = {
+ expression: entry.expression,
+ reading: entry.reading,
+ glossary: entry.glossary,
+ tags: entry.tags,
+ source: source,
+ rules: rules
+ };
+ }
}
}
diff --git a/util/compile.py b/util/compile.py
index 1ed299f7..9c5a6bdd 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -112,21 +112,21 @@ PARSED_TAGS = {
'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
'v4r': 'Yodan verb with "ru" ending (archaic)',
'v5': 'Godan verb (not completely classified)',
- 'v5aru': 'Godan verb - -aru special class',
- 'v5b': 'Godan verb with "bu" ending',
- 'v5g': 'Godan verb with "gu" ending',
- 'v5k': 'Godan verb with "ku" ending',
- 'v5k-s': 'Godan verb - iku/yuku special class',
- 'v5m': 'Godan verb with "mu" ending',
- 'v5n': 'Godan verb with "nu" ending',
- 'v5r': 'Godan verb with "ru" ending',
- 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
- 'v5s': 'Godan verb with "su" ending',
- 'v5t': 'Godan verb with "tsu" ending',
- 'v5u': 'Godan verb with "u" ending',
- 'v5u-s': 'Godan verb with "u" ending (special class)',
- 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
- 'v5z': 'Godan verb with "zu" ending',
+ # 'v5aru': 'Godan verb - -aru special class',
+ # 'v5b': 'Godan verb with "bu" ending',
+ # 'v5g': 'Godan verb with "gu" ending',
+ # 'v5k': 'Godan verb with "ku" ending',
+ # 'v5k-s': 'Godan verb - iku/yuku special class',
+ # 'v5m': 'Godan verb with "mu" ending',
+ # 'v5n': 'Godan verb with "nu" ending',
+ # 'v5r': 'Godan verb with "ru" ending',
+ # 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
+ # 'v5s': 'Godan verb with "su" ending',
+ # 'v5t': 'Godan verb with "tsu" ending',
+ # 'v5u': 'Godan verb with "u" ending',
+ # 'v5u-s': 'Godan verb with "u" ending (special class)',
+ # 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
+ # 'v5z': 'Godan verb with "zu" ending',
'vi': 'intransitive verb',
'vk': 'kuru verb - special class',
'vn': 'irregular nu verb',
@@ -167,6 +167,16 @@ def parse_kanji_dic(path):
return results
+def fixup_godan_verbs(tags):
+ results = []
+ for tag in tags:
+ if tag.startswith('v5'):
+ tag = 'v5'
+ results.append(tag)
+
+ return set(results)
+
+
def parse_edict(path):
results = []
for line in load_definitions(path):
@@ -188,6 +198,7 @@ def parse_edict(path):
continue
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
+ tags_raw = fixup_godan_verbs(tags_raw)
tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
tags = tags.union(tags_raw)