Fixes to deinflection logic

author: Alex Yatskov <alex@foosoft.net> 2016-04-16 20:11:27 -0700
committer: Alex Yatskov <alex@foosoft.net> 2016-04-16 20:11:27 -0700
commit: 9ceef4649f07211079017d2cfcbc3756dd9a509b (patch)
tree: 8bc506f337a19e165e1391229c12e32480accd24
parent: ccf545919054be4dc2aa9efdc7cd914c867775a8 (diff)
3 files changed, 52 insertions, 44 deletions
diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js
index 03f9d40a..3c24ae3a 100644
--- a/ext/bg/js/deinflector.js
+++ b/ext/bg/js/deinflector.js
@@ -32,7 +32,7 @@ class Deinflection {
             }
 
             for (const tag of this.tags) {
-                if (this.searchTags(tag, tags)) {
+                if (tags.indexOf(tag) !== -1) {
                     return true;
                 }
             }
@@ -43,7 +43,7 @@ class Deinflection {
 
     deinflect(validator, rules) {
         if (this.validate(validator)) {
-            const child = new Deinflection(this.term);
+            const child = new Deinflection(this.term, this.tags);
             this.children.push(child);
         }
 
@@ -52,7 +52,7 @@ class Deinflection {
             for (const v of variants) {
                 let allowed = this.tags.length === 0;
                 for (const tag of this.tags) {
-                    if (this.searchTags(tag, v.tagsIn)) {
+                    if (v.tagsIn.indexOf(tag) !== -1) {
                         allowed = true;
                         break;
                     }
@@ -73,20 +73,9 @@ class Deinflection {
         return this.children.length > 0;
     }
 
-    searchTags(tag, tags) {
-        for (const t of tags) {
-            const re = new RegExp(tag);
-            if (re.test(t)) {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
     gather() {
         if (this.children.length === 0) {
-            return [{root: this.term, rules: []}];
+            return [{root: this.term, tags: this.tags, rules: []}];
         }
 
         const paths = [];
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index 101083e8..46d63ed0 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -77,11 +77,9 @@ class Translator {
                 return tags;
             });
 
-            if (dfs === null) {
-                this.processTerm(groups, term);
-            } else {
+            if (dfs !== null) {
                 for (const df of dfs) {
-                    this.processTerm(groups, df.source, df.rules, df.root);
+                    this.processTerm(groups, df.source, df.tags, df.rules, df.root);
                 }
             }
         }
@@ -141,20 +139,30 @@ class Translator {
         return results;
     }
 
-    processTerm(groups, source, rules=[], root='') {
-        for (const entry of this.dictionary.findTerm(root || source)) {
+    processTerm(groups, source, tags, rules=[], root='') {
+        for (const entry of this.dictionary.findTerm(root)) {
             if (entry.id in groups) {
                 continue;
             }
 
-            groups[entry.id] = {
-                expression: entry.expression,
-                reading:    entry.reading,
-                glossary:   entry.glossary,
-                tags:       entry.tags,
-                source:     source,
-                rules:      rules
-            };
+            let matched = tags.length == 0;
+            for (const tag of tags) {
+                if (entry.tags.indexOf(tag) !== -1) {
+                    matched = true;
+                    break;
+                }
+            }
+
+            if (matched) {
+                groups[entry.id] = {
+                    expression: entry.expression,
+                    reading:    entry.reading,
+                    glossary:   entry.glossary,
+                    tags:       entry.tags,
+                    source:     source,
+                    rules:      rules
+                };
+            }
         }
     }
 
diff --git a/util/compile.py b/util/compile.py
index 1ed299f7..9c5a6bdd 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -112,21 +112,21 @@ PARSED_TAGS = {
     'v4h':     'Yodan verb with "hu/fu" ending (archaic)',
     'v4r':     'Yodan verb with "ru" ending (archaic)',
     'v5':      'Godan verb (not completely classified)',
-    'v5aru':   'Godan verb - -aru special class',
-    'v5b':     'Godan verb with "bu" ending',
-    'v5g':     'Godan verb with "gu" ending',
-    'v5k':     'Godan verb with "ku" ending',
-    'v5k-s':   'Godan verb - iku/yuku special class',
-    'v5m':     'Godan verb with "mu" ending',
-    'v5n':     'Godan verb with "nu" ending',
-    'v5r':     'Godan verb with "ru" ending',
-    'v5r-i':   'Godan verb with "ru" ending (irregular verb)',
-    'v5s':     'Godan verb with "su" ending',
-    'v5t':     'Godan verb with "tsu" ending',
-    'v5u':     'Godan verb with "u" ending',
-    'v5u-s':   'Godan verb with "u" ending (special class)',
-    'v5uru':   'Godan verb - uru old class verb (old form of Eru)',
-    'v5z':     'Godan verb with "zu" ending',
+    # 'v5aru':   'Godan verb - -aru special class',
+    # 'v5b':     'Godan verb with "bu" ending',
+    # 'v5g':     'Godan verb with "gu" ending',
+    # 'v5k':     'Godan verb with "ku" ending',
+    # 'v5k-s':   'Godan verb - iku/yuku special class',
+    # 'v5m':     'Godan verb with "mu" ending',
+    # 'v5n':     'Godan verb with "nu" ending',
+    # 'v5r':     'Godan verb with "ru" ending',
+    # 'v5r-i':   'Godan verb with "ru" ending (irregular verb)',
+    # 'v5s':     'Godan verb with "su" ending',
+    # 'v5t':     'Godan verb with "tsu" ending',
+    # 'v5u':     'Godan verb with "u" ending',
+    # 'v5u-s':   'Godan verb with "u" ending (special class)',
+    # 'v5uru':   'Godan verb - uru old class verb (old form of Eru)',
+    # 'v5z':     'Godan verb with "zu" ending',
     'vi':      'intransitive verb',
     'vk':      'kuru verb - special class',
     'vn':      'irregular nu verb',
@@ -167,6 +167,16 @@ def parse_kanji_dic(path):
     return results
 
 
+def fixup_godan_verbs(tags):
+    results = []
+    for tag in tags:
+        if tag.startswith('v5'):
+            tag = 'v5'
+        results.append(tag)
+
+    return set(results)
+
+
 def parse_edict(path):
     results = []
     for line in load_definitions(path):
@@ -188,6 +198,7 @@ def parse_edict(path):
                 continue
 
             tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
+            tags_raw = fixup_godan_verbs(tags_raw)
             tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
             tags = tags.union(tags_raw)
author	Alex Yatskov <alex@foosoft.net>	2016-04-16 20:11:27 -0700
committer	Alex Yatskov <alex@foosoft.net>	2016-04-16 20:11:27 -0700
commit	9ceef4649f07211079017d2cfcbc3756dd9a509b (patch)
tree	8bc506f337a19e165e1391229c12e32480accd24
parent	ccf545919054be4dc2aa9efdc7cd914c867775a8 (diff)