Updating dictionaries

author: Alex Yatskov <alex@foosoft.net> 2016-04-12 20:58:41 -0700
committer: Alex Yatskov <alex@foosoft.net> 2016-04-12 20:58:41 -0700
commit: bf28dea2838619c6446367f240e240cdddef3586 (patch)
tree: b30b9c6b2a8c70092df395492c44300c4d8c6a63
parent: 7d0991f9c1b1379c669c0da9f1f678285dd0c968 (diff)
3 files changed, 19 insertions, 39 deletions
diff --git a/ext/bg/js/dictionary.js b/ext/bg/js/dictionary.js
index a68c2daf..feb8c92b 100644
--- a/ext/bg/js/dictionary.js
+++ b/ext/bg/js/dictionary.js
@@ -19,31 +19,20 @@
 
 class Dictionary {
     constructor() {
-        this.terms       = [];
-        this.termIndices = {};
-
-        this.kanji        = [];
-        this.kanjiIndices = {};
+        this.termDicts  = {};
+        this.kanjiDicts = {};
     }
 
-    addTermData(terms) {
-        let index = this.terms.length;
-        for (const [e, r, g, t] of terms) {
-            this.storeIndex(this.termIndices, e, index);
-            this.storeIndex(this.termIndices, r, index++);
-            this.terms.push([e, r, g, t]);
-        }
+    addTermDict(name, dict) {
+        this.termDicts[name] = dict;
     }
 
-    addKanjiData(kanji) {
-        let index = this.kanji.length;
-        for (const [c, k, o, g] of kanji) {
-            this.storeIndex(this.kanjiIndices, c, index++);
-            this.kanji.push([c, k, o, g]);
-        }
+    addKanjiDict(name, dict) {
+        this.kanjiDicts[name] = dict;
     }
 
     findTerm(term) {
+        const results = [];
         return (this.termIndices[term] || []).map(index => {
             const [e, r, g, t] = this.terms[index];
             return {id: index, expression: e, reading: r, glossary: g, tags: t.split(' ')};
@@ -56,12 +45,4 @@ class Dictionary {
             return {id: index, character: c, kunyomi: k, onyomi: o, glossary: g};
         });
     }
-
-    storeIndex(indices, term, index) {
-        if (term.length > 0) {
-            const indices = this.termIndices[term] || [];
-            indices.push(index);
-            this.termIndices[term] = indices;
-        }
-    }
 }
diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js
index e8224320..5414a553 100644
--- a/ext/bg/js/translator.js
+++ b/ext/bg/js/translator.js
@@ -46,11 +46,11 @@ class Translator {
                         this.deinflector.setRules(JSON.parse(response));
                         break;
                     case 'kanjidic':
-                        this.dictionary.addKanjiData(Translator.parseCsv(response));
+                        this.dictionary.addKanjiDict('kanjidic', JSON.parse(response));
                         break;
                     case 'edict':
                     case 'enamdict':
-                        this.dictionary.addTermData(Translator.parseCsv(response));
+                        this.dictionary.addTermDict(key, JSON.parse(response));
                         break;
                 }
 
@@ -164,15 +164,4 @@ class Translator {
         xhr.open('GET', chrome.extension.getURL(url), true);
         xhr.send();
     }
-
-    static parseCsv(data) {
-        const result = [];
-        for (const row of data.split('\n')) {
-            if (row.length > 0) {
-                result.push(row.split('\t'));
-            }
-        }
-
-        return result;
-    }
 }
diff --git a/util/compile.py b/util/compile.py
index b41fdf6d..8d8470dd 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -44,6 +44,7 @@ PARSED_TAGS = {
     'aux':     'auxiliary',
     'aux-adj': 'auxiliary adjective',
     'aux-v':   'auxiliary verb',
+    'c':       'company name',
     'chn':     'children\'s language',
     'col':     'colloquialism',
     'comp':    'computer terminology',
@@ -53,12 +54,15 @@ PARSED_TAGS = {
     'eK':      'exclusively kanji',
     'ek':      'exclusively kana',
     'exp':     'Expressions (phrases, clauses, etc.)',
+    'f':       'female given name',
     'fam':     'familiar language',
     'fem':     'female term or language',
     'food':    'food term',
+    'g':       'given name, as-yet not classified by sex',
     'geom':    'geometry term',
     'gikun':   'gikun (meaning) reading',
     'gram':    'grammatical term',
+    'h':       'full (usually family plus given) name of a particular person',
     'hon':     'honorific or respectful (sonkeigo) language',
     'hum':     'humble (kenjougo) language',
     'iK':      'word containing irregular kanji usage',
@@ -68,6 +72,7 @@ PARSED_TAGS = {
     'io':      'irregular okurigana usage',
     'iv':      'irregular verb',
     'ling':    'linguistics terminology',
+    'm':       'male given name',
     'm-sl':    'manga slang',
     'male':    'male term or language',
     'male-sl': 'male slang',
@@ -84,16 +89,21 @@ PARSED_TAGS = {
     'obsc':    'obscure term',
     'ok':      'out-dated or obsolete kana usage',
     'on-mim':  'onomatopoeic or mimetic word',
+    'p':       'place-name',
     'physics': 'physics terminology',
     'pn':      'pronoun',
     'poet':    'poetical term',
     'pol':     'polite (teineigo) language',
+    'pr':      'product name',
     'pref':    'prefix',
     'prt':     'particle',
     'rare':    'rare (now replaced by "obsc")',
+    's':       'surname',
     'sens':    'sensitive word',
     'sl':      'slang',
+    'st':      'stations',
     'suf':     'suffix',
+    'u':       'person name, either given or surname, as-yet unclassified',
     'uK':      'word usually written using kanji alone',
     'uk':      'word usually written using kana alone',
     'v1':      'Ichidan verb',
author	Alex Yatskov <alex@foosoft.net>	2016-04-12 20:58:41 -0700
committer	Alex Yatskov <alex@foosoft.net>	2016-04-12 20:58:41 -0700
commit	bf28dea2838619c6446367f240e240cdddef3586 (patch)
tree	b30b9c6b2a8c70092df395492c44300c4d8c6a63
parent	7d0991f9c1b1379c669c0da9f1f678285dd0c968 (diff)