summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Yatskov <alex@foosoft.net> 2016-03-31 20:03:39 -0700
committer Alex Yatskov <alex@foosoft.net> 2016-03-31 20:03:39 -0700
commit 7eadff3457690074c5c0140a6e9ffd6164021176 (patch)
tree ad8ba8c31cba11f54ca8cab186d1d36090e070c0
parent b97e75ba32781341c221f549780f3444d0916714 (diff)
Moving large files to CSV format, deleting unused kradfile
-rwxr-xr-x build_dict.sh 5
-rw-r--r-- ext/bg/dictionary.js 77
-rw-r--r-- ext/client.js 4
-rwxr-xr-x util/compile.py 51
4 files changed, 52 insertions, 85 deletions
diff --git a/build_dict.sh b/build_dict.sh
index a13b4ed7..42eed600 100755
--- a/build_dict.sh
+++ b/build_dict.sh
@@ -3,7 +3,6 @@
KANJIDIC=util/data/kanjidic
EDICT=util/data/edict
ENAMDICT=util/data/enamdict
-KRADFILE=util/data/kradfile
-DICT_DIR=ext/jp/data
+DICT_DIR=ext/bg/data
-util/compile.py --kanjidic $KANJIDIC --kradfile $KRADFILE --edict $EDICT --enamdict $ENAMDICT $DICT_DIR
+util/compile.py --kanjidic $KANJIDIC --edict $EDICT --enamdict $ENAMDICT $DICT_DIR
diff --git a/ext/bg/dictionary.js b/ext/bg/dictionary.js
index eff54890..30c34687 100644
--- a/ext/bg/dictionary.js
+++ b/ext/bg/dictionary.js
@@ -19,43 +19,33 @@
class Dictionary {
constructor() {
- this.termDicts = [];
- this.kanjiDicts = [];
- }
-
- addTermDict(termDict) {
- this.termDicts.push(termDict);
- }
+ this.terms = [];
+ this.termIndices = {};
- addKanjiDict(kanjiDict) {
- this.kanjiDicts.push(kanjiDict);
+ this.kanji = [];
+ this.kanjiIndices = {};
}
-
- findTerm(term) {
- let results = [];
- for (let dict of this.termDicts) {
- results = results.concat(this.findTermInDict(term, dict));
+ addTermDict(terms) {
+ let index = this.terms.length;
+ for (const [e, r, g, t] in terms) {
+ this.storeIndex(this.termIndices, e, index);
+ this.storeIndex(this.termIndices, r, index++);
+ this.terms.push([e, r, g, t]);
}
-
- return results;
}
- findKanji(kanji) {
- const results = [];
- for (let dict of this.kanjiDicts) {
- const result = this.findKanjiInDict(kanji, dict);
- if (result !== null) {
- results.push(result);
- }
+ addKanjiDict(kanji) {
+ let index = this.kanji.length;
+ for (const [c, k, o, g] in kanji) {
+ this.storeIndex(this.kanjiIndices, c, index++);
+ this.kanji.push([c, k, o, g]);
}
-
- return results;
}
- findTermInDict(term, dict) {
- return (dict.indices[term] || []).map(index => {
- const [e, r, g, t] = dict.defs[index];
+ findTerm(term) {
+ return (this.termIndices[term] || []).map(index => {
+ const [e, r, g, t] = this.terms[index];
return {
id: index,
expression: e,
@@ -66,19 +56,24 @@ class Dictionary {
});
}
- findKanjiInDict(kanji, dict) {
- const def = dict.defs[kanji];
- if (def === null) {
- return null;
- }
+ findKanji(kanji) {
+ return (this.kanjiIndices[kanji] || []).map(index => {
+ const [c, k, o, g] = def;
+ return {
+ id: kanji.charCodeAt(0),
+ character: c,
+ kunyomi: k,
+ onyomi: o,
+ glossary: g
+ };
+ });
+ }
- const [c, k, o, g] = def;
- return {
- id: kanji.charCodeAt(0),
- character: c,
- kunyomi: k,
- onyomi: o,
- glossary: g
- };
+ storeIndex(indices, term, index) {
+ if (term.length > 0) {
+ const indices = this.termIndices[term] || [];
+ indices.push(term);
+ this.termIndices[term] = indices;
+ }
}
}
diff --git a/ext/client.js b/ext/client.js
index 2d9a470f..1c8c0a9f 100644
--- a/ext/client.js
+++ b/ext/client.js
@@ -27,7 +27,9 @@ class Client {
this.popup.classList.add('yomichan-popup');
this.popup.addEventListener('mousedown', (e) => e.stopPropagation());
this.popup.addEventListener('scroll', (e) => e.stopPropagation());
- document.body.appendChild(this.popup);
+
+ const base = document.body.appendChild('div');
+ base.createShadowRoot().appendChild(this.popup);
chrome.runtime.onMessage.addListener(this.onMessage.bind(this));
window.addEventListener('mousedown', this.onMouseDown.bind(this));
diff --git a/util/compile.py b/util/compile.py
index 485537dc..790ebfc7 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -18,7 +18,6 @@
import codecs
-import json
import optparse
import os.path
import re
@@ -111,7 +110,7 @@ def load_definitions(path):
def parse_kanji_dic(path):
- results = {}
+ results = []
for line in load_definitions(path):
segments = line.split()
@@ -119,32 +118,20 @@ def parse_kanji_dic(path):
kunyomi = ', '.join(filter(lambda x: filter(is_hiragana, x), segments[1:]))
onyomi = ', '.join(filter(lambda x: filter(is_katakana, x), segments[1:]))
glossary = '; '.join(re.findall('\{([^\}]+)\}', line))
- results[character] = (kunyomi, onyomi, glossary)
-
- return results
-
-
-def parse_krad_file(path):
- results = {}
-
- for line in load_definitions(path):
- segments = line.split(' ')
- character = segments[0]
- radicals = ' '.join(segments[2:])
- results[character] = radicals;
+ results.append((character, kunyomi, onyomi, glossary))
return results
def parse_edict(path):
- defs = []
+ results = []
for line in load_definitions(path):
segments = line.split('/')
expression = segments[0].split(' ')
term = expression[0]
match = re.search('\[([^\]]+)\]', expression[1])
- reading = None if match is None else match.group(1)
+ reading = '' if match is None else match.group(1)
glossary = '; '.join(filter(lambda x: len(x) > 0, segments[1:]))
glossary = re.sub('\(\d+\)\s*', '', glossary)
@@ -156,30 +143,21 @@ def parse_edict(path):
tags = set(tags).intersection(PARSED_TAGS)
tags = ' '.join(tags)
- defs.append((term, reading, glossary, tags))
-
- indices = {}
- for i, d in enumerate(defs):
- for key in d[:2]:
- if key is not None:
- values = indices.get(key, [])
- values.append(i)
- indices[key] = values
+ results.append((term, reading, glossary, tags))
- return {'defs': defs, 'indices': indices}
+ return results[1:]
def build_dict(output_dir, input_file, parser):
if input_file is not None:
base = os.path.splitext(os.path.basename(input_file))[0]
- with open(os.path.join(output_dir, base) + '.json', 'w') as fp:
- # json.dump(parser(input_file), fp, sort_keys=True, indent=4, separators=(',', ': '))
- json.dump(parser(input_file), fp)
+ with codecs.open(os.path.join(output_dir, base) + '.csv', 'wb', encoding='utf-8') as fp:
+ for d in parser(input_file):
+ fp.write('\t'.join(d) + '\n')
-def build(dict_dir, kanjidic, kradfile, edict, enamdict):
+def build(dict_dir, kanjidic, edict, enamdict):
build_dict(dict_dir, kanjidic, parse_kanji_dic)
- build_dict(dict_dir, kradfile, parse_krad_file)
build_dict(dict_dir, edict, parse_edict)
build_dict(dict_dir, enamdict, parse_edict)
@@ -187,7 +165,6 @@ def build(dict_dir, kanjidic, kradfile, edict, enamdict):
def main():
parser = optparse.OptionParser()
parser.add_option('--kanjidic', dest='kanjidic')
- parser.add_option('--kradfile', dest='kradfile')
parser.add_option('--edict', dest='edict')
parser.add_option('--enamdict', dest='enamdict')
@@ -196,13 +173,7 @@ def main():
if len(args) == 0:
parser.print_help()
else:
- build(
- args[0],
- options.kanjidic,
- options.kradfile,
- options.edict,
- options.enamdict
- )
+ build(args[0], options.kanjidic, options.edict, options.enamdict)
if __name__ == '__main__':