diff options
-rwxr-xr-x | build_tmpl.sh | 2 | ||||
-rwxr-xr-x | build_tmpl_auto.sh | 2 | ||||
-rw-r--r-- | tmpl/footer.html (renamed from util/tmpl/footer.html) | 0 | ||||
-rw-r--r-- | tmpl/header.html (renamed from util/tmpl/header.html) | 0 | ||||
-rw-r--r-- | tmpl/kanji-link.html (renamed from util/tmpl/kanji-link.html) | 0 | ||||
-rw-r--r-- | tmpl/kanji-list.html (renamed from util/tmpl/kanji-list.html) | 0 | ||||
-rw-r--r-- | tmpl/kanji.html (renamed from util/tmpl/kanji.html) | 0 | ||||
-rw-r--r-- | tmpl/term-list.html (renamed from util/tmpl/term-list.html) | 0 | ||||
-rw-r--r-- | tmpl/term.html (renamed from util/tmpl/term.html) | 0 | ||||
-rwxr-xr-x | util/compile.py | 244 | ||||
-rw-r--r-- | util/data/edict | 3 | ||||
-rw-r--r-- | util/data/enamdict | 3 | ||||
-rw-r--r-- | util/data/kanjidic | 3 |
13 files changed, 2 insertions, 255 deletions
diff --git a/build_tmpl.sh b/build_tmpl.sh index 5c4a0c44..be8362a6 100755 --- a/build_tmpl.sh +++ b/build_tmpl.sh @@ -1,2 +1,2 @@ #!/bin/sh -handlebars util/tmpl/* -f ext/bg/js/templates.js +handlebars tmpl/* -f ext/bg/js/templates.js diff --git a/build_tmpl_auto.sh b/build_tmpl_auto.sh index c10c858c..98065cb7 100755 --- a/build_tmpl_auto.sh +++ b/build_tmpl_auto.sh @@ -1,5 +1,5 @@ #!/bin/bash -DIRECTORY_TO_OBSERVE="util/tmpl" +DIRECTORY_TO_OBSERVE="tmpl" BUILD_SCRIPT="build_tmpl.sh" function block_for_change { diff --git a/util/tmpl/footer.html b/tmpl/footer.html index 3840600f..3840600f 100644 --- a/util/tmpl/footer.html +++ b/tmpl/footer.html diff --git a/util/tmpl/header.html b/tmpl/header.html index 2256b08a..2256b08a 100644 --- a/util/tmpl/header.html +++ b/tmpl/header.html diff --git a/util/tmpl/kanji-link.html b/tmpl/kanji-link.html index f4f8dc69..f4f8dc69 100644 --- a/util/tmpl/kanji-link.html +++ b/tmpl/kanji-link.html diff --git a/util/tmpl/kanji-list.html b/tmpl/kanji-list.html index b5d0b627..b5d0b627 100644 --- a/util/tmpl/kanji-list.html +++ b/tmpl/kanji-list.html diff --git a/util/tmpl/kanji.html b/tmpl/kanji.html index 7a5affb7..7a5affb7 100644 --- a/util/tmpl/kanji.html +++ b/tmpl/kanji.html diff --git a/util/tmpl/term-list.html b/tmpl/term-list.html index 5581b679..5581b679 100644 --- a/util/tmpl/term-list.html +++ b/tmpl/term-list.html diff --git a/util/tmpl/term.html b/tmpl/term.html index 216ccf0a..216ccf0a 100644 --- a/util/tmpl/term.html +++ b/tmpl/term.html diff --git a/util/compile.py b/util/compile.py deleted file mode 100755 index 41c3f432..00000000 --- a/util/compile.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python - -# Copyright (C) 2016 Alex Yatskov <alex@foosoft.net> -# Author: Alex Yatskov <alex@foosoft.net> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -import codecs -import json -import optparse -import os.path -import re - - -PARSED_TAGS = { - 'Buddh', - 'MA', - 'X', - 'abbr', - 'adj', - 'adj-f', - 'adj-i', - 'adj-na', - 'adj-no', - 'adj-pn', - 'adj-t', - 'adv', - 'adv-n', - 'adv-to', - 'arch', - 'ateji', - 'aux', - 'aux-adj', - 'aux-v', - 'c', - 'chn', - 'col', - 'comp', - 'conj', - 'ctr', - 'derog', - 'eK', - 'ek', - 'exp', - 'f', - 'fam', - 'fem', - 'food', - 'g', - 'geom', - 'gikun', - 'gram', - 'h', - 'hon', - 'hum', - 'iK', - 'id', - 'ik', - 'int', - 'io', - 'iv', - 'ling', - 'm', - 'm-sl', - 'male', - 'male-sl', - 'math', - 'mil', - 'n', - 'n-adv', - 'n-pref', - 'n-suf', - 'n-t', - 'num', - 'oK', - 'obs', - 'obsc', - 'ok', - 'on-mim', - 'P', - 'p', - 'physics', - 'pn', - 'poet', - 'pol', - 'pr', - 'pref', - 'prt', - 'rare', - 's', - 'sens', - 'sl', - 'st', - 'suf', - 'u', - 'uK', - 'uk', - 'v1', - 'v2a-s', - 'v4h', - 'v4r', - 'v5', - 'v5aru', - 'v5b', - 'v5g', - 'v5k', - 'v5k-s', - 'v5m', - 'v5n', - 'v5r', - 'v5r-i', - 'v5s', - 'v5t', - 'v5u', - 'v5u-s', - 'v5uru', - 'v5z', - 'vi', - 'vk', - 'vn', - 'vs', - 'vs-c', - 'vs-i', - 'vs-s', - 'vt', - 'vulg', - 'vz' -} - - -def is_hiragana(c): - return 0x3040 <= ord(c) < 0x30a0 - - -def is_katakana(c): - return 0x30a0 <= ord(c) < 0x3100 - - -def load_definitions(path): - print('Parsing "{0}"...'.format(path)) - with codecs.open(path, encoding='euc-jp') as fp: - return filter(lambda x: x and x[0] != '#', fp.read().splitlines()) - - -def parse_kanji_dic(path): - results = {} - for line in load_definitions(path): - segments = line.split() - character = segments[0] - kunyomi = ' '.join(filter(lambda x: list(filter(is_hiragana, x)), segments[1:])) - onyomi = ' '.join(filter(lambda x: list(filter(is_katakana, x)), segments[1:])) - glossary = re.findall('\{([^\}]+)\}', line) - results[character] = (kunyomi or None, onyomi or None, glossary) - - return results - - -def parse_edict(path): - results = [] - for line in load_definitions(path): - segments = line.split('/') - - exp_parts = segments[0].split(' ') - expression = exp_parts[0] - reading_match = re.search('\[([^\]]+)\]', exp_parts[1]) - reading = None if reading_match is None else reading_match.group(1) - - defs = [] - tags = set() - - for index, dfn in enumerate(filter(None, segments[1:])): - dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn) - - tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) - tags_raw = tags_raw.intersection(PARSED_TAGS) - tags = tags.union(tags_raw) - - gloss = dfn_match.group(2).strip() - if len(gloss) == 0: - continue - - if index == 0 or len(dfn_match.group(1)) > 0: - defs.append([gloss]) - else: - defs[-1].append(gloss) - - result = [expression, reading, ' '.join(tags)] - result += map(lambda x: '; '.join(x), defs) - - results.append(result) - - indices = {} - for i, d in enumerate(results): - for key in d[:2]: - if key is not None: - values = indices.get(key, []) - values.append(i) - indices[key] = values - - return {'defs': results, 'indices': indices} - - -def build_dict(output_dir, input_file, parser): - if input_file is not None: - base = os.path.splitext(os.path.basename(input_file))[0] - with open(os.path.join(output_dir, base) + '.json', 'w') as fp: - # json.dump(parser(input_file), fp, sort_keys=True, indent=4, separators=(',', ': ')) - json.dump(parser(input_file), fp, separators=(',', ':')) - - -def build(dict_dir, kanjidic, edict, enamdict): - build_dict(dict_dir, kanjidic, parse_kanji_dic) - build_dict(dict_dir, edict, parse_edict) - build_dict(dict_dir, enamdict, parse_edict) - - -def main(): - parser = optparse.OptionParser() - parser.add_option('--kanjidic', dest='kanjidic') - parser.add_option('--edict', dest='edict') - parser.add_option('--enamdict', dest='enamdict') - - options, args = parser.parse_args() - - if len(args) == 0: - parser.print_help() - else: - build(args[0], options.kanjidic, options.edict, options.enamdict) - - -if __name__ == '__main__': - main() diff --git a/util/data/edict b/util/data/edict deleted file mode 100644 index 35fc6bdd..00000000 --- a/util/data/edict +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51b9e6f5761f3a9c2d1f6c65df131ad3fa60738ffda06e1e0cfb07fdfe6acd89 -size 15655597 diff --git a/util/data/enamdict b/util/data/enamdict deleted file mode 100644 index 17004236..00000000 --- a/util/data/enamdict +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9fd670f8c8a020aeb2b870d186d223702e91e0538cec04a784b1433363aff9f9 -size 26426507 diff --git a/util/data/kanjidic b/util/data/kanjidic deleted file mode 100644 index dbfb510f..00000000 --- a/util/data/kanjidic +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:494d5a6d750444907995eb3be7bbe7d817a20d3431ae0d8c944715690610b69c -size 1213904 |