diff options
author | Alex Yatskov <alex@foosoft.net> | 2016-05-08 12:01:43 -0700 |
---|---|---|
committer | Alex Yatskov <alex@foosoft.net> | 2016-05-08 12:01:43 -0700 |
commit | 14ecb8f32831ef4747adef1de1af658dc7d4b298 (patch) | |
tree | b3b765ef282b15dd31336ec7ab326a34510ea4c0 /util | |
parent | 449ffc260a94cda0aca8edf33cf105378840effb (diff) |
File reorg
Diffstat (limited to 'util')
-rwxr-xr-x | util/compile.py | 244 | ||||
-rw-r--r-- | util/data/edict | 3 | ||||
-rw-r--r-- | util/data/enamdict | 3 | ||||
-rw-r--r-- | util/data/kanjidic | 3 | ||||
-rw-r--r-- | util/tmpl/footer.html | 3 | ||||
-rw-r--r-- | util/tmpl/header.html | 14 | ||||
-rw-r--r-- | util/tmpl/kanji-link.html | 1 | ||||
-rw-r--r-- | util/tmpl/kanji-list.html | 5 | ||||
-rw-r--r-- | util/tmpl/kanji.html | 43 | ||||
-rw-r--r-- | util/tmpl/term-list.html | 5 | ||||
-rw-r--r-- | util/tmpl/term.html | 40 |
11 files changed, 0 insertions, 364 deletions
diff --git a/util/compile.py b/util/compile.py deleted file mode 100755 index 41c3f432..00000000 --- a/util/compile.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python - -# Copyright (C) 2016 Alex Yatskov <alex@foosoft.net> -# Author: Alex Yatskov <alex@foosoft.net> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -import codecs -import json -import optparse -import os.path -import re - - -PARSED_TAGS = { - 'Buddh', - 'MA', - 'X', - 'abbr', - 'adj', - 'adj-f', - 'adj-i', - 'adj-na', - 'adj-no', - 'adj-pn', - 'adj-t', - 'adv', - 'adv-n', - 'adv-to', - 'arch', - 'ateji', - 'aux', - 'aux-adj', - 'aux-v', - 'c', - 'chn', - 'col', - 'comp', - 'conj', - 'ctr', - 'derog', - 'eK', - 'ek', - 'exp', - 'f', - 'fam', - 'fem', - 'food', - 'g', - 'geom', - 'gikun', - 'gram', - 'h', - 'hon', - 'hum', - 'iK', - 'id', - 'ik', - 'int', - 'io', - 'iv', - 'ling', - 'm', - 'm-sl', - 'male', - 'male-sl', - 'math', - 'mil', - 'n', - 'n-adv', - 'n-pref', - 'n-suf', - 'n-t', - 'num', - 'oK', - 'obs', - 'obsc', - 'ok', - 'on-mim', - 'P', - 'p', - 'physics', - 'pn', - 'poet', - 'pol', - 'pr', - 'pref', - 'prt', - 'rare', - 's', - 'sens', - 'sl', - 'st', - 'suf', - 'u', - 'uK', - 'uk', - 'v1', - 'v2a-s', - 'v4h', - 'v4r', - 'v5', - 'v5aru', - 'v5b', - 'v5g', - 'v5k', - 'v5k-s', - 'v5m', - 'v5n', - 'v5r', - 'v5r-i', - 'v5s', - 'v5t', - 'v5u', - 'v5u-s', - 'v5uru', - 'v5z', - 'vi', - 'vk', - 'vn', - 'vs', - 'vs-c', - 'vs-i', - 'vs-s', - 'vt', - 'vulg', - 'vz' -} - - -def is_hiragana(c): - return 0x3040 <= ord(c) < 0x30a0 - - -def is_katakana(c): - return 0x30a0 <= ord(c) < 0x3100 - - -def load_definitions(path): - print('Parsing "{0}"...'.format(path)) - with codecs.open(path, encoding='euc-jp') as fp: - return filter(lambda x: x and x[0] != '#', fp.read().splitlines()) - - -def parse_kanji_dic(path): - results = {} - for line in load_definitions(path): - segments = line.split() - character = segments[0] - kunyomi = ' '.join(filter(lambda x: list(filter(is_hiragana, x)), segments[1:])) - onyomi = ' '.join(filter(lambda x: list(filter(is_katakana, x)), segments[1:])) - glossary = re.findall('\{([^\}]+)\}', line) - results[character] = (kunyomi or None, onyomi or None, glossary) - - return results - - -def parse_edict(path): - results = [] - for line in load_definitions(path): - segments = line.split('/') - - exp_parts = segments[0].split(' ') - expression = exp_parts[0] - reading_match = re.search('\[([^\]]+)\]', exp_parts[1]) - reading = None if reading_match is None else reading_match.group(1) - - defs = [] - tags = set() - - for index, dfn in enumerate(filter(None, segments[1:])): - dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn) - - tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) - tags_raw = tags_raw.intersection(PARSED_TAGS) - tags = tags.union(tags_raw) - - gloss = dfn_match.group(2).strip() - if len(gloss) == 0: - continue - - if index == 0 or len(dfn_match.group(1)) > 0: - defs.append([gloss]) - else: - defs[-1].append(gloss) - - result = [expression, reading, ' '.join(tags)] - result += map(lambda x: '; '.join(x), defs) - - results.append(result) - - indices = {} - for i, d in enumerate(results): - for key in d[:2]: - if key is not None: - values = indices.get(key, []) - values.append(i) - indices[key] = values - - return {'defs': results, 'indices': indices} - - -def build_dict(output_dir, input_file, parser): - if input_file is not None: - base = os.path.splitext(os.path.basename(input_file))[0] - with open(os.path.join(output_dir, base) + '.json', 'w') as fp: - # json.dump(parser(input_file), fp, sort_keys=True, indent=4, separators=(',', ': ')) - json.dump(parser(input_file), fp, separators=(',', ':')) - - -def build(dict_dir, kanjidic, edict, enamdict): - build_dict(dict_dir, kanjidic, parse_kanji_dic) - build_dict(dict_dir, edict, parse_edict) - build_dict(dict_dir, enamdict, parse_edict) - - -def main(): - parser = optparse.OptionParser() - parser.add_option('--kanjidic', dest='kanjidic') - parser.add_option('--edict', dest='edict') - parser.add_option('--enamdict', dest='enamdict') - - options, args = parser.parse_args() - - if len(args) == 0: - parser.print_help() - else: - build(args[0], options.kanjidic, options.edict, options.enamdict) - - -if __name__ == '__main__': - main() diff --git a/util/data/edict b/util/data/edict deleted file mode 100644 index 35fc6bdd..00000000 --- a/util/data/edict +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51b9e6f5761f3a9c2d1f6c65df131ad3fa60738ffda06e1e0cfb07fdfe6acd89 -size 15655597 diff --git a/util/data/enamdict b/util/data/enamdict deleted file mode 100644 index 17004236..00000000 --- a/util/data/enamdict +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9fd670f8c8a020aeb2b870d186d223702e91e0538cec04a784b1433363aff9f9 -size 26426507 diff --git a/util/data/kanjidic b/util/data/kanjidic deleted file mode 100644 index dbfb510f..00000000 --- a/util/data/kanjidic +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:494d5a6d750444907995eb3be7bbe7d817a20d3431ae0d8c944715690610b69c -size 1213904 diff --git a/util/tmpl/footer.html b/util/tmpl/footer.html deleted file mode 100644 index 3840600f..00000000 --- a/util/tmpl/footer.html +++ /dev/null @@ -1,3 +0,0 @@ - <script src="{{root}}/js/frame.js"></script> - </body> -</html> diff --git a/util/tmpl/header.html b/util/tmpl/header.html deleted file mode 100644 index 2256b08a..00000000 --- a/util/tmpl/header.html +++ /dev/null @@ -1,14 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="UTF-8"> - <title></title> - <style> - @font-face { - font-family: "KanjiStrokeOrders"; - src: url("{{root}}/ttf/kanji-stroke-orders.ttf"); - } - </style> - <link rel="stylesheet" href="{{root}}/css/frame.css"> - </head> - <body> diff --git a/util/tmpl/kanji-link.html b/util/tmpl/kanji-link.html deleted file mode 100644 index f4f8dc69..00000000 --- a/util/tmpl/kanji-link.html +++ /dev/null @@ -1 +0,0 @@ -<a href="#" class="kanji-link">{{kanji}}</a> diff --git a/util/tmpl/kanji-list.html b/util/tmpl/kanji-list.html deleted file mode 100644 index b5d0b627..00000000 --- a/util/tmpl/kanji-list.html +++ /dev/null @@ -1,5 +0,0 @@ -{{> header.html}} -{{#each defs}} -{{> kanji.html root=../root options=../options sequence=../sequence}} -{{/each}} -{{> footer.html}} diff --git a/util/tmpl/kanji.html b/util/tmpl/kanji.html deleted file mode 100644 index 7a5affb7..00000000 --- a/util/tmpl/kanji.html +++ /dev/null @@ -1,43 +0,0 @@ -<div class="kanji-definition"> - {{#with options}} - {{#if enableAnkiConnect}} - <div class="action-bar"> - <a href="#" title="Add Kanji" class="action-link disabled" data-sequence="{{../sequence}}" data-mode="kanji" data-index="{{@index}}"><img src="{{../root}}/img/add_kanji.png"></a> - </div> - {{/if}} - {{/with}} - - <div class="kanji-glyph">{{character}}</div> - - <div class="kanji-info"> - <dl> - {{#if glossary}} - <dt>Meanings</dt> - <dd> - {{#each glossary}} - {{.}}{{#unless @last}}, {{/unless}} - {{/each}} - </dd> - {{/if}} - - {{#if kunyomi}} - <dt>Kunyomi</dt> - <dd> - {{#each kunyomi}} - {{.}}{{#unless @last}}, {{/unless}} - {{/each}} - </dd> - {{/if}} - - {{#if onyomi}} - <dt>Onyomi</dt> - <dd> - {{#each onyomi}} - {{.}}{{#unless @last}}, {{/unless}} - {{/each}} - </dd> - {{/if}} - </dl> - </div> -</div> -</div> diff --git a/util/tmpl/term-list.html b/util/tmpl/term-list.html deleted file mode 100644 index 5581b679..00000000 --- a/util/tmpl/term-list.html +++ /dev/null @@ -1,5 +0,0 @@ -{{> header.html}} -{{#each defs}} -{{> term.html root=../root options=../options sequence=../sequence}} -{{/each}} -{{> footer.html}} diff --git a/util/tmpl/term.html b/util/tmpl/term.html deleted file mode 100644 index 216ccf0a..00000000 --- a/util/tmpl/term.html +++ /dev/null @@ -1,40 +0,0 @@ -<div class="term-definition"> - {{#with options}} - {{#if enableAnkiConnect}} - <div class="action-bar"> - <a href="#" title="Add term as expression" class="action-link disabled" data-sequence="{{../sequence}}" data-mode="vocabExp" data-index="{{@index}}"><img src="{{../root}}/img/add_expression.png"></a> - <a href="#" title="Add term as reading" class="action-link disabled" data-sequence="{{../sequence}}" data-mode="vocabReading" data-index="{{@index}}"><img src="{{../root}}/img/add_reading.png"></a> - </div> - {{/if}} - {{/with}} - - {{#if reading}} - <div class="term-expression"><ruby>{{#kanjiLinks}}{{expression}}{{/kanjiLinks}}<rt>{{reading}}</rt></ruby></div> - {{else}} - <div class="term-expression">{{#kanjiLinks}}{{expression}}{{/kanjiLinks}}</div> - {{/if}} - - {{#if rules}} - <div class="term-rules"> - {{#each rules}} - <span class="rule">{{.}}</span> {{#unless @last}}«{{/unless}} - {{/each}} - </div> - {{/if}} - - {{#if tags}} - <div class="term-tags"> - {{#each tags}} - <span class="tag tag-{{class}}" title="{{desc}}">{{name}}</span> - {{/each}} - </div> - {{/if}} - - <div class="term-glossary"> - <ol> - {{#each glossary}} - <li><span>{{.}}</span></li> - {{/each}} - </ol> - </div> -</div> |