summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Yatskov <alex@foosoft.net> 2016-04-29 22:25:33 -0700
committer: Alex Yatskov <alex@foosoft.net> 2016-04-29 22:25:33 -0700
commit: 61993db7026d2e06666ba6aeea1f7eed388d8592 (patch)
tree: 5af5f0a6e38661848c5c0477c98343a65c4e09e2
parent: a7e5a23e2d2facf774a1455b0b2802633a4a183a (diff)
Simplify compile script
-rwxr-xr-x util/compile.py 226
1 files changed, 113 insertions, 113 deletions
diff --git a/util/compile.py b/util/compile.py
index 842dfa9a..41c3f432 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -25,118 +25,118 @@ import re
PARSED_TAGS = {
- 'Buddh': 'Buddhist term',
- 'MA': 'martial arts term',
- 'X': 'rude or X-rated term',
- 'abbr': 'abbreviation',
- 'adj': 'former adjective classification (being removed)',
- 'adj-f': 'noun or verb acting prenominally (other than the above)',
- 'adj-i': 'adjective (keiyoushi)',
- 'adj-na': 'adjectival nouns or quasi-adjectives (keiyodoshi)',
- 'adj-no': 'nouns which may take the genitive case particle "no"',
- 'adj-pn': 'pre-noun adjectival (rentaishi)',
- 'adj-t': '"taru" adjective',
- 'adv': 'adverb (fukushi)',
- 'adv-n': 'adverbial noun',
- 'adv-to': 'adverb taking the "to" particle',
- 'arch': 'archaism',
- 'ateji': 'ateji (phonetic) reading',
- 'aux': 'auxiliary',
- 'aux-adj': 'auxiliary adjective',
- 'aux-v': 'auxiliary verb',
- 'c': 'company name',
- 'chn': 'children\'s language',
- 'col': 'colloquialism',
- 'comp': 'computer terminology',
- 'conj': 'conjunction',
- 'ctr': 'counter',
- 'derog': 'derogatory term',
- 'eK': 'exclusively kanji',
- 'ek': 'exclusively kana',
- 'exp': 'Expressions (phrases, clauses, etc.)',
- 'f': 'female given name',
- 'fam': 'familiar language',
- 'fem': 'female term or language',
- 'food': 'food term',
- 'g': 'given name, as-yet not classified by sex',
- 'geom': 'geometry term',
- 'gikun': 'gikun (meaning) reading',
- 'gram': 'grammatical term',
- 'h': 'full (usually family plus given) name of a particular person',
- 'hon': 'honorific or respectful (sonkeigo) language',
- 'hum': 'humble (kenjougo) language',
- 'iK': 'word containing irregular kanji usage',
- 'id': 'idiomatic expression',
- 'ik': 'word containing irregular kana usage',
- 'int': 'interjection (kandoushi)',
- 'io': 'irregular okurigana usage',
- 'iv': 'irregular verb',
- 'ling': 'linguistics terminology',
- 'm': 'male given name',
- 'm-sl': 'manga slang',
- 'male': 'male term or language',
- 'male-sl': 'male slang',
- 'math': 'mathematics',
- 'mil': 'military',
- 'n': 'noun (common) (futsuumeishi)',
- 'n-adv': 'adverbial noun (fukushitekimeishi)',
- 'n-pref': 'noun, used as a prefix',
- 'n-suf': 'noun, used as a suffix',
- 'n-t': 'noun (temporal) (jisoumeishi)',
- 'num': 'numeric',
- 'oK': 'word containing out-dated kanji',
- 'obs': 'obsolete term',
- 'obsc': 'obscure term',
- 'ok': 'out-dated or obsolete kana usage',
- 'on-mim': 'onomatopoeic or mimetic word',
- 'P': 'popular term',
- 'p': 'place-name',
- 'physics': 'physics terminology',
- 'pn': 'pronoun',
- 'poet': 'poetical term',
- 'pol': 'polite (teineigo) language',
- 'pr': 'product name',
- 'pref': 'prefix',
- 'prt': 'particle',
- 'rare': 'rare (now replaced by "obsc")',
- 's': 'surname',
- 'sens': 'sensitive word',
- 'sl': 'slang',
- 'st': 'stations',
- 'suf': 'suffix',
- 'u': 'person name, either given or surname, as-yet unclassified',
- 'uK': 'word usually written using kanji alone',
- 'uk': 'word usually written using kana alone',
- 'v1': 'Ichidan verb',
- 'v2a-s': 'Nidan verb with "u" ending (archaic)',
- 'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
- 'v4r': 'Yodan verb with "ru" ending (archaic)',
- 'v5': 'Godan verb (not completely classified)',
- 'v5aru': 'Godan verb - -aru special class',
- 'v5b': 'Godan verb with "bu" ending',
- 'v5g': 'Godan verb with "gu" ending',
- 'v5k': 'Godan verb with "ku" ending',
- 'v5k-s': 'Godan verb - iku/yuku special class',
- 'v5m': 'Godan verb with "mu" ending',
- 'v5n': 'Godan verb with "nu" ending',
- 'v5r': 'Godan verb with "ru" ending',
- 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
- 'v5s': 'Godan verb with "su" ending',
- 'v5t': 'Godan verb with "tsu" ending',
- 'v5u': 'Godan verb with "u" ending',
- 'v5u-s': 'Godan verb with "u" ending (special class)',
- 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
- 'v5z': 'Godan verb with "zu" ending',
- 'vi': 'intransitive verb',
- 'vk': 'kuru verb - special class',
- 'vn': 'irregular nu verb',
- 'vs': 'noun or participle which takes the aux. verb suru',
- 'vs-c': 'su verb - precursor to the modern suru',
- 'vs-i': 'suru verb - irregular',
- 'vs-s': 'suru verb - special class',
- 'vt': 'transitive ver',
- 'vulg': 'vulgar expression or word',
- 'vz': 'Ichidan verb - zuru verb - (alternative form of -jiru verbs)',
+ 'Buddh',
+ 'MA',
+ 'X',
+ 'abbr',
+ 'adj',
+ 'adj-f',
+ 'adj-i',
+ 'adj-na',
+ 'adj-no',
+ 'adj-pn',
+ 'adj-t',
+ 'adv',
+ 'adv-n',
+ 'adv-to',
+ 'arch',
+ 'ateji',
+ 'aux',
+ 'aux-adj',
+ 'aux-v',
+ 'c',
+ 'chn',
+ 'col',
+ 'comp',
+ 'conj',
+ 'ctr',
+ 'derog',
+ 'eK',
+ 'ek',
+ 'exp',
+ 'f',
+ 'fam',
+ 'fem',
+ 'food',
+ 'g',
+ 'geom',
+ 'gikun',
+ 'gram',
+ 'h',
+ 'hon',
+ 'hum',
+ 'iK',
+ 'id',
+ 'ik',
+ 'int',
+ 'io',
+ 'iv',
+ 'ling',
+ 'm',
+ 'm-sl',
+ 'male',
+ 'male-sl',
+ 'math',
+ 'mil',
+ 'n',
+ 'n-adv',
+ 'n-pref',
+ 'n-suf',
+ 'n-t',
+ 'num',
+ 'oK',
+ 'obs',
+ 'obsc',
+ 'ok',
+ 'on-mim',
+ 'P',
+ 'p',
+ 'physics',
+ 'pn',
+ 'poet',
+ 'pol',
+ 'pr',
+ 'pref',
+ 'prt',
+ 'rare',
+ 's',
+ 'sens',
+ 'sl',
+ 'st',
+ 'suf',
+ 'u',
+ 'uK',
+ 'uk',
+ 'v1',
+ 'v2a-s',
+ 'v4h',
+ 'v4r',
+ 'v5',
+ 'v5aru',
+ 'v5b',
+ 'v5g',
+ 'v5k',
+ 'v5k-s',
+ 'v5m',
+ 'v5n',
+ 'v5r',
+ 'v5r-i',
+ 'v5s',
+ 'v5t',
+ 'v5u',
+ 'v5u-s',
+ 'v5uru',
+ 'v5z',
+ 'vi',
+ 'vk',
+ 'vn',
+ 'vs',
+ 'vs-c',
+ 'vs-i',
+ 'vs-s',
+ 'vt',
+ 'vulg',
+ 'vz'
}
@@ -184,7 +184,7 @@ def parse_edict(path):
dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn)
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
- tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
+ tags_raw = tags_raw.intersection(PARSED_TAGS)
tags = tags.union(tags_raw)
gloss = dfn_match.group(2).strip()