diff options
Diffstat (limited to 'util/compile.py')
| -rwxr-xr-x | util/compile.py | 226 | 
1 files changed, 113 insertions, 113 deletions
diff --git a/util/compile.py b/util/compile.py index 842dfa9a..41c3f432 100755 --- a/util/compile.py +++ b/util/compile.py @@ -25,118 +25,118 @@ import re  PARSED_TAGS = { -    'Buddh':   'Buddhist term', -    'MA':      'martial arts term', -    'X':       'rude or X-rated term', -    'abbr':    'abbreviation', -    'adj':     'former adjective classification (being removed)', -    'adj-f':   'noun or verb acting prenominally (other than the above)', -    'adj-i':   'adjective (keiyoushi)', -    'adj-na':  'adjectival nouns or quasi-adjectives (keiyodoshi)', -    'adj-no':  'nouns which may take the genitive case particle "no"', -    'adj-pn':  'pre-noun adjectival (rentaishi)', -    'adj-t':   '"taru" adjective', -    'adv':     'adverb (fukushi)', -    'adv-n':   'adverbial noun', -    'adv-to':  'adverb taking the "to" particle', -    'arch':    'archaism', -    'ateji':   'ateji (phonetic) reading', -    'aux':     'auxiliary', -    'aux-adj': 'auxiliary adjective', -    'aux-v':   'auxiliary verb', -    'c':       'company name', -    'chn':     'children\'s language', -    'col':     'colloquialism', -    'comp':    'computer terminology', -    'conj':    'conjunction', -    'ctr':     'counter', -    'derog':   'derogatory term', -    'eK':      'exclusively kanji', -    'ek':      'exclusively kana', -    'exp':     'Expressions (phrases, clauses, etc.)', -    'f':       'female given name', -    'fam':     'familiar language', -    'fem':     'female term or language', -    'food':    'food term', -    'g':       'given name, as-yet not classified by sex', -    'geom':    'geometry term', -    'gikun':   'gikun (meaning) reading', -    'gram':    'grammatical term', -    'h':       'full (usually family plus given) name of a particular person', -    'hon':     'honorific or respectful (sonkeigo) language', -    'hum':     'humble (kenjougo) language', -    'iK':      'word containing irregular kanji usage', -    'id':      'idiomatic expression', -    'ik':      'word containing irregular kana usage', -    'int':     'interjection (kandoushi)', -    'io':      'irregular okurigana usage', -    'iv':      'irregular verb', -    'ling':    'linguistics terminology', -    'm':       'male given name', -    'm-sl':    'manga slang', -    'male':    'male term or language', -    'male-sl': 'male slang', -    'math':    'mathematics', -    'mil':     'military', -    'n':       'noun (common) (futsuumeishi)', -    'n-adv':   'adverbial noun (fukushitekimeishi)', -    'n-pref':  'noun, used as a prefix', -    'n-suf':   'noun, used as a suffix', -    'n-t':     'noun (temporal) (jisoumeishi)', -    'num':     'numeric', -    'oK':      'word containing out-dated kanji', -    'obs':     'obsolete term', -    'obsc':    'obscure term', -    'ok':      'out-dated or obsolete kana usage', -    'on-mim':  'onomatopoeic or mimetic word', -    'P':       'popular term', -    'p':       'place-name', -    'physics': 'physics terminology', -    'pn':      'pronoun', -    'poet':    'poetical term', -    'pol':     'polite (teineigo) language', -    'pr':      'product name', -    'pref':    'prefix', -    'prt':     'particle', -    'rare':    'rare (now replaced by "obsc")', -    's':       'surname', -    'sens':    'sensitive word', -    'sl':      'slang', -    'st':      'stations', -    'suf':     'suffix', -    'u':       'person name, either given or surname, as-yet unclassified', -    'uK':      'word usually written using kanji alone', -    'uk':      'word usually written using kana alone', -    'v1':      'Ichidan verb', -    'v2a-s':   'Nidan verb with "u" ending (archaic)', -    'v4h':     'Yodan verb with "hu/fu" ending (archaic)', -    'v4r':     'Yodan verb with "ru" ending (archaic)', -    'v5':      'Godan verb (not completely classified)', -    'v5aru':   'Godan verb - -aru special class', -    'v5b':     'Godan verb with "bu" ending', -    'v5g':     'Godan verb with "gu" ending', -    'v5k':     'Godan verb with "ku" ending', -    'v5k-s':   'Godan verb - iku/yuku special class', -    'v5m':     'Godan verb with "mu" ending', -    'v5n':     'Godan verb with "nu" ending', -    'v5r':     'Godan verb with "ru" ending', -    'v5r-i':   'Godan verb with "ru" ending (irregular verb)', -    'v5s':     'Godan verb with "su" ending', -    'v5t':     'Godan verb with "tsu" ending', -    'v5u':     'Godan verb with "u" ending', -    'v5u-s':   'Godan verb with "u" ending (special class)', -    'v5uru':   'Godan verb - uru old class verb (old form of Eru)', -    'v5z':     'Godan verb with "zu" ending', -    'vi':      'intransitive verb', -    'vk':      'kuru verb - special class', -    'vn':      'irregular nu verb', -    'vs':      'noun or participle which takes the aux. verb suru', -    'vs-c':    'su verb - precursor to the modern suru', -    'vs-i':    'suru verb - irregular', -    'vs-s':    'suru verb - special class', -    'vt':      'transitive ver', -    'vulg':    'vulgar expression or word', -    'vz':      'Ichidan verb - zuru verb - (alternative form of -jiru verbs)', +    'Buddh', +    'MA', +    'X', +    'abbr', +    'adj', +    'adj-f', +    'adj-i', +    'adj-na', +    'adj-no', +    'adj-pn', +    'adj-t', +    'adv', +    'adv-n', +    'adv-to', +    'arch', +    'ateji', +    'aux', +    'aux-adj', +    'aux-v', +    'c', +    'chn', +    'col', +    'comp', +    'conj', +    'ctr', +    'derog', +    'eK', +    'ek', +    'exp', +    'f', +    'fam', +    'fem', +    'food', +    'g', +    'geom', +    'gikun', +    'gram', +    'h', +    'hon', +    'hum', +    'iK', +    'id', +    'ik', +    'int', +    'io', +    'iv', +    'ling', +    'm', +    'm-sl', +    'male', +    'male-sl', +    'math', +    'mil', +    'n', +    'n-adv', +    'n-pref', +    'n-suf', +    'n-t', +    'num', +    'oK', +    'obs', +    'obsc', +    'ok', +    'on-mim', +    'P', +    'p', +    'physics', +    'pn', +    'poet', +    'pol', +    'pr', +    'pref', +    'prt', +    'rare', +    's', +    'sens', +    'sl', +    'st', +    'suf', +    'u', +    'uK', +    'uk', +    'v1', +    'v2a-s', +    'v4h', +    'v4r', +    'v5', +    'v5aru', +    'v5b', +    'v5g', +    'v5k', +    'v5k-s', +    'v5m', +    'v5n', +    'v5r', +    'v5r-i', +    'v5s', +    'v5t', +    'v5u', +    'v5u-s', +    'v5uru', +    'v5z', +    'vi', +    'vk', +    'vn', +    'vs', +    'vs-c', +    'vs-i', +    'vs-s', +    'vt', +    'vulg', +    'vz'  } @@ -184,7 +184,7 @@ def parse_edict(path):              dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn)              tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) -            tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys())) +            tags_raw = tags_raw.intersection(PARSED_TAGS)              tags = tags.union(tags_raw)              gloss = dfn_match.group(2).strip()  |