summary | refs | log | tree | commit | diff
path: root/util
diff options
context:
space:
mode:
author: Alex Yatskov <alex@foosoft.net> 2016-04-16 20:11:27 -0700
committer: Alex Yatskov <alex@foosoft.net> 2016-04-16 20:11:27 -0700
commit: 9ceef4649f07211079017d2cfcbc3756dd9a509b (patch)
tree: 8bc506f337a19e165e1391229c12e32480accd24 /util
parent: ccf545919054be4dc2aa9efdc7cd914c867775a8 (diff)
Fixes to deinflection logic
Diffstat (limited to 'util')
-rwxr-xr-x util/compile.py 41
1 files changed, 26 insertions, 15 deletions
diff --git a/util/compile.py b/util/compile.py
index 1ed299f7..9c5a6bdd 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -112,21 +112,21 @@ PARSED_TAGS = {
'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
'v4r': 'Yodan verb with "ru" ending (archaic)',
'v5': 'Godan verb (not completely classified)',
- 'v5aru': 'Godan verb - -aru special class',
- 'v5b': 'Godan verb with "bu" ending',
- 'v5g': 'Godan verb with "gu" ending',
- 'v5k': 'Godan verb with "ku" ending',
- 'v5k-s': 'Godan verb - iku/yuku special class',
- 'v5m': 'Godan verb with "mu" ending',
- 'v5n': 'Godan verb with "nu" ending',
- 'v5r': 'Godan verb with "ru" ending',
- 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
- 'v5s': 'Godan verb with "su" ending',
- 'v5t': 'Godan verb with "tsu" ending',
- 'v5u': 'Godan verb with "u" ending',
- 'v5u-s': 'Godan verb with "u" ending (special class)',
- 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
- 'v5z': 'Godan verb with "zu" ending',
+ # 'v5aru': 'Godan verb - -aru special class',
+ # 'v5b': 'Godan verb with "bu" ending',
+ # 'v5g': 'Godan verb with "gu" ending',
+ # 'v5k': 'Godan verb with "ku" ending',
+ # 'v5k-s': 'Godan verb - iku/yuku special class',
+ # 'v5m': 'Godan verb with "mu" ending',
+ # 'v5n': 'Godan verb with "nu" ending',
+ # 'v5r': 'Godan verb with "ru" ending',
+ # 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
+ # 'v5s': 'Godan verb with "su" ending',
+ # 'v5t': 'Godan verb with "tsu" ending',
+ # 'v5u': 'Godan verb with "u" ending',
+ # 'v5u-s': 'Godan verb with "u" ending (special class)',
+ # 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
+ # 'v5z': 'Godan verb with "zu" ending',
'vi': 'intransitive verb',
'vk': 'kuru verb - special class',
'vn': 'irregular nu verb',
@@ -167,6 +167,16 @@ def parse_kanji_dic(path):
return results
+def fixup_godan_verbs(tags):  # Map every tag that starts with 'v5' to the plain tag 'v5'; all other tags pass through unchanged.
+ results = []  # normalized tags, collected in iteration order before de-duplication
+ for tag in tags:
+ if tag.startswith('v5'):  # matches the specific Godan class tags (e.g. 'v5k', 'v5r-i') as well as 'v5' itself
+ tag = 'v5'
+ results.append(tag)
+
+ return set(results)  # returned as a set, so several 'v5*' inputs yield a single 'v5' entry
+
+
def parse_edict(path):
results = []
for line in load_definitions(path):
@@ -188,6 +198,7 @@ def parse_edict(path):
continue
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
+ tags_raw = fixup_godan_verbs(tags_raw)
tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
tags = tags.union(tags_raw)