about | summary | refs | log | tree | commit | diff
path: root/util
diff options
context:
space:
mode:
author: Alex Yatskov <alex@foosoft.net> 2016-04-13 19:41:43 -0700
committer: Alex Yatskov <alex@foosoft.net> 2016-04-13 19:41:43 -0700
commit: 52fca7c5a5179683067970aa59877d00175e2519 (patch)
tree: 889fa752577e8102a542a3d5820c1259a77e59b5 /util
parent: 18720aa150734ac1888ca560c6e840d4ce25d00e (diff)
Fixing dictionary generation
Diffstat (limited to 'util')
-rwxr-xr-x util/compile.py | 16
1 files changed, 10 insertions, 6 deletions
diff --git a/util/compile.py b/util/compile.py
index 8d8470dd..1ed299f7 100755
--- a/util/compile.py
+++ b/util/compile.py
@@ -89,6 +89,7 @@ PARSED_TAGS = {
'obsc': 'obscure term',
'ok': 'out-dated or obsolete kana usage',
'on-mim': 'onomatopoeic or mimetic word',
+ 'P': 'popular term',
'p': 'place-name',
'physics': 'physics terminology',
'pn': 'pronoun',
@@ -177,15 +178,18 @@ def parse_edict(path):
reading = None if reading_match is None else reading_match.group(1)
defs = []
- tags = []
+ tags = set()
for index, dfn in enumerate(filter(None, segments[1:])):
- dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s+)*)(.*)$', dfn)
- gloss = dfn_match.group(2)
+ dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn)
- if index == 0:
- tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
- tags = tags_raw.intersection(set(PARSED_TAGS.keys()))
+ gloss = dfn_match.group(2).strip()
+ if len(gloss) == 0:
+ continue
+
+ tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
+ tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
+ tags = tags.union(tags_raw)
if index == 0 or len(dfn_match.group(1)) > 0:
defs.append([gloss])