diff options
Diffstat (limited to 'util/compile.py')
| -rwxr-xr-x | util/compile.py | 16 | 
1 files changed, 10 insertions, 6 deletions
| diff --git a/util/compile.py b/util/compile.py index 8d8470dd..1ed299f7 100755 --- a/util/compile.py +++ b/util/compile.py @@ -89,6 +89,7 @@ PARSED_TAGS = {      'obsc':    'obscure term',      'ok':      'out-dated or obsolete kana usage',      'on-mim':  'onomatopoeic or mimetic word', +    'P':       'popular term',      'p':       'place-name',      'physics': 'physics terminology',      'pn':      'pronoun', @@ -177,15 +178,18 @@ def parse_edict(path):          reading = None if reading_match is None else reading_match.group(1)          defs = [] -        tags = [] +        tags = set()          for index, dfn in enumerate(filter(None, segments[1:])): -            dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s+)*)(.*)$', dfn) -            gloss = dfn_match.group(2) +            dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn) -            if index == 0: -                tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) -                tags = tags_raw.intersection(set(PARSED_TAGS.keys())) +            gloss = dfn_match.group(2).strip() +            if len(gloss) == 0: +                continue + +            tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) +            tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys())) +            tags = tags.union(tags_raw)              if index == 0 or len(dfn_match.group(1)) > 0:                  defs.append([gloss]) |