diff options
author | lonkaars <loek@pipeframe.xyz> | 2023-07-15 21:52:57 +0200 |
---|---|---|
committer | lonkaars <loek@pipeframe.xyz> | 2023-07-15 21:52:57 +0200 |
commit | 8e179a43e909ce4683f753a90bb3505630f05ad8 (patch) | |
tree | 5e46594af33ba7f82d1bd5ea954b99b4a92d0093 /db/dict | |
parent | 3dc9484fc81db8f3c8ffd4ebb4bab042e66c6214 (diff) |
implement alternate writings (failing tests down to 500)
Diffstat (limited to 'db/dict')
-rw-r--r-- | db/dict/deinflections.sql | 4 | +2 -2 |
-rw-r--r-- | db/dict/init.sql | 2 | +1 -1 |
-rw-r--r-- | db/dict/tags.sql | 12 | +1 -11 |
-rw-r--r-- | db/dict/template.sql | 26 | +24 -2 |
4 files changed, 28 insertions, 16 deletions
```diff
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index 7472122..1fb1ebe 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -286,8 +286,8 @@ insert into deinflection_temp values
 	('infl:passive', 'こられる', 'くる', 'ru', 'k'),
 	('infl:passive', '来られる', '来る', 'ru', 'k'),
 
-	-- auxiliary rules
-	('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
+	-- suru verbs <https://guidetojapanese.org/learn/grammar/surunaru>
+	('infl:suru', 'する', '', 's', ''); -- deconjugate suru verbs into stem
 
 -- rule/bitmask lookup table
 create temporary table rule_map (tag, name, mask);
diff --git a/db/dict/init.sql b/db/dict/init.sql
index 4e9fcc9..3c6dc50 100644
--- a/db/dict/init.sql
+++ b/db/dict/init.sql
@@ -92,8 +92,8 @@ create index term_expression on term (expression);
 create index term_reading on term (reading);
 
 -- TODO: (automatically) remove unused terms from db (using constraints?)
-
 -- allow many<->many relation between definition and tag
+-- TODO: remove this table!
 create table if not exists definition_tag (
 	id integer primary key autoincrement,
 	definition_id int not null,
diff --git a/db/dict/tags.sql b/db/dict/tags.sql
index a200abb..2088831 100644
--- a/db/dict/tags.sql
+++ b/db/dict/tags.sql
@@ -1,11 +1 @@
-insert into tag (code) values
-	('class:verb'),
-	('class:verb:suru'),
-	('class:verb:suru-included'),
-	('class:noun'),
-	('class:suffix'),
-	('class:part'),
-	('class:expr'),
-	('name:place'),
-	('name:female'),
-	('name:male');
+-- TODO: generate this file from TypeScript
diff --git a/db/dict/template.sql b/db/dict/template.sql
index 1a07252..6b17a0c 100644
--- a/db/dict/template.sql
+++ b/db/dict/template.sql
@@ -1,7 +1,5 @@
 -- create temporary ingest table
 drop table if exists ingest;
--- TODO: ingest pitch-accent dictionaries
--- TODO: ingest alternate writings (space-separated)
 create temporary table ingest(
 	-- term fields
 	expression text not null, -- kanji of term (e.g. 読み込む)
@@ -13,6 +11,16 @@ create temporary table ingest(
 	glossary text null default null, -- glossary content (support for basic HTML markup/styling)
 	glossary_tags text null default null -- add tags to single glossary entry
 );
+-- TODO: ingest pitch-accent dictionaries
+
+-- create temporary alternate readings table
+drop table if exists alts;
+create temporary table alts(
+	expression text not null, -- kanji of alternate version
+	reading text not null, -- reading of alternate version
+	normal_expression text not null, -- kanji of parent (original)
+	normal_reading text not null -- reading of parent (original)
+);
 
 -- #DICTIONARY_CONTENT_BEGIN
 -- this template is 'rendered' by pasting a .dict.sql file in between these
@@ -35,6 +43,11 @@ insert into term (expression, reading)
 select expression, reading
 from ingest;
 
+-- add alternates
+insert into term (expression, reading, alt)
+select expression, reading, (select id from term where expression = normal_expression and reading = normal_reading)
+from alts;
+
 -- add definitions
 insert into definition (term_id, sort, glossary, dict_id)
 select
@@ -103,6 +116,15 @@ select
 from term_tag_map
 join tag on tag.code = term_tag_map.tag;
 
+-- add tags to alternates
+insert into term_tag (term_id, tag_id)
+select term_alt.id, term_tag.tag_id
+from alts
+inner join term as term_normal on term_normal.expression = alts.normal_expression and term_normal.reading = alts.normal_reading
+inner join term as term_alt on term_alt.expression = alts.expression and term_alt.reading = alts.reading
+left join term_tag on term_tag.term_id = term_normal.id
+where term_tag.tag_id is not null;
+
 -- add tags to definitions
 insert into definition_tag (definition_id, tag_id)
 select
```