about summary refs log tree commit diff
path: root/db/dict
diff options
context:
space:
mode:
author lonkaars <loek@pipeframe.xyz> 2023-07-15 21:52:57 +0200
committer lonkaars <loek@pipeframe.xyz> 2023-07-15 21:52:57 +0200
commit 8e179a43e909ce4683f753a90bb3505630f05ad8 (patch)
tree 5e46594af33ba7f82d1bd5ea954b99b4a92d0093 /db/dict
parent 3dc9484fc81db8f3c8ffd4ebb4bab042e66c6214 (diff)
implement alternate writings (failing tests down to 500)
Diffstat (limited to 'db/dict')
-rw-r--r-- db/dict/deinflections.sql 4
-rw-r--r-- db/dict/init.sql 2
-rw-r--r-- db/dict/tags.sql 12
-rw-r--r-- db/dict/template.sql 26
4 files changed, 28 insertions, 16 deletions
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
index 7472122..1fb1ebe 100644
--- a/db/dict/deinflections.sql
+++ b/db/dict/deinflections.sql
@@ -286,8 +286,8 @@ insert into deinflection_temp values
('infl:passive', 'こられる', 'くる', 'ru', 'k'),
('infl:passive', '来られる', '来る', 'ru', 'k'),
- -- auxiliary rules
- ('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
+ -- suru verbs <https://guidetojapanese.org/learn/grammar/surunaru>
+ ('infl:suru', 'する', '', 's', ''); -- deconjugate suru verbs into stem
-- rule/bitmask lookup table
create temporary table rule_map (tag, name, mask);
diff --git a/db/dict/init.sql b/db/dict/init.sql
index 4e9fcc9..3c6dc50 100644
--- a/db/dict/init.sql
+++ b/db/dict/init.sql
@@ -92,8 +92,8 @@ create index term_expression on term (expression);
create index term_reading on term (reading);
-- TODO: (automatically) remove unused terms from db (using constraints?)
-
-- allow many<->many relation between definition and tag
+-- TODO: remove this table!
create table if not exists definition_tag (
id integer primary key autoincrement,
definition_id int not null,
diff --git a/db/dict/tags.sql b/db/dict/tags.sql
index a200abb..2088831 100644
--- a/db/dict/tags.sql
+++ b/db/dict/tags.sql
@@ -1,11 +1 @@
-insert into tag (code) values
- ('class:verb'),
- ('class:verb:suru'),
- ('class:verb:suru-included'),
- ('class:noun'),
- ('class:suffix'),
- ('class:part'),
- ('class:expr'),
- ('name:place'),
- ('name:female'),
- ('name:male');
+-- TODO: generate this file from TypeScript
diff --git a/db/dict/template.sql b/db/dict/template.sql
index 1a07252..6b17a0c 100644
--- a/db/dict/template.sql
+++ b/db/dict/template.sql
@@ -1,7 +1,5 @@
-- create temporary ingest table
drop table if exists ingest;
--- TODO: ingest pitch-accent dictionaries
--- TODO: ingest alternate writings (space-separated)
create temporary table ingest(
-- term fields
expression text not null, -- kanji of term (e.g. 読み込む)
@@ -13,6 +11,16 @@ create temporary table ingest(
glossary text null default null, -- glossary content (support for basic HTML markup/styling)
glossary_tags text null default null -- add tags to single glossary entry
);
+-- TODO: ingest pitch-accent dictionaries
+
+-- temporary table of alternate writings: each row pairs one alternate
+-- (expression, reading) with the (expression, reading) of its parent term
+drop table if exists alts;
+create temporary table alts (
+    -- alternate version
+    expression        text not null, -- kanji
+    reading           text not null, -- reading
+    -- parent (original) term
+    normal_expression text not null, -- kanji
+    normal_reading    text not null  -- reading
+);
-- #DICTIONARY_CONTENT_BEGIN
-- this template is 'rendered' by pasting a .dict.sql file in between these
@@ -35,6 +43,11 @@ insert into term (expression, reading)
select expression, reading
from ingest;
+-- add alternates: every row of alts becomes its own term row whose `alt`
+-- column holds the id of the parent term found by (normal_expression,
+-- normal_reading); the scalar subquery yields null when no parent matches
+insert into term (expression, reading, alt)
+select
+    alts.expression,
+    alts.reading,
+    (
+        select parent.id
+        from term as parent
+        where parent.expression = alts.normal_expression
+          and parent.reading = alts.normal_reading
+    )
+from alts;
+
-- add definitions
insert into definition (term_id, sort, glossary, dict_id)
select
@@ -103,6 +116,15 @@ select
from term_tag_map
join tag on tag.code = term_tag_map.tag;
+-- add tags to alternates: each tag attached to a parent term is also
+-- attached to the alternate term(s) that point at it through alts
+insert into term_tag (term_id, tag_id)
+select
+    alt_term.id,
+    parent_tag.tag_id
+from alts
+inner join term as parent_term
+    on parent_term.expression = alts.normal_expression
+    and parent_term.reading = alts.normal_reading
+inner join term as alt_term
+    on alt_term.expression = alts.expression
+    and alt_term.reading = alts.reading
+-- parents without any tag contribute no rows, so an inner join is enough
+inner join term_tag as parent_tag
+    on parent_tag.term_id = parent_term.id
+where parent_tag.tag_id is not null;
+
-- add tags to definitions
insert into definition_tag (definition_id, tag_id)
select