aboutsummaryrefslogtreecommitdiff
path: root/db/dict/template.sql
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-06-29 11:33:23 +0200
committerlonkaars <loek@pipeframe.xyz>2023-06-29 11:33:23 +0200
commitc998e1c0477d51c886f9e4246e102dec4d7ef8dd (patch)
tree4d979c57f16b138ff4b2ce5fb3151ce241af6881 /db/dict/template.sql
parent67dbb6421976254658c5e38045513129dd18187a (diff)
add jmdict importer to repo
Diffstat (limited to 'db/dict/template.sql')
-rw-r--r--db/dict/template.sql113
1 files changed, 113 insertions, 0 deletions
diff --git a/db/dict/template.sql b/db/dict/template.sql
new file mode 100644
index 0000000..1a07252
--- /dev/null
+++ b/db/dict/template.sql
@@ -0,0 +1,113 @@
+-- create temporary ingest table
+drop table if exists ingest;
+-- TODO: ingest pitch-accent dictionaries
+-- TODO: ingest alternate writings (space-separated)
+create temporary table ingest(
+ -- term fields
+ expression text not null, -- kanji of term (e.g. 読み込む)
+ reading text not null, -- reading of term (e.g. よみこむ)
+ term_tags text not null default '', -- space-separated *term* tags, merged if term already exists in DB
+
+ -- definition fields
+ glossary_sort int not null default 1, -- order of multiple meanings (glossaries)
+ glossary text null default null, -- glossary content (support for basic HTML markup/styling)
+ glossary_tags text null default null -- add tags to single glossary entry
+);
+
+-- #DICTIONARY_CONTENT_BEGIN
+-- this template is 'rendered' by pasting a .dict.sql file in between these
+-- DICTIONARY_CONTENT markers. the makefile can render these using the
+-- following m4 code (called using m4 -P template.sql < any.dict.sql):
+--
+-- m4_undivert(`/dev/stdin')
+--
+-- this breaks when the first line of the input file is not a comment or empty
+-- line, so the makefile accounts for this by concatenating an empty line with
+-- the dict first. the runtime typescript dictionary importer handles this by
+-- not calling m4 for this.
+-- #DICTIONARY_CONTENT_END
+
+-- create dict id
+insert into dict (tag, language) values ('dict:' || :dict, :lang);
+
+-- add terms
+insert into term (expression, reading)
+select expression, reading
+from ingest;
+
+-- add definitions
+insert into definition (term_id, sort, glossary, dict_id)
+select
+ term.id,
+ ingest.glossary_sort,
+ ingest.glossary,
+ (select id from dict where tag = 'dict:' || :dict)
+from ingest
+join term on term.expression = ingest.expression and term.reading = ingest.reading;
+
+-- create map of term_id and tag code
+drop table if exists term_tag_map;
+create temporary table term_tag_map (term_id, tag);
+insert into term_tag_map
+with tag_map(term_id, temp, tag) as (
+ select
+ (select id from term where expression is ingest.expression and reading is ingest.reading),
+ term_tags || ' ',
+ ''
+ from ingest
+ union
+ select
+ term_id,
+ substr(temp, instr(temp, ' ') + 1),
+ substr(temp, 0, instr(temp, ' '))
+ from tag_map
+ where length(temp) > 1
+)
+select term_id, replace(tag, ' ', '')
+from tag_map
+where length(tag) > 0;
+
+-- create map of definition_id and tag code
+drop table if exists definition_tag_map;
+create temporary table definition_tag_map (definition_id, tag);
+insert into definition_tag_map
+with tag_map(definition_id, temp, tag) as (
+ select
+ (select id from definition where glossary is ingest.glossary),
+ glossary_tags || ' ',
+ ''
+ from ingest
+ union
+ select
+ definition_id,
+ substr(temp, instr(temp, ' ') + 1),
+ substr(temp, 0, instr(temp, ' '))
+ from tag_map
+ where length(temp) > 1
+)
+select definition_id, replace(tag, ' ', '')
+from tag_map
+where length(tag) > 0;
+
+-- make sure tags exist
+insert into tag (code)
+select tag from term_tag_map
+union
+select tag from definition_tag_map;
+
+-- add tags to terms
+insert into term_tag (term_id, tag_id)
+select
+ term_id,
+ tag.id
+from term_tag_map
+join tag on tag.code = term_tag_map.tag;
+
+-- add tags to definitions
+insert into definition_tag (definition_id, tag_id)
+select
+ definition_id,
+ tag.id
+from definition_tag_map
+join tag on tag.code = definition_tag_map.tag;
+