aboutsummaryrefslogtreecommitdiff
path: root/db/dict
diff options
context:
space:
mode:
authorlonkaars <loek@pipeframe.xyz>2023-06-28 23:59:50 +0200
committerlonkaars <loek@pipeframe.xyz>2023-06-28 23:59:50 +0200
commit67dbb6421976254658c5e38045513129dd18187a (patch)
tree288b599d1097b26bdbcad3b6749b38e133017cf2 /db/dict
initial public commit
Diffstat (limited to 'db/dict')
-rw-r--r--db/dict/.gitignore10
-rw-r--r--db/dict/deinflections.sql183
-rw-r--r--db/dict/init.sql115
-rw-r--r--db/dict/reset.sql12
-rw-r--r--db/dict/tags.sql11
-rw-r--r--db/dict/template.sql.m4104
-rw-r--r--db/dict/test_a.dict.sql25
-rw-r--r--db/dict/test_b.dict.sql9
-rw-r--r--db/dict/test_pitch_accent.dict.sql10
9 files changed, 479 insertions, 0 deletions
diff --git a/db/dict/.gitignore b/db/dict/.gitignore
new file mode 100644
index 0000000..96bd267
--- /dev/null
+++ b/db/dict/.gitignore
@@ -0,0 +1,10 @@
+base.sql
+full.sql
+
+test_a.sql
+test_b.sql
+test_pitch_accent.sql
+
+jmdict*
+
+dict.sql
diff --git a/db/dict/deinflections.sql b/db/dict/deinflections.sql
new file mode 100644
index 0000000..d13f313
--- /dev/null
+++ b/db/dict/deinflections.sql
@@ -0,0 +1,183 @@
+-- deinflection rules (ordered by appearance in tae kim's japanese grammar guide)
+create temporary table deinflection_temp (tag, kana_in, kana_out, rules_in, rules_out);
+insert into deinflection_temp values
+ -- negative <https://guidetojapanese.org/learn/grammar/negativeverbs>
+ ('infl:negative', 'ない', 'る', 'a', 'ru'),
+ ('infl:negative', 'わない', 'う', 'a', 'u'),
+ ('infl:negative', 'かない', 'く', 'a', 'u'),
+ ('infl:negative', 'がない', 'ぐ', 'a', 'u'),
+ ('infl:negative', 'さない', 'す', 'a', 'u'),
+ ('infl:negative', 'たない', 'つ', 'a', 'u'),
+ ('infl:negative', 'なない', 'ぬ', 'a', 'u'),
+ ('infl:negative', 'ばない', 'ぶ', 'a', 'u'),
+ ('infl:negative', 'まない', 'む', 'a', 'u'),
+ ('infl:negative', 'らない', 'る', 'a', 'u'),
+ ('infl:negative', 'しない', 'する', 'a', 's'),
+ ('infl:negative', 'こない', 'くる', 'a', 'k'),
+ ('infl:negative', '来ない', '来る', 'a', 'k'),
+ ('infl:negative', 'ない', 'ある', 'a', 'ru'), -- this one may cause problems (?)
+ -- ('infl:negative', 'ない', '', 'a', 'ru'), -- this one may cause problems (?)
+
+ -- past tense <https://guidetojapanese.org/learn/grammar/past_tense>
+ ('infl:tense:past', 'た', 'る', 'a', 'ru'),
+ ('infl:tense:past', 'した', 'す', 'a', 'u'),
+ ('infl:tense:past', 'いた', 'く', 'a', 'u'),
+ ('infl:tense:past', 'いだ', 'ぐ', 'a', 'u'),
+ ('infl:tense:past', 'んだ', 'む', 'a', 'u'),
+ ('infl:tense:past', 'んだ', 'ぬ', 'a', 'u'),
+ ('infl:tense:past', 'んだ', 'ぶ', 'a', 'u'),
+ ('infl:tense:past', 'った', 'う', 'a', 'u'),
+ ('infl:tense:past', 'った', 'つ', 'a', 'u'),
+ ('infl:tense:past', 'った', 'る', 'a', 'u'),
+ ('infl:tense:past', 'した', 'する', 'a', 's'),
+ ('infl:tense:past', 'きた', 'くる', 'a', 'k'),
+ ('infl:tense:past', '来た', 'くる', 'a', 'k'),
+ ('infl:tense:past', '行った', '行く', 'a', ''),
+
+ -- adjective to adverb <https://guidetojapanese.org/learn/grammar/adverbs>
+ ('infl:adverb', 'く', 'い', 'a', 'i'),
+ -- TODO: na-adjectives aren't deconjugated
+
+ -- polite form <https://guidetojapanese.org/learn/grammar/polite>
+ ('infl:polite:masu', 'ます', 'る', 'a', 'ru'),
+ ('infl:polite:masu', 'います', 'う', 'a', 'u'),
+ ('infl:polite:masu', 'きます', 'く', 'a', 'u'),
+ ('infl:polite:masu', 'ぎます', 'ぐ', 'a', 'u'),
+ ('infl:polite:masu', 'します', 'す', 'a', 'u'),
+ ('infl:polite:masu', 'ちます', 'つ', 'a', 'u'),
+ ('infl:polite:masu', 'にます', 'ぬ', 'a', 'u'),
+ ('infl:polite:masu', 'びます', 'ぶ', 'a', 'u'),
+ ('infl:polite:masu', 'みます', 'む', 'a', 'u'),
+ ('infl:polite:masu', 'ります', 'る', 'a', 'u'),
+ ('infl:polite:masu', 'します', 'する', 'a', 's'),
+ ('infl:polite:masu', 'きます', 'くる', 'a', 'k'),
+ ('infl:polite:masu', '来ます', '来る', 'a', 'k'),
+ ('infl:polite:masu infl:negative', 'ません', 'る', 'a', 'ru'),
+ ('infl:polite:masu infl:negative', 'いません', 'う', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'きません', 'く', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'ぎません', 'ぐ', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'しません', 'す', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'ちません', 'つ', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'にません', 'ぬ', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'びません', 'ぶ', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'みません', 'む', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'りません', 'る', 'a', 'u'),
+ ('infl:polite:masu infl:negative', 'しません', 'する', 'a', 's'),
+ ('infl:polite:masu infl:negative', 'きません', 'くる', 'a', 'k'),
+ ('infl:polite:masu infl:negative', '来ません', '来る', 'a', 'k'),
+ ('infl:polite:masu infl:tense:past', 'ました', 'る', 'a', 'ru'),
+ ('infl:polite:masu infl:tense:past', 'いました', 'う', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'きました', 'く', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'ぎました', 'ぐ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'しました', 'す', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'ちました', 'つ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'にました', 'ぬ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'びました', 'ぶ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'みました', 'む', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'りました', 'る', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past', 'しました', 'する', 'a', 's'),
+ ('infl:polite:masu infl:tense:past', 'きました', 'くる', 'a', 'k'),
+ ('infl:polite:masu infl:tense:past', '来ました', '来る', 'a', 'k'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'ませんでした', 'る', 'a', 'ru'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'いませんでした', 'う', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'きませんでした', 'く', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'ぎませんでした', 'ぐ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'しませんでした', 'す', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'ちませんでした', 'つ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'にませんでした', 'ぬ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'びませんでした', 'ぶ', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'みませんでした', 'む', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'りませんでした', 'る', 'a', 'u'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'しませんでした', 'する', 'a', 's'),
+ ('infl:polite:masu infl:tense:past infl:negative', 'きませんでした', 'くる', 'a', 'k'),
+ ('infl:polite:masu infl:tense:past infl:negative', '来ませんでした', '来る', 'a', 'k'),
+ -- ('infl:polite:masu infl:tense:past infl:negative', 'くありません', 'い', 'a', 'i'), -- reality check (should not be needed)
+
+ -- TODO: compound nouns and adjectives <https://guidetojapanese.org/learn/grammar/compound>
+
+ -- te-form <https://guidetojapanese.org/learn/grammar/compound>
+ ('infl:suffix:te', 'て', 'た', 'a', 'ru'),
+ ('infl:suffix:te', 'して', 'した', 'a', 's u'),
+ ('infl:suffix:te', 'いて', 'いた', 'a', 'u'),
+ ('infl:suffix:te', 'いで', 'いだ', 'a', 'u'),
+ ('infl:suffix:te', 'んで', 'んだ', 'a', 'u'),
+ ('infl:suffix:te', 'って', 'った', 'a', 'u'),
+ ('infl:suffix:te', 'きて', 'きた', 'a', 'k'),
+ ('infl:suffix:te', '来て', '来た', 'a', 'k'),
+ ('infl:suffix:te', 'くて', 'い', 'a', ''), -- TODO: rules_out of this one is i?
+
+ -- -tari lists <https://guidetojapanese.org/learn/grammar/compound>
+ ('infl:suffix:tari', 'たり', 'た', 'a', 'ru'),
+ ('infl:suffix:tari', 'したり', 'した', 'a', 's u'),
+ ('infl:suffix:tari', 'いたり', 'いた', 'a', 'u'),
+ ('infl:suffix:tari', 'いだり', 'いだ', 'a', 'u'),
+ ('infl:suffix:tari', 'んだり', 'んだ', 'a', 'u'),
+ ('infl:suffix:tari', 'ったり', 'った', 'a', 'u'),
+ ('infl:suffix:tari', 'きたり', 'きた', 'a', 'k'),
+ ('infl:suffix:tari', '来たり', '来た', 'a', 'k'),
+
+ -- auxiliary rules
+ ('class:verb:suru-included', 'する', '', 's', ''); -- deconjugate suru verbs into stem
+
+-- rule/bitmask lookup table
+create temporary table rule_map (tag, name, mask);
+insert into rule_map values
+ (null, 'a', -1 ), -- all (allow all rules in)
+ (null, '', 0 ), -- (nothing)
+ ('infl:reason:ru', 'ru', 1 << 0), -- 一段活用 (ichidan a.k.a. ru-verbs in tae kim's japanese grammar guide)
+ ('infl:reason:u', 'u', 1 << 1), -- 五段活用 (godan a.k.a. u-verbs in tae kim's japanese grammar guide)
+ ('infl:reason:suru', 's', 1 << 2), -- する (suru)
+ ('infl:reason:kuru', 'k', 1 << 3), -- くる (kuru)
+ (null, 'z', 1 << 4), -- ずる (zuru)
+ ('infl:reason:adj-i', 'i', 1 << 5), -- 形容詞 (i-adjective)
+ (null, 'iru', 1 << 6); -- 〜いる (temporary iru for progressive tense)
+
+-- add tags to db
+insert into deinflection_rules (mask, tag)
+select mask, tag
+from rule_map
+where tag is not null;
+
+-- convert space-separated strings to bitmasks and insert into deinflection table
+insert into deinflection (tag, kana_in, kana_out, rules_in, rules_out)
+with temp_deinflection_map(tag, kana_in, kana_out, rules_in_stack, rules_in, rules_out_stack, rules_out) as (
+ select
+ tag,
+ kana_in,
+ kana_out,
+ rules_in || ' ',
+ '',
+ rules_out || ' ',
+ ''
+ from deinflection_temp
+ union
+ select
+ tag,
+ kana_in,
+ kana_out,
+ substr(rules_in_stack, instr(rules_in_stack, ' ') + 1),
+ replace(substr(rules_in_stack, 0, instr(rules_in_stack, ' ')), ' ', ''),
+ substr(rules_out_stack, instr(rules_out_stack, ' ') + 1),
+ replace(substr(rules_out_stack, 0, instr(rules_out_stack, ' ')), ' ', '')
+ from temp_deinflection_map
+ where
+ (length(rules_in_stack) > 1) or
+ (length(rules_out_stack) > 1)
+)
+select
+ temp_deinflection_map.tag,
+ temp_deinflection_map.kana_in,
+ temp_deinflection_map.kana_out,
+ -- NOTE: sum() should really be bitwise and across all rows after group by
+ -- here. be careful to not specify rules more than once in these columns, or
+ -- any other rule if the column has 'a'.
+ sum(rule_map_in.mask) as rules_in,
+ sum(rule_map_out.mask) as rules_out
+from temp_deinflection_map
+join rule_map as rule_map_in on rule_map_in.name = temp_deinflection_map.rules_in
+join rule_map as rule_map_out on rule_map_out.name = temp_deinflection_map.rules_out
+group by
+ temp_deinflection_map.tag,
+ temp_deinflection_map.kana_in,
+ temp_deinflection_map.kana_out;
+
diff --git a/db/dict/init.sql b/db/dict/init.sql
new file mode 100644
index 0000000..4e9fcc9
--- /dev/null
+++ b/db/dict/init.sql
@@ -0,0 +1,115 @@
+-- deinflection map (see dict/deinflections.sql)
+create table if not exists deinflection (
+ tag text not null,
+ kana_in text not null,
+ kana_out text not null,
+ rules_in int not null default 0,
+ rules_out int not null default 0
+);
+create index deinflection_kana_in on deinflection (kana_in);
+
+-- lookup table for deinflection rule bitmasks
+create table if not exists deinflection_rules (
+ mask int not null default 0,
+ tag text not null
+);
+
+-- dictionary metadata
+create table if not exists dict (
+ id integer primary key autoincrement,
+ tag text not null, -- TODO: dict.tag -> dict.tag_id
+ language text null default null,
+ priority int not null default 1
+);
+
+-- tag code<->id lookup
+create table if not exists tag (
+ id integer primary key autoincrement,
+ code text not null,
+ unique(code) on conflict ignore
+);
+create index tag_code on tag (code);
+
+
+-- definition of term (with single meaning)
+--
+-- terms that have multiple meanings should have multiple entries in the
+-- definition table with the same term_id. glossary contains an explanation or
+-- list of synonyms to illustrate the meaning of the term.
+create table if not exists definition (
+ id integer primary key autoincrement,
+ term_id int not null, -- reference to term (combined writing and reading)
+ sort int not null default 1, -- sort multiple meanings/glossaries within dictionary
+ -- search results for terms are grouped by term, then dictionary, then sort
+ -- within each dictionary
+ glossary text null default null, -- explanation or list of synonyms for term
+ dict_id int not null, -- origin dictionary of term
+ foreign key(term_id) references term(id),
+ foreign key(dict_id) references dict(id),
+ unique(glossary) on conflict ignore
+);
+
+-- human-readable tag label lookup table (used in UI and for translations)
+create table if not exists tag_label (
+ id integer primary key autoincrement,
+ tag_id int not null, -- reference to tag
+ language text not null default "en-US", -- label locale as RFC 5646 tag (e.g. ja)
+ -- label locale is used when displaying tags in a UI frontend. labels are
+ -- sorted based on langauge preference, with "en-US" being inserted last as a
+ -- fallback.
+ label_short text null default null, -- abbreviated label (e.g. ティアキン)
+ label text not null, -- full label (e.g. ティアーズ・オブ・ザ・キングダム)
+ description text null default null, -- short description (e.g. ゼルダの伝説シリーズより)
+ foreign key(tag_id) references tag(id)
+);
+
+-- standalone combination of kanji and reading
+--
+-- terms are added automatically when importing a dictionary, but are shared
+-- between dictionaries and thus not removed when disabling/removing a
+-- dictionary. terms are ranked outside the database, and the starting rank
+-- score can be changed using the user database (TODO)
+create table if not exists term (
+ id integer primary key autoincrement,
+ expression text not null, -- writing of term (e.g. 乗り越える)
+ reading text not null, -- reading of term (e.g. のりこえる)
+ alt int null default null, -- reference to main writing if this is is an alternate
+ -- alternate readings are stored as normal terms, but point to their main
+ -- reading. also used internally to store alternate readings of irregular
+ -- verbs example:
+ -- id expression reading alt
+ -- 1 言葉 ことば NULL
+ -- 2 詞 ことば 1
+ -- 3 辞 ことば 1
+ -- 4 来る くる NULL
+ -- 5 来た きた 4
+ -- NOTE: alternate writings can technically be stored recursively this way,
+ -- but are only resolved one level deep TODO: resolve alternates
+ foreign key(alt) references term(id),
+ unique(expression, reading) on conflict ignore
+);
+create index term_expression on term (expression);
+create index term_reading on term (reading);
+-- TODO: (automatically) remove unused terms from db (using constraints?)
+
+
+-- allow many<->many relation between definition and tag
+create table if not exists definition_tag (
+ id integer primary key autoincrement,
+ definition_id int not null,
+ tag_id int not null,
+ foreign key(definition_id) references definition(id),
+ foreign key(tag_id) references tag(id),
+ unique(definition_id, tag_id) on conflict ignore
+);
+
+-- allow many<->many relation between term and tag
+create table if not exists term_tag (
+ id integer primary key autoincrement,
+ term_id int not null,
+ tag_id int not null,
+ foreign key(term_id) references term(id),
+ foreign key(tag_id) references tag(id),
+ unique(term_id, tag_id) on conflict ignore
+);
+
diff --git a/db/dict/reset.sql b/db/dict/reset.sql
new file mode 100644
index 0000000..757e31b
--- /dev/null
+++ b/db/dict/reset.sql
@@ -0,0 +1,12 @@
+drop table if exists definition_tag;
+drop table if exists term_tag;
+
+drop table if exists definition;
+drop table if exists tag_label;
+drop table if exists term;
+
+drop table if exists deinflection;
+drop table if exists deinflection_rules;
+drop table if exists dict;
+drop table if exists tag;
+
diff --git a/db/dict/tags.sql b/db/dict/tags.sql
new file mode 100644
index 0000000..a200abb
--- /dev/null
+++ b/db/dict/tags.sql
@@ -0,0 +1,11 @@
+insert into tag (code) values
+ ('class:verb'),
+ ('class:verb:suru'),
+ ('class:verb:suru-included'),
+ ('class:noun'),
+ ('class:suffix'),
+ ('class:part'),
+ ('class:expr'),
+ ('name:place'),
+ ('name:female'),
+ ('name:male');
diff --git a/db/dict/template.sql.m4 b/db/dict/template.sql.m4
new file mode 100644
index 0000000..00de413
--- /dev/null
+++ b/db/dict/template.sql.m4
@@ -0,0 +1,104 @@
+-- create temporary ingest table
+drop table if exists ingest;
+-- TODO: ingest pitch-accent dictionaries
+-- TODO: ingest alternate writings (space-separated)
+create temporary table ingest(
+ -- term fields
+ expression text not null, -- kanji of term (e.g. 読み込む)
+ reading text not null, -- reading of term (e.g. よみこむ)
+ term_tags text not null default '', -- space-separated *term* tags, merged if term already exists in DB
+
+ -- definition fields
+ glossary_sort int not null default 1, -- order of multiple meanings (glossaries)
+ glossary text null default null, -- glossary content (support for basic HTML markup/styling)
+ glossary_tags text null default null -- add tags to single glossary entry
+);
+
+include(`/dev/stdin')dnl --'
+-- the apostrophe is so my editor highlighting keeps working if I force the
+-- filetype to sql instead of m4
+
+-- create dict id
+insert into dict (tag, language) values ('dict:' || :dict, :lang);
+
+-- add terms
+insert into term (expression, reading)
+select expression, reading
+from ingest;
+
+-- add definitions
+insert into definition (term_id, sort, glossary, dict_id)
+select
+ term.id,
+ ingest.glossary_sort,
+ ingest.glossary,
+ (select id from dict where tag = 'dict:' || :dict)
+from ingest
+join term on term.expression = ingest.expression and term.reading = ingest.reading;
+
+-- create map of term_id and tag code
+drop table if exists term_tag_map;
+create temporary table term_tag_map (term_id, tag);
+insert into term_tag_map
+with tag_map(term_id, temp, tag) as (
+ select
+ (select id from term where expression is ingest.expression and reading is ingest.reading),
+ term_tags || ' ',
+ ''
+ from ingest
+ union
+ select
+ term_id,
+ `substr'(temp, instr(temp, ' ') + 1),
+ `substr'(temp, 0, instr(temp, ' '))
+ from tag_map
+ where length(temp) > 1
+)
+select term_id, replace(tag, ' ', '')
+from tag_map
+where length(tag) > 0;
+
+-- create map of definition_id and tag code
+drop table if exists definition_tag_map;
+create temporary table definition_tag_map (definition_id, tag);
+insert into definition_tag_map
+with tag_map(definition_id, temp, tag) as (
+ select
+ (select id from definition where glossary is ingest.glossary),
+ glossary_tags || ' ',
+ ''
+ from ingest
+ union
+ select
+ definition_id,
+ `substr'(temp, instr(temp, ' ') + 1),
+ `substr'(temp, 0, instr(temp, ' '))
+ from tag_map
+ where length(temp) > 1
+)
+select definition_id, replace(tag, ' ', '')
+from tag_map
+where length(tag) > 0;
+
+-- make sure tags exist
+insert into tag (code)
+select tag from term_tag_map
+union
+select tag from definition_tag_map;
+
+-- add tags to terms
+insert into term_tag (term_id, tag_id)
+select
+ term_id,
+ tag.id
+from term_tag_map
+join tag on tag.code = term_tag_map.tag;
+
+-- add tags to definitions
+insert into definition_tag (definition_id, tag_id)
+select
+ definition_id,
+ tag.id
+from definition_tag_map
+join tag on tag.code = definition_tag_map.tag;
+
diff --git a/db/dict/test_a.dict.sql b/db/dict/test_a.dict.sql
new file mode 100644
index 0000000..854d207
--- /dev/null
+++ b/db/dict/test_a.dict.sql
@@ -0,0 +1,25 @@
+-- set these
+.param set :dict 'test_a'
+.param set :lang 'nl_NL'
+
+-- dictionary content
+insert into ingest(expression, reading, term_tags, glossary_sort, glossary, glossary_tags) values
+ ('浮上', 'ふじょう', 'class:verb:suru class:noun', 1, 'opstijgen, zweven', ''),
+ ('城', 'しろ', 'class:noun', 1, 'kasteel', ''),
+ ('城', 'じょう', 'class:suffix', 1, '-burcht, -burg (suffix voor kastelen)', ''),
+ ('下', 'した', 'class:noun', 1, 'onder, beneden, omlaag', ''),
+ ('の下に', 'のもとに', 'class:expr', 1, 'onder leiding van', ''),
+ ('ハイラル', 'はいらる', 'series:zelda name:place class:noun', 1, 'Hyrule', ''),
+ ('にて', 'にて', 'class:part', 1, '{duidt aanwezigheid op een plaats aan}', ''),
+ ('ゼルダ', 'ぜるだ', 'series:zelda name:female class:noun', 1, 'Zelda', ''),
+ ('様', 'さま', 'class:suffix', 1, 'meneer, mijnheer, mevrouw, madame', ''),
+ ('様', 'よう', 'class:noun', 1, '(eruit zien) als, zoals, manier', ''),
+ ('達', 'たち', 'class:suffix', 1, '{meervoudssuffix}', ''),
+ ('の', 'の', 'class:part', 1, '{bezitspartikel}', ''),
+ ('と', 'と', 'class:part', 1, '{opsommingspartikel}', ''),
+ ('捜索', 'そうさく', 'class:verb:suru class:noun', 1, 'onderzoek, opspeuren, achterhalen', ''),
+ ('を', 'を', 'class:part', 1, '{lijdend voornaamwoord partikel}', ''),
+ ('行う', 'おこなう', 'class:verb', 1, 'uitvoeren', ''),
+ ('こと', 'こと', 'class:part class:noun', 1, '{eindpartikel dat een bevel / waarschuwing uitdrukt}', ''),
+ ('成る', 'なる', 'class:verb aux:test', 1, 'worden, veranderen in', '');
+
diff --git a/db/dict/test_b.dict.sql b/db/dict/test_b.dict.sql
new file mode 100644
index 0000000..0140189
--- /dev/null
+++ b/db/dict/test_b.dict.sql
@@ -0,0 +1,9 @@
+-- set these
+.param set :dict 'test_b'
+.param set :lang 'en_US'
+
+-- dictionary content
+insert into ingest(expression, reading, term_tags, glossary_sort, glossary, glossary_tags) values
+ ('城', 'しろ', 'aux:test_b', 1, 'castle', ''),
+ ('城', 'じょう', 'aux:test_b aux:test_b aux:nog_een_test_b', 1, '-burg (suffix for castles)', 'aux:gert aux:test aux:nog_wat');
+
diff --git a/db/dict/test_pitch_accent.dict.sql b/db/dict/test_pitch_accent.dict.sql
new file mode 100644
index 0000000..3b8298f
--- /dev/null
+++ b/db/dict/test_pitch_accent.dict.sql
@@ -0,0 +1,10 @@
+-- this is an example pitch accent dictionary
+-- set these
+.param set :dict 'pitch_accent'
+.param set :lang null
+
+-- dictionary content
+insert into ingest (expression, reading, glossary_tags) values
+ ('浮上', 'ふじょう', 'pitch:0'),
+ ('成る', 'なる', 'pitch:1');
+