diff options
| -rw-r--r-- | ext/bg/data/dictionary-index-schema.json | 40 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-kanji-bank-v1-schema.json | 33 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-kanji-bank-v3-schema.json | 44 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json | 25 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-tag-bank-v3-schema.json | 32 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-term-bank-v1-schema.json | 36 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-term-bank-v3-schema.json | 48 | ||||
| -rw-r--r-- | ext/bg/data/dictionary-term-meta-bank-v3-schema.json | 25 | ||||
| -rw-r--r-- | test/dictionary-validate.js | 90 | 
9 files changed, 373 insertions, 0 deletions
| diff --git a/ext/bg/data/dictionary-index-schema.json b/ext/bg/data/dictionary-index-schema.json new file mode 100644 index 00000000..9865fcc1 --- /dev/null +++ b/ext/bg/data/dictionary-index-schema.json @@ -0,0 +1,40 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "object", +    "description": "Index file containing information about the data contained in the dictionary.", +    "required": [ +        "title", +        "revision" +    ], +    "properties": { +        "title": { +            "type": "string", +            "description": "Title of the dictionary." +        }, +        "revision": { +            "type": "string", +            "description": "Revision of the dictionary. This value is only used for displaying information." +        }, +        "sequenced": { +            "type": "boolean", +            "default": false, +            "description": "Whether or not this dictionary can be used as the primary dictionary. Primary dictionaries typically contain term/expression definitions." +        }, +        "format": { +            "type": "integer", +            "description": "Format of data found in the JSON data files." +        }, +        "version": { +            "type": "integer", +            "description": "Alias for format." +        } +    }, +    "anyOf": [ +        { +            "required": ["format"] +        }, +        { +            "required": ["version"] +        } +    ] +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-kanji-bank-v1-schema.json b/ext/bg/data/dictionary-kanji-bank-v1-schema.json new file mode 100644 index 00000000..6dad5a7a --- /dev/null +++ b/ext/bg/data/dictionary-kanji-bank-v1-schema.json @@ -0,0 +1,33 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Data file containing kanji information.", +    "additionalItems": { +        "type": "array", +        "description": "Information about a single kanji character.", +        "minItems": 4, +        "items": [ +            { +                "type": "string", +                "description": "Kanji character.", +                "minLength": 1 +            }, +            { +                "type": "string", +                "description": "String of space-separated onyomi readings for the kanji character. An empty string is treated as no readings." +            }, +            { +                "type": "string", +                "description": "String of space-separated kunyomi readings for the kanji character. An empty string is treated as no readings." +            }, +            { +                "type": "string", +                "description": "String of space-separated tags for the kanji character. An empty string is treated as no tags." +            } +        ], +        "additionalItems": { +            "type": "string", +            "description": "A meaning for the kanji character." +        } +    } +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-kanji-bank-v3-schema.json b/ext/bg/data/dictionary-kanji-bank-v3-schema.json new file mode 100644 index 00000000..a5b82039 --- /dev/null +++ b/ext/bg/data/dictionary-kanji-bank-v3-schema.json @@ -0,0 +1,44 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Data file containing kanji information.", +    "additionalItems": { +        "type": "array", +        "description": "Information about a single kanji character.", +        "minItems": 6, +        "items": [ +            { +                "type": "string", +                "description": "Kanji character.", +                "minLength": 1 +            }, +            { +                "type": "string", +                "description": "String of space-separated onyomi readings for the kanji character. An empty string is treated as no readings." +            }, +            { +                "type": "string", +                "description": "String of space-separated kunyomi readings for the kanji character. An empty string is treated as no readings." +            }, +            { +                "type": "string", +                "description": "String of space-separated tags for the kanji character. An empty string is treated as no tags." +            }, +            { +                "type": "array", +                "description": "Array of meanings for the kanji character.", +                "items": { +                    "type": "string", +                    "description": "A meaning for the kanji character." +                } +            }, +            { +                "type": "object", +                "description": "Various stats for the kanji character.", +                "additionalProperties": { +                    "type": "string" +                } +            } +        ] +    } +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json b/ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json new file mode 100644 index 00000000..62479026 --- /dev/null +++ b/ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json @@ -0,0 +1,25 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Custom metadata for kanji characters.", +    "additionalItems": { +        "type": "array", +        "description": "Metadata about a single kanji character.", +        "minItems": 3, +        "items": [ +            { +                "type": "string", +                "minLength": 1 +            }, +            { +                "type": "string", +                "enum": ["freq"], +                "description": "Type of data. \"freq\" corresponds to frequency information." +            }, +            { +                "type": ["string", "number"], +                "description": "Data for the character." +            } +        ] +    } +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-tag-bank-v3-schema.json b/ext/bg/data/dictionary-tag-bank-v3-schema.json new file mode 100644 index 00000000..ee5ca64d --- /dev/null +++ b/ext/bg/data/dictionary-tag-bank-v3-schema.json @@ -0,0 +1,32 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Data file containing tag information for terms and kanji.", +    "additionalItems": { +        "type": "array", +        "description": "Information about a single tag.", +        "minItems": 5, +        "items": [ +            { +                "type": "string", +                "description": "Tag name." +            }, +            { +                "type": "string", +                "description": "Category for the tag." +            }, +            { +                "type": "number", +                "description": "Sorting order for the tag." +            }, +            { +                "type": "string", +                "description": "Notes for the tag." +            }, +            { +                "type": "number", +                "description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results." +            } +        ] +    } +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-term-bank-v1-schema.json b/ext/bg/data/dictionary-term-bank-v1-schema.json new file mode 100644 index 00000000..6ffb26e6 --- /dev/null +++ b/ext/bg/data/dictionary-term-bank-v1-schema.json @@ -0,0 +1,36 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Data file containing term and expression information.", +    "additionalItems": { +        "type": "array", +        "description": "Information about a single term/expression.", +        "minItems": 5, +        "items": [ +            { +                "type": "string", +                "description": "Term or expression." +            }, +            { +                "type": "string", +                "description": "Reading of the term/expression, or an empty string if the reading is the same as the term/expression." +            }, +            { +                "type": ["string", "null"], +                "description": "String of space-separated tags for the definition. An empty string is treated as no tags." +            }, +            { +                "type": "string", +                "description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns." +            }, +            { +                "type": "number", +                "description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results." +            } +        ], +        "additionalItems": { +            "type": "string", +            "description": "Single definition for the term/expression." +        } +    } +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-term-bank-v3-schema.json b/ext/bg/data/dictionary-term-bank-v3-schema.json new file mode 100644 index 00000000..bb982e36 --- /dev/null +++ b/ext/bg/data/dictionary-term-bank-v3-schema.json @@ -0,0 +1,48 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Data file containing term and expression information.", +    "additionalItems": { +        "type": "array", +        "description": "Information about a single term/expression.", +        "minItems": 8, +        "items": [ +            { +                "type": "string", +                "description": "Term or expression." +            }, +            { +                "type": "string", +                "description": "Reading of the term/expression, or an empty string if the reading is the same as the term/expression." +            }, +            { +                "type": ["string", "null"], +                "description": "String of space-separated tags for the definition. An empty string is treated as no tags." +            }, +            { +                "type": "string", +                "description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns." +            }, +            { +                "type": "number", +                "description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results." 
+            }, +            { +                "type": "array", +                "description": "Array of definitions for the term/expression.", +                "items": { +                    "type": "string", +                    "description": "Single definition for the term/expression." +                } +            }, +            { +                "type": "integer", +                "description": "Sequence number for the term/expression. Terms/expressions with the same sequence number can be shown together when the \"resultOutputMode\" option is set to \"merge\"." +            }, +            { +                "type": "string", +                "description": "String of space-separated tags for the term/expression. An empty string is treated as no tags." +            } +        ] +    } +}
\ No newline at end of file diff --git a/ext/bg/data/dictionary-term-meta-bank-v3-schema.json b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json new file mode 100644 index 00000000..1cc0557f --- /dev/null +++ b/ext/bg/data/dictionary-term-meta-bank-v3-schema.json @@ -0,0 +1,25 @@ +{ +    "$schema": "http://json-schema.org/draft-07/schema#", +    "type": "array", +    "description": "Custom metadata for terms/expressions.", +    "additionalItems": { +        "type": "array", +        "description": "Metadata about a single term/expression.", +        "minItems": 3, +        "items": [ +            { +                "type": "string", +                "description": "Term or expression." +            }, +            { +                "type": "string", +                "enum": ["freq"], +                "description": "Type of data. \"freq\" corresponds to frequency information." +            }, +            { +                "type": ["string", "number"], +                "description": "Data for the term/expression." +            } +        ] +    } +}
// test/dictionary-validate.js
//
// Command-line tool: validates the JSON files inside one or more dictionary
// archives against the schemas stored in ext/bg/data.
//
// Usage:
//   node dictionary-validate <dictionary-file-names>...
//
// The process exit code is non-zero if any archive fails validation or if the
// tool itself fails unexpectedly.

const fs = require('fs');
const path = require('path');

// Suppress a warning about JSZip while it loads, then restore whatever the
// setting was before (so an explicit --no-deprecation is not overridden).
const previousNoDeprecation = process.noDeprecation;
process.noDeprecation = true;
const JSZip = require(path.join(__dirname, '../ext/mixed/lib/jszip.min.js'));
process.noDeprecation = previousNoDeprecation;

// json-schema.js is evaluated from source and its JsonSchema binding returned,
// rather than being loaded with require().
// NOTE(review): presumably the file is a plain script with no module exports — confirm.
const jsonSchemaFileName = path.join(__dirname, '../ext/bg/js/json-schema.js');
const jsonSchemaFileSource = fs.readFileSync(jsonSchemaFileName, {encoding: 'utf8'});
const JsonSchema = Function(`'use strict';${jsonSchemaFileSource};return JsonSchema;`)();


/**
 * Reads and parses a JSON schema file.
 * @param {string} relativeFileName Path to the schema, relative to this script's directory.
 * @returns {*} The parsed JSON content.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function readSchema(relativeFileName) {
    const fileName = path.join(__dirname, relativeFileName);
    const source = fs.readFileSync(fileName, {encoding: 'utf8'});
    return JSON.parse(source);
}


/**
 * Validates every numbered bank file of one kind inside an archive.
 * File names are produced by replacing the first "%s" in fileNameFormat with
 * 1, 2, 3, ...; scanning stops at the first number with no matching file.
 * @param {object} zip Archive object returned by JSZip.loadAsync.
 * @param {string} fileNameFormat File name pattern containing a "%s" placeholder.
 * @param {*} schema Parsed schema passed to JsonSchema.validate.
 * @throws If a bank file is not valid JSON, or if JsonSchema.validate throws
 *     (presumably on a schema violation; its implementation is not visible here).
 */
async function validateDictionaryBanks(zip, fileNameFormat, schema) {
    for (let index = 1; ; ++index) {
        const fileName = fileNameFormat.replace(/%s/, index);

        const file = zip.files[fileName];
        if (!file) { break; }

        const data = JSON.parse(await file.async('string'));
        JsonSchema.validate(data, schema);
    }
}

/**
 * Validates the index and all bank files of a single dictionary archive.
 * @param {string} fileName Path to the dictionary archive.
 * @param {object} schemas Parsed schemas keyed by index, termBankV1, termBankV3,
 *     termMetaBankV3, kanjiBankV1, kanjiBankV3, kanjiMetaBankV3 and tagBankV3.
 * @throws If the archive cannot be read, has no index.json, contains invalid
 *     JSON, or if JsonSchema.validate throws.
 */
async function validateDictionary(fileName, schemas) {
    const source = fs.readFileSync(fileName);
    const zip = await JSZip.loadAsync(source);

    const indexFile = zip.files['index.json'];
    if (!indexFile) {
        throw new Error('No dictionary index found in archive');
    }

    const index = JSON.parse(await indexFile.async('string'));

    // Validate the index before reading its fields, so that a malformed index
    // (for example a JSON null) is handed to the validator instead of failing
    // with a TypeError on property access.
    JsonSchema.validate(index, schemas.index);

    // "version" is described by the index schema as an alias for "format".
    const version = index.format || index.version;

    await validateDictionaryBanks(zip, 'term_bank_%s.json', version === 1 ? schemas.termBankV1 : schemas.termBankV3);
    await validateDictionaryBanks(zip, 'term_meta_bank_%s.json', schemas.termMetaBankV3);
    await validateDictionaryBanks(zip, 'kanji_bank_%s.json', version === 1 ? schemas.kanjiBankV1 : schemas.kanjiBankV3);
    await validateDictionaryBanks(zip, 'kanji_meta_bank_%s.json', schemas.kanjiMetaBankV3);
    await validateDictionaryBanks(zip, 'tag_bank_%s.json', schemas.tagBankV3);
}


/**
 * Entry point: validates each archive named on the command line.
 * A failure in one archive is reported and does not stop the remaining
 * archives from being checked, but it marks the process exit code as failed.
 */
async function main() {
    const dictionaryFileNames = process.argv.slice(2);
    if (dictionaryFileNames.length === 0) {
        console.log([
            'Usage:',
            '  node dictionary-validate <dictionary-file-names>...'
        ].join('\n'));
        return;
    }

    const schemas = {
        index: readSchema('../ext/bg/data/dictionary-index-schema.json'),
        kanjiBankV1: readSchema('../ext/bg/data/dictionary-kanji-bank-v1-schema.json'),
        kanjiBankV3: readSchema('../ext/bg/data/dictionary-kanji-bank-v3-schema.json'),
        kanjiMetaBankV3: readSchema('../ext/bg/data/dictionary-kanji-meta-bank-v3-schema.json'),
        tagBankV3: readSchema('../ext/bg/data/dictionary-tag-bank-v3-schema.json'),
        termBankV1: readSchema('../ext/bg/data/dictionary-term-bank-v1-schema.json'),
        termBankV3: readSchema('../ext/bg/data/dictionary-term-bank-v3-schema.json'),
        termMetaBankV3: readSchema('../ext/bg/data/dictionary-term-meta-bank-v3-schema.json')
    };

    for (const dictionaryFileName of dictionaryFileNames) {
        try {
            console.log(`Validating ${dictionaryFileName}...`);
            await validateDictionary(dictionaryFileName, schemas);
            console.log('No issues found');
        } catch (e) {
            console.warn(e);
            // Keep going with the other archives, but make the failure
            // visible to callers (scripts, CI) through the exit code.
            process.exitCode = 1;
        }
    }
}


// Failures outside the per-archive loop (for example an unreadable schema
// file) would otherwise surface as an unhandled promise rejection.
main().catch((e) => {
    console.error(e);
    process.exitCode = 1;
});