diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-10 14:43:06 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-10 14:43:06 -0500 |
commit | f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch) | |
tree | 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /ext/bg/data | |
parent | 083da93142ec6302021ee1c29428121b54fc9e68 (diff) |
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'ext/bg/data')
-rw-r--r-- | ext/bg/data/options-schema.json | 61 |
1 files changed, 60 insertions, 1 deletions
diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index 9053ebb1..151a7fe3 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -846,13 +846,72 @@ "sentenceParsing": { "type": "object", "required": [ - "scanExtent" + "scanExtent", + "enableTerminationCharacters", + "terminationCharacters" ], "properties": { "scanExtent": { "type": "integer", "minimum": 0, "default": 200 + }, + "enableTerminationCharacters": { + "type": "boolean", + "default": true + }, + "terminationCharacters": { + "type": "array", + "items": { + "type": "object", + "required": [ + "enabled", + "character1", + "character2", + "includeCharacterAtStart", + "includeCharacterAtEnd" + ], + "properties": { + "enabled": { + "type": "boolean", + "default": true + }, + "character1": { + "type": "string", + "default": "\"", + "minLength": 1, + "maxLength": 1 + }, + "character2": { + "type": ["string", "null"], + "default": "\"", + "minLength": 1, + "maxLength": 1 + }, + "includeCharacterAtStart": { + "type": "boolean", + "default": false + }, + "includeCharacterAtEnd": { + "type": "boolean", + "default": false + } + } + }, + "default": [ + {"enabled": true, "character1": "「", "character2": "」", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": "『", "character2": "』", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": "\"", "character2": "\"", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": "'", "character2": "'", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": ".", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "!", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "?", "character2": null, 
"includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": ".", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "。", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "!", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "?", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "…", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true} + ] } } } |