diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-10 14:43:06 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-10 14:43:06 -0500 |
commit | f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch) | |
tree | 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 | |
parent | 083da93142ec6302021ee1c29428121b54fc9e68 (diff) |
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
-rw-r--r-- | ext/bg/css/settings2.css | 75 | ||||
-rw-r--r-- | ext/bg/data/options-schema.json | 61 | ||||
-rw-r--r-- | ext/bg/js/options.js | 19 | ||||
-rw-r--r-- | ext/bg/js/settings2/sentence-termination-characters-controller.js | 255 | ||||
-rw-r--r-- | ext/bg/js/settings2/settings-main.js | 4 | ||||
-rw-r--r-- | ext/bg/settings2.html | 125 | ||||
-rw-r--r-- | ext/fg/js/frontend.js | 4 | ||||
-rw-r--r-- | ext/mixed/js/display.js | 8 | ||||
-rw-r--r-- | ext/mixed/js/document-util.js | 47 | ||||
-rw-r--r-- | ext/mixed/js/text-scanner.js | 62 | ||||
-rw-r--r-- | test/test-document-util.js | 23 | ||||
-rw-r--r-- | test/test-options-util.js | 17 |
12 files changed, 638 insertions, 62 deletions
diff --git a/ext/bg/css/settings2.css b/ext/bg/css/settings2.css index a74d0942..6ae9e335 100644 --- a/ext/bg/css/settings2.css +++ b/ext/bg/css/settings2.css @@ -1856,6 +1856,81 @@ input.translation-text-replacement-test-output { display: none; } +.sentence-termination-character-list-table { + width: 100%; + table-layout: fixed; + border-spacing: 0.25em; + margin-top: 0.5em; + min-width: 400px; +} +.sentence-termination-character-list-table thead td { + white-space: nowrap; + font-size: var(--font-size-small); + line-height: 1; + text-align: left; + vertical-align: bottom; + font-weight: normal; +} +.sentence-termination-character-list-table td { + vertical-align: middle; + padding: 0; +} +.sentence-termination-character-list-table td:nth-child(1) { + width: 2em; +} +.sentence-termination-character-list-table td:nth-child(2) { + width: 4em; +} +.sentence-termination-character-list-table td:nth-child(3) { + width: 25%; +} +.sentence-termination-character-list-table td:nth-child(4) { + width: 18.5%; +} +.sentence-termination-character-list-table td:nth-child(5) { + width: 18.5%; +} +.sentence-termination-character-list-table td:nth-child(6) { + width: 40%; +} +.sentence-termination-character-list-table td:nth-child(7) { + width: 3em; +} +select.sentence-termination-character-type, +input.sentence-termination-character-input1, +input.sentence-termination-character-input2 { + width: 100%; +} +.sentence-termination-character-input2-alt { + text-align: center; +} +.sentence-termination-character-entry:not([data-type=quote]) .sentence-termination-character-input2 { + display: none; +} +.sentence-termination-character-entry[data-type=quote] .sentence-termination-character-input2-alt { + display: none; +} +.sentence-termination-character-include-list { + display: flex; + flex-flow: row nowrap; +} +.sentence-termination-character-include { + display: flex; + flex-flow: row nowrap; + white-space: nowrap; + align-items: center; +} 
+.sentence-termination-character-include>:first-child { + margin-right: 0.375em; +} +.sentence-termination-character-include+.sentence-termination-character-include { + margin-left: 1em; +} +#sentence-termination-character-list-empty { + flex: 0 1 auto; + margin-top: 0.5em; +} + /* Generic layouts */ .margin-above { diff --git a/ext/bg/data/options-schema.json b/ext/bg/data/options-schema.json index 9053ebb1..151a7fe3 100644 --- a/ext/bg/data/options-schema.json +++ b/ext/bg/data/options-schema.json @@ -846,13 +846,72 @@ "sentenceParsing": { "type": "object", "required": [ - "scanExtent" + "scanExtent", + "enableTerminationCharacters", + "terminationCharacters" ], "properties": { "scanExtent": { "type": "integer", "minimum": 0, "default": 200 + }, + "enableTerminationCharacters": { + "type": "boolean", + "default": true + }, + "terminationCharacters": { + "type": "array", + "items": { + "type": "object", + "required": [ + "enabled", + "character1", + "character2", + "includeCharacterAtStart", + "includeCharacterAtEnd" + ], + "properties": { + "enabled": { + "type": "boolean", + "default": true + }, + "character1": { + "type": "string", + "default": "\"", + "minLength": 1, + "maxLength": 1 + }, + "character2": { + "type": ["string", "null"], + "default": "\"", + "minLength": 1, + "maxLength": 1 + }, + "includeCharacterAtStart": { + "type": "boolean", + "default": false + }, + "includeCharacterAtEnd": { + "type": "boolean", + "default": false + } + } + }, + "default": [ + {"enabled": true, "character1": "「", "character2": "」", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": "『", "character2": "』", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": "\"", "character2": "\"", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + {"enabled": true, "character1": "'", "character2": "'", "includeCharacterAtStart": false, "includeCharacterAtEnd": false}, + 
{"enabled": true, "character1": ".", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "!", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "?", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": ".", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "。", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "!", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "?", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true}, + {"enabled": true, "character1": "…", "character2": null, "includeCharacterAtStart": false, "includeCharacterAtEnd": true} + ] } } } diff --git a/ext/bg/js/options.js b/ext/bg/js/options.js index 441d56ec..6c335346 100644 --- a/ext/bg/js/options.js +++ b/ext/bg/js/options.js @@ -684,13 +684,30 @@ class OptionsUtil { // Version 8 changes: // Added translation.textReplacements. // Moved anki.sentenceExt to sentenceParsing.scanExtent. + // Added sentenceParsing.enableTerminationCharacters. + // Added sentenceParsing.terminationCharacters. 
for (const profile of options.profiles) { profile.options.translation.textReplacements = { searchOriginal: true, groups: [] }; profile.options.sentenceParsing = { - scanExtent: profile.options.anki.sentenceExt + scanExtent: profile.options.anki.sentenceExt, + enableTerminationCharacters: true, + terminationCharacters: [ + {enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '"', character2: '"', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '\'', character2: '\'', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '。', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '…', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true} + ] }; delete profile.options.anki.sentenceExt; } diff --git a/ext/bg/js/settings2/sentence-termination-characters-controller.js b/ext/bg/js/settings2/sentence-termination-characters-controller.js new file mode 100644 index 00000000..173c609b --- /dev/null +++ 
b/ext/bg/js/settings2/sentence-termination-characters-controller.js @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2021 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/* global + * OptionsUtil + */ + +class SentenceTerminationCharactersController { + constructor(settingsController) { + this._settingsController = settingsController; + this._entries = []; + this._addButton = null; + this._resetButton = null; + this._listTable = null; + this._listContainer = null; + this._emptyIndicator = null; + } + + get settingsController() { + return this._settingsController; + } + + async prepare() { + this._addButton = document.querySelector('#sentence-termination-character-list-add'); + this._resetButton = document.querySelector('#sentence-termination-character-list-reset'); + this._listTable = document.querySelector('#sentence-termination-character-list-table'); + this._listContainer = document.querySelector('#sentence-termination-character-list'); + this._emptyIndicator = document.querySelector('#sentence-termination-character-list-empty'); + + this._addButton.addEventListener('click', this._onAddClick.bind(this)); + this._resetButton.addEventListener('click', this._onResetClick.bind(this)); + this._settingsController.on('optionsChanged', this._onOptionsChanged.bind(this)); + + await this._updateOptions(); + } + + async addEntry(terminationCharacterEntry) 
{ + const options = await this._settingsController.getOptions(); + const {sentenceParsing: {terminationCharacters}} = options; + + await this._settingsController.modifyProfileSettings([{ + action: 'splice', + path: 'sentenceParsing.terminationCharacters', + start: terminationCharacters.length, + deleteCount: 0, + items: [terminationCharacterEntry] + }]); + + await this._updateOptions(); + } + + async deleteEntry(index) { + const options = await this._settingsController.getOptions(); + const {sentenceParsing: {terminationCharacters}} = options; + + if (index < 0 || index >= terminationCharacters.length) { return false; } + + await this._settingsController.modifyProfileSettings([{ + action: 'splice', + path: 'sentenceParsing.terminationCharacters', + start: index, + deleteCount: 1, + items: [] + }]); + + await this._updateOptions(); + return true; + } + + async modifyProfileSettings(targets) { + return await this._settingsController.modifyProfileSettings(targets); + } + + // Private + + _onOptionsChanged({options}) { + for (const entry of this._entries) { + entry.cleanup(); + } + + this._entries = []; + const {sentenceParsing: {terminationCharacters}} = options; + + for (let i = 0, ii = terminationCharacters.length; i < ii; ++i) { + const terminationCharacterEntry = terminationCharacters[i]; + const node = this._settingsController.instantiateTemplate('sentence-termination-character-entry'); + this._listContainer.appendChild(node); + const entry = new SentenceTerminationCharacterEntry(this, terminationCharacterEntry, i, node); + this._entries.push(entry); + entry.prepare(); + } + + this._listTable.hidden = (terminationCharacters.length === 0); + this._emptyIndicator.hidden = (terminationCharacters.length !== 0); + } + + _onAddClick(e) { + e.preventDefault(); + this._addNewEntry(); + } + + _onResetClick(e) { + e.preventDefault(); + this._reset(); + } + + async _addNewEntry() { + const newEntry = { + enabled: true, + character1: '"', + character2: '"', + 
includeCharacterAtStart: false, + includeCharacterAtEnd: false + }; + return await this.addEntry(newEntry); + } + + async _updateOptions() { + const options = await this._settingsController.getOptions(); + this._onOptionsChanged({options}); + } + + async _reset() { + const defaultOptions = await this._getDefaultOptions(); + const value = defaultOptions.profiles[0].options.sentenceParsing.terminationCharacters; + await this._settingsController.setProfileSetting('sentenceParsing.terminationCharacters', value); + await this._updateOptions(); + } + + async _getDefaultOptions() { + const optionsUtil = new OptionsUtil(); + await optionsUtil.prepare(); + const optionsFull = optionsUtil.getDefault(); + return optionsFull; + } +} + +class SentenceTerminationCharacterEntry { + constructor(parent, data, index, node) { + this._parent = parent; + this._data = data; + this._index = index; + this._node = node; + this._eventListeners = new EventListenerCollection(); + this._character1Input = null; + this._character2Input = null; + this._basePath = `sentenceParsing.terminationCharacters[${this._index}]`; + } + + prepare() { + const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} = this._data; + const node = this._node; + + const enabledToggle = node.querySelector('.sentence-termination-character-enabled'); + const typeSelect = node.querySelector('.sentence-termination-character-type'); + const character1Input = node.querySelector('.sentence-termination-character-input1'); + const character2Input = node.querySelector('.sentence-termination-character-input2'); + const includeAtStartCheckbox = node.querySelector('.sentence-termination-character-include-at-start'); + const includeAtEndheckbox = node.querySelector('.sentence-termination-character-include-at-end'); + const menuButton = node.querySelector('.sentence-termination-character-entry-button'); + + this._character1Input = character1Input; + this._character2Input = character2Input; + + const type 
= (character2 === null ? 'terminator' : 'quote'); + node.dataset.type = type; + + enabledToggle.checked = enabled; + typeSelect.value = type; + character1Input.value = character1; + character2Input.value = (character2 !== null ? character2 : ''); + includeAtStartCheckbox.checked = includeCharacterAtStart; + includeAtEndheckbox.checked = includeCharacterAtEnd; + + enabledToggle.dataset.setting = `${this._basePath}.enabled`; + includeAtStartCheckbox.dataset.setting = `${this._basePath}.includeCharacterAtStart`; + includeAtEndheckbox.dataset.setting = `${this._basePath}.includeCharacterAtEnd`; + + this._eventListeners.addEventListener(typeSelect, 'change', this._onTypeSelectChange.bind(this), false); + this._eventListeners.addEventListener(character1Input, 'change', this._onCharacterChange.bind(this, 1), false); + this._eventListeners.addEventListener(character2Input, 'change', this._onCharacterChange.bind(this, 2), false); + this._eventListeners.addEventListener(menuButton, 'menuClosed', this._onMenuClosed.bind(this), false); + } + + cleanup() { + this._eventListeners.removeAllEventListeners(); + if (this._node.parentNode !== null) { + this._node.parentNode.removeChild(this._node); + } + } + + // Private + + _onTypeSelectChange(e) { + this._setHasCharacter2(e.currentTarget.value === 'quote'); + } + + _onCharacterChange(characterNumber, e) { + const node = e.currentTarget; + if (characterNumber === 2 && this._data.character2 === null) { + node.value = ''; + } + + const value = node.value.substring(0, 1); + this._setCharacterValue(node, characterNumber, value); + } + + _onMenuClosed(e) { + const {detail: {action}} = e; + switch (action) { + case 'delete': + this._delete(); + break; + } + } + + async _delete() { + this._parent.deleteEntry(this._index); + } + + async _setHasCharacter2(has) { + const okay = await this._setCharacterValue(this._character2Input, 2, has ? this._data.character1 : null); + if (okay) { + const type = (!has ? 
'terminator' : 'quote'); + this._node.dataset.type = type; + } + } + + async _setCharacterValue(inputNode, characterNumber, value) { + const pathEnd = `character${characterNumber}`; + const r = await this._parent.settingsController.setProfileSetting(`${this._basePath}.${pathEnd}`, value); + const okay = !r[0].error; + if (okay) { + this._data[pathEnd] = value; + } else { + value = this._data[pathEnd]; + } + inputNode.value = (value !== null ? value : ''); + return okay; + } +} diff --git a/ext/bg/js/settings2/settings-main.js b/ext/bg/js/settings2/settings-main.js index 1b3bfaa0..fc003ac8 100644 --- a/ext/bg/js/settings2/settings-main.js +++ b/ext/bg/js/settings2/settings-main.js @@ -32,6 +32,7 @@ * ScanInputsController * ScanInputsSimpleController * SecondarySearchDictionaryController + * SentenceTerminationCharactersController * SettingsController * SettingsDisplayController * StatusFooter @@ -124,6 +125,9 @@ async function setupGenericSettingsController(genericSettingController) { const translationTextReplacementsController = new TranslationTextReplacementsController(settingsController); translationTextReplacementsController.prepare(); + const sentenceTerminationCharactersController = new SentenceTerminationCharactersController(settingsController); + sentenceTerminationCharactersController.prepare(); + await Promise.all(preparePromises); document.documentElement.dataset.loaded = 'true'; diff --git a/ext/bg/settings2.html b/ext/bg/settings2.html index e5f105f0..81b91bf0 100644 --- a/ext/bg/settings2.html +++ b/ext/bg/settings2.html @@ -1125,29 +1125,31 @@ </div> </div> <div class="settings-group advanced-only"> - <div class="settings-item"> - <div class="settings-item-inner settings-item-inner-wrappable"> - <div class="settings-item-left"> - <div class="settings-item-label"> - Sentence scanning extent - <a class="more-toggle more-only" data-parent-distance="4">(?)</a> - </div> - </div> - <div class="settings-item-right"> - <input type="number" 
data-setting="sentenceParsing.scanExtent" min="0" step="1"> - </div> + <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> + <div class="settings-item-left"> + <div class="settings-item-label">Sentence scanning extent</div> + <div class="settings-item-description">Adjust how many characters are bidirectionally scanned to form a sentence.</div> </div> - <div class="settings-item-children more" hidden> - <p> - This option controls the maximum scanning distance used to determine the bounds of a sentence, - in number of characters. - Sentence scanning is bidirectional and begins from both the start and end of the source term. - </p> - <p> - <a class="more-toggle" data-parent-distance="3">Less…</a> - </p> + <div class="settings-item-right"> + <input type="number" data-setting="sentenceParsing.scanExtent" min="0" step="1"> </div> - </div> + </div></div> + <div class="settings-item"><div class="settings-item-inner settings-item-inner-wrappable"> + <div class="settings-item-left"> + <div class="settings-item-label">Enable sentence termination characters</div> + </div> + <div class="settings-item-right"> + <label class="toggle"><input type="checkbox" data-setting="sentenceParsing.enableTerminationCharacters"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label> + </div> + </div></div> + <div class="settings-item settings-item-button" data-modal-action="show,sentence-termination-characters"><div class="settings-item-inner"> + <div class="settings-item-left"> + <div class="settings-item-label">Configure sentence termination characters…</div> + </div> + <div class="settings-item-right open-panel-button-container"> + <button class="icon-button"><span class="icon-button-inner"><span class="icon" data-icon="material-right-arrow"></span></span></button> + </div> + </div></div> </div> <!-- Translation --> @@ -2725,6 +2727,86 @@ </div></div></template> +<!-- Sentence parsing modal --> +<div 
id="sentence-termination-characters" class="modal-container" tabindex="-1" role="dialog" hidden><div class="modal-content"> + <div class="modal-header"> + <div class="modal-title">Sentence Termination Characters</div> + <div class="modal-header-button-container"> + <div class="modal-header-button-group"> + <button class="icon-button modal-header-button" data-modal-action="expand"><span class="icon-button-inner"><span class="icon" data-icon="expand"></span></span></button> + <button class="icon-button modal-header-button" data-modal-action="collapse"><span class="icon-button-inner"><span class="icon" data-icon="collapse"></span></span></button> + </div> + </div> + </div> + <div class="modal-body"> + <p> + Sentences are terminated by punctuation and quotation marks, which can both be configured below. + </p> + <table class="sentence-termination-character-list-table" id="sentence-termination-character-list-table" hidden> + <thead><tr> + <td>#</td> + <td>Enabled</td> + <td>Type</td> + <td>Character 1</td> + <td>Character 2</td> + <td>Include character in sentence</td> + <td></td> + </tr></thead> + <tbody class="sentence-termination-character-list generic-list" id="sentence-termination-character-list"></tbody> + </table> + <div id="sentence-termination-character-list-empty" hidden> + No terminators defined. 
+ </div> + </div> + <div class="modal-footer"> + <button class="low-emphasis danger" id="sentence-termination-character-list-reset">Reset</button> + <button class="low-emphasis" id="sentence-termination-character-list-add">Add</button> + <button data-modal-action="hide">Close</button> + </div> +</div></div> + + +<!-- Sentence parsing templates --> +<template id="sentence-termination-character-entry-template"><tr class="sentence-termination-character-entry"> + <td class="generic-list-index-prefix"></td> + <td> + <label class="toggle"><input type="checkbox" class="sentence-termination-character-enabled"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label> + </td> + <td> + <select class="sentence-termination-character-type"> + <option value="terminator">Terminator</option> + <option value="quote">Quote</option> + </select> + </td> + <td> + <input type="text" class="sentence-termination-character-input1" maxlength="1"> + </td> + <td> + <input type="text" class="sentence-termination-character-input2" maxlength="1"> + <div class="sentence-termination-character-input2-alt">—</div> + </td> + <td> + <div class="sentence-termination-character-include-list"> + <label class="sentence-termination-character-include"> + <label class="checkbox"><input type="checkbox" class="sentence-termination-character-include-at-start"><span class="checkbox-body"><span class="checkbox-fill"></span><span class="checkbox-border"></span><span class="checkbox-check"></span></span></label> + <span>At start</span> + </label> + <label class="sentence-termination-character-include"> + <label class="checkbox"><input type="checkbox" class="sentence-termination-character-include-at-end"><span class="checkbox-body"><span class="checkbox-fill"></span><span class="checkbox-border"></span><span class="checkbox-check"></span></span></label> + <span>At end</span> + </label> + </div> + </td> + <td> + <button class="icon-button 
sentence-termination-character-entry-button" data-menu="sentence-termination-character-entry-menu" data-menu-position="below,left"><span class="icon-button-inner"><span class="icon" data-icon="kebab-menu"></span></span></button> + </td> +</tr></template> + +<template id="sentence-termination-character-entry-menu-template"><div class="popup-menu-container" tabindex="-1" role="dialog"><div class="popup-menu"> + <button class="popup-menu-item" data-menu-action="delete">Delete</button> +</div></div></template> + + <!-- Scripts --> <script src="/mixed/lib/jszip.min.js"></script> <script src="/mixed/lib/wanakana.min.js"></script> @@ -2783,6 +2865,7 @@ <script src="/bg/js/settings2/nested-popups-controller.js"></script> <script src="/bg/js/settings2/secondary-search-dictionary-controller.js"></script> +<script src="/bg/js/settings2/sentence-termination-characters-controller.js"></script> <script src="/bg/js/settings2/settings-display-controller.js"></script> <script src="/bg/js/settings2/translation-text-replacements-controller.js"></script> diff --git a/ext/fg/js/frontend.js b/ext/fg/js/frontend.js index a206e3fb..4ca7874a 100644 --- a/ext/fg/js/frontend.js +++ b/ext/fg/js/frontend.js @@ -326,9 +326,9 @@ class Frontend { touchInputEnabled: scanningOptions.touchInputEnabled, pointerEventsEnabled: scanningOptions.pointerEventsEnabled, scanLength: scanningOptions.length, - sentenceScanExtent: sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, - preventMiddleMouse + preventMiddleMouse, + sentenceParsingOptions }); this._updateTextScannerEnabled(); diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 4c8d2f91..2b3ea21c 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -327,9 +327,9 @@ class Display extends EventDispatcher { touchInputEnabled: scanningOptions.touchInputEnabled, pointerEventsEnabled: scanningOptions.pointerEventsEnabled, scanLength: scanningOptions.length, - sentenceScanExtent: 
sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, - preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery + preventMiddleMouse: scanningOptions.preventMiddleMouse.onSearchQuery, + sentenceParsingOptions } }); @@ -1832,9 +1832,9 @@ class Display extends EventDispatcher { touchInputEnabled: false, pointerEventsEnabled: false, scanLength: scanningOptions.length, - sentenceScanExtent: sentenceParsingOptions.scanExtent, layoutAwareScan: scanningOptions.layoutAwareScan, - preventMiddleMouse: false + preventMiddleMouse: false, + sentenceParsingOptions }); this._definitionTextScanner.setEnabled(true); diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js index 647cbedc..42d3556b 100644 --- a/ext/mixed/js/document-util.js +++ b/ext/mixed/js/document-util.js @@ -24,24 +24,6 @@ class DocumentUtil { constructor() { this._transparentColorPattern = /rgba\s*\([^)]*,\s*0(?:\.0+)?\s*\)/; - - const quoteArray = [ - ['「', '」'], - ['『', '』'], - ['\'', '\''], - ['"', '"'] - ]; - const terminatorString = '…。..??!!'; - this._terminatorMap = new Map(); - for (const char of terminatorString) { - this._terminatorMap.set(char, [false, true]); - } - this._forwardQuoteMap = new Map(); - this._backwardQuoteMap = new Map(); - for (const [char1, char2] of quoteArray) { - this._forwardQuoteMap.set(char1, [char2, false]); - this._backwardQuoteMap.set(char2, [char1, false]); - } } getRangeFromPoint(x, y, deepContentScan) { @@ -81,11 +63,30 @@ class DocumentUtil { } } - extractSentence(source, layoutAwareScan, extent) { - const terminatorMap = this._terminatorMap; - const forwardQuoteMap = this._forwardQuoteMap; - const backwardQuoteMap = this._backwardQuoteMap; - + /** + * Extract a sentence from a document. + * @param source The text source object, either `TextSourceRange` or `TextSourceElement`. + * @param layoutAwareScan Whether or not layout-aware scan mode should be used. 
+ * @param extent The maximum number of characters to scan in each direction from the source when locating the bounds of the sentence. + * @param terminatorMap A mapping of characters that terminate a sentence. + * Format: + * ```js + * new Map([ [character: string, [includeCharacterAtStart: boolean, includeCharacterAtEnd: boolean]], ... ]) + * ``` + * @param forwardQuoteMap A mapping of quote characters that delimit a sentence. + * Format: + * ```js + * new Map([ [character: string, [otherCharacter: string, includeCharacterAtStart: boolean]], ... ]) + * ``` + * @param backwardQuoteMap A mapping of quote characters that delimit a sentence, + * which is the inverse of forwardQuoteMap. + * Format: + * ```js + * new Map([ [character: string, [otherCharacter: string, includeCharacterAtEnd: boolean]], ... ]) + * ``` + * @returns The sentence text and the offset to the original source: `{text: string, offset: integer}`. + */ + extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) { // Scan text source = source.clone(); const startLength = source.setStartOffset(extent, layoutAwareScan); diff --git a/ext/mixed/js/text-scanner.js b/ext/mixed/js/text-scanner.js index f26bcf0e..11a6f88f 100644 --- a/ext/mixed/js/text-scanner.js +++ b/ext/mixed/js/text-scanner.js @@ -59,9 +59,12 @@ class TextScanner extends EventDispatcher { this._touchInputEnabled = false; this._pointerEventsEnabled = false; this._scanLength = 1; - this._sentenceScanExtent = 1; this._layoutAwareScan = false; this._preventMiddleMouse = false; + this._sentenceScanExtent = 0; + this._sentenceTerminatorMap = new Map(); + this._sentenceForwardQuoteMap = new Map(); + this._sentenceBackwardQuoteMap = new Map(); this._inputs = []; this._enabled = false; @@ -142,9 +145,9 @@ class TextScanner extends EventDispatcher { touchInputEnabled, pointerEventsEnabled, scanLength, - sentenceScanExtent, layoutAwareScan, - preventMiddleMouse + preventMiddleMouse, + sentenceParsingOptions }) { if (Array.isArray(inputs)) { this._inputs = inputs.map(({ @@ -193,15 
+196,38 @@ class TextScanner extends EventDispatcher { if (typeof scanLength === 'number') { this._scanLength = scanLength; } - if (typeof sentenceScanExtent === 'number') { - this._sentenceScanExtent = sentenceScanExtent; - } if (typeof layoutAwareScan === 'boolean') { this._layoutAwareScan = layoutAwareScan; } if (typeof preventMiddleMouse === 'boolean') { this._preventMiddleMouse = preventMiddleMouse; } + if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) { + const {scanExtent, enableTerminationCharacters, terminationCharacters} = sentenceParsingOptions; + const hasTerminationCharacters = (typeof terminationCharacters === 'object' && Array.isArray(terminationCharacters)); + if (typeof scanExtent === 'number') { + this._sentenceScanExtent = sentenceParsingOptions.scanExtent; + } + if (typeof enableTerminationCharacters === 'boolean' || hasTerminationCharacters) { + const sentenceTerminatorMap = this._sentenceTerminatorMap; + const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; + const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; + sentenceTerminatorMap.clear(); + sentenceForwardQuoteMap.clear(); + sentenceBackwardQuoteMap.clear(); + if (enableTerminationCharacters !== false && hasTerminationCharacters) { + for (const {enabled, character1, character2, includeCharacterAtStart, includeCharacterAtEnd} of terminationCharacters) { + if (!enabled) { continue; } + if (character2 === null) { + sentenceTerminatorMap.set(character1, [includeCharacterAtStart, includeCharacterAtEnd]); + } else { + sentenceForwardQuoteMap.set(character1, [character2, includeCharacterAtStart]); + sentenceBackwardQuoteMap.set(character2, [character1, includeCharacterAtEnd]); + } + } + } + } + } } getTextSourceContent(textSource, length, layoutAwareScan) { @@ -723,6 +749,9 @@ class TextScanner extends EventDispatcher { async _findTerms(textSource, optionsContext) { const scanLength = this._scanLength; const sentenceScanExtent = 
this._sentenceScanExtent; + const sentenceTerminatorMap = this._sentenceTerminatorMap; + const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; + const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; const layoutAwareScan = this._layoutAwareScan; const searchText = this.getTextSourceContent(textSource, scanLength, layoutAwareScan); if (searchText.length === 0) { return null; } @@ -731,13 +760,23 @@ class TextScanner extends EventDispatcher { if (definitions.length === 0) { return null; } textSource.setEndOffset(length, layoutAwareScan); - const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent); + const sentence = this._documentUtil.extractSentence( + textSource, + layoutAwareScan, + sentenceScanExtent, + sentenceTerminatorMap, + sentenceForwardQuoteMap, + sentenceBackwardQuoteMap + ); return {definitions, sentence, type: 'terms'}; } async _findKanji(textSource, optionsContext) { const sentenceScanExtent = this._sentenceScanExtent; + const sentenceTerminatorMap = this._sentenceTerminatorMap; + const sentenceForwardQuoteMap = this._sentenceForwardQuoteMap; + const sentenceBackwardQuoteMap = this._sentenceBackwardQuoteMap; const layoutAwareScan = this._layoutAwareScan; const searchText = this.getTextSourceContent(textSource, 1, layoutAwareScan); if (searchText.length === 0) { return null; } @@ -746,7 +785,14 @@ class TextScanner extends EventDispatcher { if (definitions.length === 0) { return null; } textSource.setEndOffset(1, layoutAwareScan); - const sentence = this._documentUtil.extractSentence(textSource, layoutAwareScan, sentenceScanExtent); + const sentence = this._documentUtil.extractSentence( + textSource, + layoutAwareScan, + sentenceScanExtent, + sentenceTerminatorMap, + sentenceForwardQuoteMap, + sentenceBackwardQuoteMap + ); return {definitions, sentence, type: 'kanji'}; } diff --git a/test/test-document-util.js b/test/test-document-util.js index 09f0c5e7..2311e25f 100644 --- 
a/test/test-document-util.js +++ b/test/test-document-util.js @@ -181,8 +181,29 @@ async function testDocumentTextScanningFunctions(dom, {DocumentUtil, TextSourceR } if (source === null) { continue; } + // Sentence info + const terminatorString = '…。..??!!'; + const terminatorMap = new Map(); + for (const char of terminatorString) { + terminatorMap.set(char, [false, true]); + } + const quoteArray = [['「', '」'], ['『', '』'], ['\'', '\''], ['"', '"']]; + const forwardQuoteMap = new Map(); + const backwardQuoteMap = new Map(); + for (const [char1, char2] of quoteArray) { + forwardQuoteMap.set(char1, [char2, false]); + backwardQuoteMap.set(char2, [char1, false]); + } + // Test docSentenceExtract - const sentenceActual = documentUtil.extractSentence(source, false, sentenceScanExtent).text; + const sentenceActual = documentUtil.extractSentence( + source, + false, + sentenceScanExtent, + terminatorMap, + forwardQuoteMap, + backwardQuoteMap + ).text; assert.strictEqual(sentenceActual, sentence); // Clean diff --git a/test/test-options-util.js b/test/test-options-util.js index 8b7254c6..5ce9313e 100644 --- a/test/test-options-util.js +++ b/test/test-options-util.js @@ -420,7 +420,22 @@ function createProfileOptionsUpdatedTestData1() { fieldTemplates: null }, sentenceParsing: { - scanExtent: 200 + scanExtent: 200, + enableTerminationCharacters: true, + terminationCharacters: [ + {enabled: true, character1: '「', character2: '」', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '『', character2: '』', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '"', character2: '"', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '\'', character2: '\'', includeCharacterAtStart: false, includeCharacterAtEnd: false}, + {enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1:
'!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '.', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '。', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '!', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '?', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true}, + {enabled: true, character1: '…', character2: null, includeCharacterAtStart: false, includeCharacterAtEnd: true} + ] } }; } |