diff options
author | toasted-nutbread <toasted-nutbread@users.noreply.github.com> | 2021-01-10 14:43:06 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-10 14:43:06 -0500 |
commit | f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch) | |
tree | 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /ext/mixed/js/document-util.js | |
parent | 083da93142ec6302021ee1c29428121b54fc9e68 (diff) |
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'ext/mixed/js/document-util.js')
-rw-r--r-- | ext/mixed/js/document-util.js | 47 |
1 file changed, 24 insertions, 23 deletions
diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js index 647cbedc..42d3556b 100644 --- a/ext/mixed/js/document-util.js +++ b/ext/mixed/js/document-util.js @@ -24,24 +24,6 @@ class DocumentUtil { constructor() { this._transparentColorPattern = /rgba\s*\([^)]*,\s*0(?:\.0+)?\s*\)/; - - const quoteArray = [ - ['「', '」'], - ['『', '』'], - ['\'', '\''], - ['"', '"'] - ]; - const terminatorString = '…。．.？?！!'; - this._terminatorMap = new Map(); - for (const char of terminatorString) { - this._terminatorMap.set(char, [false, true]); - } - this._forwardQuoteMap = new Map(); - this._backwardQuoteMap = new Map(); - for (const [char1, char2] of quoteArray) { - this._forwardQuoteMap.set(char1, [char2, false]); - this._backwardQuoteMap.set(char2, [char1, false]); - } } getRangeFromPoint(x, y, deepContentScan) { @@ -81,11 +63,30 @@ class DocumentUtil { } } - extractSentence(source, layoutAwareScan, extent) { - const terminatorMap = this._terminatorMap; - const forwardQuoteMap = this._forwardQuoteMap; - const backwardQuoteMap = this._backwardQuoteMap; - + /** + * Extract a sentence from a document. + * @param source The text source object, either `TextSourceRange` or `TextSourceElement`. + * @param layoutAwareScan Whether or not layout-aware scan mode should be used. + * @param extent The length of the sentence to extract. + * @param terminatorMap A mapping of characters that terminate a sentence. + * Format: + * ```js + * new Map([ [character: string, [includeCharacterAtStart: boolean, includeCharacterAtEnd: boolean]], ... ]) + * ``` + * @param forwardQuoteMap A mapping of quote characters that delimit a sentence. + * Format: + * ```js + * new Map([ [character: string, [otherCharacter: string, includeCharacterAtStart: boolean]], ... ]) + * ``` + * @param backwardQuoteMap A mapping of quote characters that delimit a sentence, + * which is the inverse of forwardQuoteMap. 
+ * Format: + * ```js + * new Map([ [character: string, [otherCharacter: string, includeCharacterAtEnd: boolean]], ... ]) + * ``` + * @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`. + */ + extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) { // Scan text source = source.clone(); const startLength = source.setStartOffset(extent, layoutAwareScan); |