aboutsummaryrefslogtreecommitdiff
path: root/ext/mixed/js/document-util.js
diff options
context:
space:
mode:
author: toasted-nutbread <toasted-nutbread@users.noreply.github.com> 2021-01-10 14:43:06 -0500
committer: GitHub <noreply@github.com> 2021-01-10 14:43:06 -0500
commit: f6a38f40dc52c4517e41ddb381278ecf5efba056 (patch)
tree: 0b56e9224ee25c0b6cc2c18cf8ae8ab891427569 /ext/mixed/js/document-util.js
parent: 083da93142ec6302021ee1c29428121b54fc9e68 (diff)
Customizable sentence parsing (#1217)
* Add new sentenceParsing options
* Update TextScanner.setOptions
* Assign terminator/quote maps
* Pass sentence parsing info to extractSentence
* Simplify setting
* Add setting for enableTerminationCharacters
* Create new settings for sentence termination characters
Diffstat (limited to 'ext/mixed/js/document-util.js')
-rw-r--r-- ext/mixed/js/document-util.js 47
1 file changed, 24 insertions, 23 deletions
diff --git a/ext/mixed/js/document-util.js b/ext/mixed/js/document-util.js
index 647cbedc..42d3556b 100644
--- a/ext/mixed/js/document-util.js
+++ b/ext/mixed/js/document-util.js
@@ -24,24 +24,6 @@
class DocumentUtil {
constructor() {
this._transparentColorPattern = /rgba\s*\([^)]*,\s*0(?:\.0+)?\s*\)/;
-
- const quoteArray = [
- ['「', '」'],
- ['『', '』'],
- ['\'', '\''],
- ['"', '"']
- ];
- const terminatorString = '…。..??!!';
- this._terminatorMap = new Map();
- for (const char of terminatorString) {
- this._terminatorMap.set(char, [false, true]);
- }
- this._forwardQuoteMap = new Map();
- this._backwardQuoteMap = new Map();
- for (const [char1, char2] of quoteArray) {
- this._forwardQuoteMap.set(char1, [char2, false]);
- this._backwardQuoteMap.set(char2, [char1, false]);
- }
}
getRangeFromPoint(x, y, deepContentScan) {
@@ -81,11 +63,30 @@ class DocumentUtil {
}
}
- extractSentence(source, layoutAwareScan, extent) {
- const terminatorMap = this._terminatorMap;
- const forwardQuoteMap = this._forwardQuoteMap;
- const backwardQuoteMap = this._backwardQuoteMap;
-
+ /**
+ * Extract a sentence from a document.
+ * @param source The text source object, either `TextSourceRange` or `TextSourceElement`.
+ * @param layoutAwareScan Whether or not layout-aware scan mode should be used.
+ * @param extent The length of the sentence to extract.
+ * @param terminatorMap A mapping of characters that terminate a sentence.
+ * Format:
+ * ```js
+ * new Map([ [character: string, [includeCharacterAtStart: boolean, includeCharacterAtEnd: boolean]], ... ])
+ * ```
+ * @param forwardQuoteMap A mapping of quote characters that delimit a sentence.
+ * Format:
+ * ```js
+ * new Map([ [character: string, [otherCharacter: string, includeCharacterAtStart: boolean]], ... ])
+ * ```
+ * @param backwardQuoteMap A mapping of quote characters that delimit a sentence,
+ * which is the inverse of forwardQuoteMap.
+ * Format:
+ * ```js
+ * new Map([ [character: string, [otherCharacter: string, includeCharacterAtEnd: boolean]], ... ])
+ * ```
+ * @returns The sentence and the offset to the original source: `{sentence: string, offset: integer}`.
+ */
+ extractSentence(source, layoutAwareScan, extent, terminatorMap, forwardQuoteMap, backwardQuoteMap) {
// Scan text
source = source.clone();
const startLength = source.setStartOffset(extent, layoutAwareScan);