From 1d2a91523604cce7dd608408da65bea5645fc7d4 Mon Sep 17 00:00:00 2001
From: James Maa <jmaa@berkeley.edu>
Date: Sun, 16 Jun 2024 18:39:44 -0700
Subject: Fix DOMTextScanner entering initial div on backwards seeks (#1071)

* Fix dom-text-scanner entering initial div on reverse seeks

* Add new case

* Add launch.json

* Cleanup launch.json
---
 .vscode/launch.json                   |  20 ++++++
 ext/js/dom/dom-text-scanner.js        |   7 +++
 test/data/html/text-source-range.html |  17 ++++++
 test/data/json.json                   |   1 +
 test/dom-text-scanner.test.js         | 111 +++++++++++++++++-----------------
 test/text-source-range.test.js        |  80 ++++++++++++++++++++++++
 6 files changed, 180 insertions(+), 56 deletions(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 test/data/html/text-source-range.html
 create mode 100644 test/text-source-range.test.js

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 00000000..15f4da51
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,20 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Launch via NPM",
+            "request": "launch",
+            "runtimeArgs": [
+                "run",
+                "test:unit",
+                "--",
+                // "test/text-source-range.test.js" // Replace and uncomment this line to run a single test file
+            ],
+            "runtimeExecutable": "npm",
+            "skipFiles": [
+                "<node_internals>/**"
+            ],
+            "type": "node"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/ext/js/dom/dom-text-scanner.js b/ext/js/dom/dom-text-scanner.js
index eb56baf9..5325b894 100644
--- a/ext/js/dom/dom-text-scanner.js
+++ b/ext/js/dom/dom-text-scanner.js
@@ -36,6 +36,8 @@ export class DOMTextScanner {
         const resetOffset = (ruby !== null);
         if (resetOffset) { node = ruby; }
 
+        /** @type {Node} */
+        this._initialNode = node;
         /** @type {Node} */
         this._node = node;
         /** @type {number} */
@@ -129,11 +131,16 @@ export class DOMTextScanner {
                 }
             } else if (nodeType === ELEMENT_NODE) {
                 lastNode = node;
+                const initialNodeAtBeginningOfNodeGoingBackwards = node === this._initialNode && this._offset === 0 && !forward;
+                const initialNodeAtEndOfNodeGoingForwards = node === this._initialNode && this._offset === node.childNodes.length && forward;
                 this._offset = 0;
                 ({enterable, newlines} = DOMTextScanner.getElementSeekInfo(/** @type {Element} */ (node)));
                 if (newlines > this._newlines && generateLayoutContent) {
                     this._newlines = newlines;
                 }
+                if (initialNodeAtBeginningOfNodeGoingBackwards || initialNodeAtEndOfNodeGoingForwards) {
+                    enterable = false;
+                }
             }
 
             /** @type {Node[]} */
diff --git a/test/data/html/text-source-range.html b/test/data/html/text-source-range.html
new file mode 100644
index 00000000..c4b151f8
--- /dev/null
+++ b/test/data/html/text-source-range.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+    <title>TextSourceRange Tests</title>
+</head>
+    <body>
+        <h1>TextSourceRange Tests</h1>
+        <test-case id="text-source-range-lazy">
+            <p>人</p>
+        </test-case>
+
+        <test-case id="text-source-range">
+<p>山</p>
+        </test-case>
+    </body>
+</html>
diff --git a/test/data/json.json b/test/data/json.json
index 73709fd5..f5b632e1 100644
--- a/test/data/json.json
+++ b/test/data/json.json
@@ -8,6 +8,7 @@
         {"path": ".eslintrc.json", "ignore": true},
         {"path": ".vscode/settings.json", "ignore": true},
         {"path": ".vscode/extensions.json", "ignore": true},
+        {"path": ".vscode/launch.json", "ignore": true},
         {"path": "dev/jsconfig.json", "ignore": true},
         {"path": "ext/manifest.json", "ignore": true},
         {"path": "test/data/dictionaries/invalid-dictionary1/index.json", "ignore": true},
diff --git a/test/dom-text-scanner.test.js b/test/dom-text-scanner.test.js
index e9d3405c..e204f2a3 100644
--- a/test/dom-text-scanner.test.js
+++ b/test/dom-text-scanner.test.js
@@ -112,56 +112,57 @@ function createAbsoluteGetComputedStyle(window) {
 
 const domTestEnv = await setupDomTest(path.join(dirname, 'data/html/dom-text-scanner.html'));
 
-describe('DOMTextScanner', () => {
+describe('DOMTextScanner seek tests', () => {
     const {window, teardown} = domTestEnv;
     afterAll(() => teardown(global));
 
-    test('Seek tests', () => {
-        const {document} = window;
-        window.getComputedStyle = createAbsoluteGetComputedStyle(window);
+    const {document} = window;
+    window.getComputedStyle = createAbsoluteGetComputedStyle(window);
 
-        for (const testElement of /** @type {NodeListOf<HTMLElement>} */ (document.querySelectorAll('test-case'))) {
-            /** @type {import('test/dom-text-scanner').TestData|import('test/dom-text-scanner').TestData[]} */
-            let testData = parseJson(/** @type {string} */ (testElement.dataset.testData));
-            if (!Array.isArray(testData)) {
-                testData = [testData];
-            }
-            for (const testDataItem of testData) {
-                const {
-                    node: nodeSelector,
-                    offset,
-                    length,
-                    forcePreserveWhitespace,
-                    generateLayoutContent,
-                    reversible,
-                    expected: {
-                        node: expectedNodeSelector,
-                        offset: expectedOffset,
-                        content: expectedContent,
-                        remainder: expectedRemainder,
-                    },
-                } = testDataItem;
-
-                const node = querySelectorTextNode(testElement, nodeSelector);
-                const expectedNode = querySelectorTextNode(testElement, expectedNodeSelector);
-
-                expect(node).not.toEqual(null);
-                expect(expectedNode).not.toEqual(null);
-                if (node === null || expectedNode === null) { continue; }
-
-                // Standard test
-                {
-                    const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
-                    scanner.seek(length);
+    for (const testElement of /** @type {NodeListOf<HTMLElement>} */ (document.querySelectorAll('test-case'))) {
+        const testDescription = testElement.querySelector('test-description')?.textContent || 'Test description not found.';
 
-                    const {node: actualNode1, offset: actualOffset1, content: actualContent1, remainder: actualRemainder1} = scanner;
-                    expect(actualContent1).toStrictEqual(expectedContent);
-                    expect(actualOffset1).toStrictEqual(expectedOffset);
-                    expect(actualNode1).toStrictEqual(expectedNode);
-                    expect(actualRemainder1).toStrictEqual(expectedRemainder || 0);
-                }
+        /** @type {import('test/dom-text-scanner').TestData|import('test/dom-text-scanner').TestData[]} */
+        let testData = parseJson(/** @type {string} */ (testElement.dataset.testData));
+        if (!Array.isArray(testData)) {
+            testData = [testData];
+        }
 
-                // Substring tests
+        for (const testDataItem of testData) {
+            const {
+                node: nodeSelector,
+                offset,
+                length,
+                forcePreserveWhitespace,
+                generateLayoutContent,
+                reversible,
+                expected: {
+                    node: expectedNodeSelector,
+                    offset: expectedOffset,
+                    content: expectedContent,
+                    remainder: expectedRemainder,
+                },
+            } = testDataItem;
+
+            const node = querySelectorTextNode(testElement, nodeSelector);
+            const expectedNode = querySelectorTextNode(testElement, expectedNodeSelector);
+
+            expect(node).not.toEqual(null);
+            expect(expectedNode).not.toEqual(null);
+            if (node === null || expectedNode === null) { continue; }
+
+            test(`${testDescription} standard test`, () => {
+                const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
+                scanner.seek(length);
+
+                const {node: actualNode1, offset: actualOffset1, content: actualContent1, remainder: actualRemainder1} = scanner;
+                expect(actualContent1).toStrictEqual(expectedContent);
+                expect(actualOffset1).toStrictEqual(expectedOffset);
+                expect(actualNode1).toStrictEqual(expectedNode);
+                expect(actualRemainder1).toStrictEqual(expectedRemainder || 0);
+            });
+
+            test(`${testDescription} substring test`, () => {
                 for (let i = 1; i <= length; ++i) {
                     const scanner = new DOMTextScanner(node, offset, forcePreserveWhitespace, generateLayoutContent);
                     scanner.seek(length - i);
@@ -169,19 +170,17 @@ describe('DOMTextScanner', () => {
                     const {content: actualContent} = scanner;
                     expect(actualContent).toStrictEqual(expectedContent.substring(0, expectedContent.length - i));
                 }
+            });
 
-                if (reversible === false) { continue; }
-
-                // Reversed test
-                {
-                    const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
-                    scanner.seek(-length);
+            test.runIf(reversible)(`${testDescription} reversed test`, () => {
+                const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
+                scanner.seek(-length);
 
-                    const {content: actualContent} = scanner;
-                    expect(actualContent).toStrictEqual(expectedContent);
-                }
+                const {content: actualContent} = scanner;
+                expect(actualContent).toStrictEqual(expectedContent);
+            });
 
-                // Reversed substring tests
+            test.runIf(reversible)(`${testDescription} reversed substring test`, () => {
                 for (let i = 1; i <= length; ++i) {
                     const scanner = new DOMTextScanner(expectedNode, expectedOffset, forcePreserveWhitespace, generateLayoutContent);
                     scanner.seek(-(length - i));
@@ -189,7 +188,7 @@ describe('DOMTextScanner', () => {
                     const {content: actualContent} = scanner;
                     expect(actualContent).toStrictEqual(expectedContent.substring(i));
                 }
-            }
+            });
         }
-    });
+    }
 });
diff --git a/test/text-source-range.test.js b/test/text-source-range.test.js
new file mode 100644
index 00000000..d92198c3
--- /dev/null
+++ b/test/text-source-range.test.js
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2024  Yomitan Authors
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {fail} from 'node:assert';
+import {fileURLToPath} from 'node:url';
+import path from 'path';
+import {afterAll, describe, expect, test} from 'vitest';
+import {TextSourceRange} from '../ext/js/dom/text-source-range.js';
+import {setupDomTest} from './fixtures/dom-test.js';
+
+
+const dirname = path.dirname(fileURLToPath(import.meta.url));
+const textSourceRangeTestEnv = await setupDomTest(path.join(dirname, 'data/html/text-source-range.html'));
+
+
+describe('TextSourceRange', () => {
+    const {window, teardown} = textSourceRangeTestEnv;
+    afterAll(() => teardown(global));
+
+    test('lazy', () => {
+        const {document} = window;
+        const testElement /** @type {NodeListOf<HTMLElement>} */ = document.getElementById('text-source-range-lazy');
+        if (testElement === null) {
+            fail('test element not found');
+        }
+
+        const range = new Range();
+        range.selectNodeContents(testElement);
+
+        const source = TextSourceRange.createLazy(range);
+        const startLength = source.setStartOffset(200, false);
+        const endLength = source.setEndOffset(200, true, false);
+
+        const text = source.text();
+        const textLength = text.length;
+
+        expect(startLength).toBeLessThanOrEqual(textLength);
+        expect(endLength).toBeLessThan(textLength);
+        const count = (text.match(/人/g) || []).length;
+        expect(count).toEqual(1);
+    });
+
+    test('standard', () => {
+        const {document} = window;
+        const testElement /** @type {NodeListOf<HTMLElement>} */ = document.getElementById('text-source-range');
+        if (testElement === null) {
+            fail('test element not found');
+        }
+
+        const range = new Range();
+
+        range.selectNodeContents(testElement);
+
+        const source = TextSourceRange.create(range);
+        const startLength = source.setStartOffset(15, false);
+        const endLength = source.setEndOffset(15, true, false);
+
+        const text = source.text();
+        const textLength = text.length;
+
+        expect(startLength).toBeLessThan(textLength);
+        expect(endLength).toBeLessThan(textLength);
+        const count = (text.match(/山/g) || []).length;
+        expect(count).toEqual(1);
+    });
+});
-- 
cgit v1.2.3