From c6417f5966b6c295778a9d1e948385e2ff9d9509 Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Sat, 27 Jun 2026 14:08:04 +0000 Subject: [PATCH 1/5] Add novel parser --- plugin/js/parsers/NovelArrowParser.js | 123 ++++++++++++++++++++++++++ plugin/popup.html | 1 + unitTest/Tests.html | 1 + unitTest/UtestNovelArrowParser.js | 43 +++++++++ 4 files changed, 168 insertions(+) create mode 100644 plugin/js/parsers/NovelArrowParser.js create mode 100644 unitTest/UtestNovelArrowParser.js diff --git a/plugin/js/parsers/NovelArrowParser.js b/plugin/js/parsers/NovelArrowParser.js new file mode 100644 index 00000000..3610f692 --- /dev/null +++ b/plugin/js/parsers/NovelArrowParser.js @@ -0,0 +1,123 @@ +"use strict"; + +parserFactory.register("novelarrow.com", () => new NovelArrowParser()); + +class NovelArrowParser extends Parser { + constructor() { + super(); + this.minimumThrottle = 1000; + } + + async getChapterUrls(dom) { + let seen = new Set(); + let chapters = []; + for (let link of [...dom.querySelectorAll("a")]) { + if (!this.isChapterLink(link)) { + continue; + } + let chapter = util.hyperLinkToChapter(link); + let key = util.normalizeUrlForCompare(chapter.sourceUrl); + if (!seen.has(key)) { + seen.add(key); + chapters.push(chapter); + } + } + return chapters; + } + + isChapterLink(link) { + let href = link.getAttribute("href") || ""; + return href.includes("/chapter/") && !href.includes("/genre/") && !href.includes("/author/"); + } + + findContent(dom) { + let selectors = [ + "article", + "main", + ".chapter-content", + "#chapter-content", + "[class*='chapter']", + "[id*='chapter']", + ".entry-content", + ".content" + ]; + for (let selector of selectors) { + let element = dom.querySelector(selector); + if (element != null && element.textContent.trim().length > 40) { + return element; + } + } + return dom.querySelector("body"); + } + + extractTitleImpl(dom) { + return dom.querySelector("h1") + || dom.querySelector(".novel-title") + || dom.querySelector(".entry-title") + || dom.querySelector("meta[property='og:title']") + || dom.querySelector("title"); + } + + extractAuthor(dom) { + let authorLink = [...dom.querySelectorAll("a")] + .find(link => link.href.includes("/author/")); + return authorLink?.textContent?.trim() || super.extractAuthor(dom); + } + + extractSubject(dom) { + let genres = [...dom.querySelectorAll("a")] + .filter(link => link.href.includes("/genre/")) + .map(link => link.textContent.trim()) + .filter(text => text.length > 0); + return genres.slice(0, 6).join(", "); + } + + extractDescription(dom) { + let metaDescription = dom.querySelector("meta[name='description']")?.getAttribute("content"); + return metaDescription?.trim() || super.extractDescription(dom); + } + + extractPublisher() { + return "Novel Arrow"; + } + + findCoverImageUrl(dom) { + let img = dom.querySelector("meta[property='og:image']")?.getAttribute("content") + || dom.querySelector("img[src*='novelarrow.com']") + || dom.querySelector("img"); + return img?.getAttribute("content") || img?.getAttribute("src") || null; + } + + findChapterTitle(dom) { + return dom.querySelector("h1, h2, .chapter-title, .entry-title") + || dom.querySelector("meta[property='og:title']") + || dom.querySelector("title"); + } + + removeUnwantedElementsFromContentElement(element) { + util.removeChildElementsMatchingSelector(element, "header, nav, footer, aside, form, script, style, noscript, .comments, .comment, .ads, .ad, .social-share"); + super.removeUnwantedElementsFromContentElement(element); + } + + async fetchChapter(url) { + let options = { parser: this }; + return (await HttpClient.wrapFetch(url, options)).responseXML; + } + + isCustomError(response) { + return response.responseXML?.title === "Just a moment..."; + } + + setCustomErrorResponse(url, wrapOptions, checkedresponse) { + return { + url: url, + wrapOptions: wrapOptions, + response: { + url: checkedresponse.response.url, + status: 403, + retryDelay: [80, 40, 20, 10, 5], + }, + errorMessage: "NovelArrow returned a Cloudflare challenge. Open the page in the browser, pass the check, and retry.", + }; + } +} diff --git a/plugin/popup.html b/plugin/popup.html index 7436f1ec..88e724dc 100644 --- a/plugin/popup.html +++ b/plugin/popup.html @@ -817,6 +817,7 @@

Instructions

+ diff --git a/unitTest/Tests.html b/unitTest/Tests.html index 11e2a04e..6a6cf502 100644 --- a/unitTest/Tests.html +++ b/unitTest/Tests.html @@ -126,6 +126,7 @@ + diff --git a/unitTest/UtestNovelArrowParser.js b/unitTest/UtestNovelArrowParser.js new file mode 100644 index 00000000..8449c939 --- /dev/null +++ b/unitTest/UtestNovelArrowParser.js @@ -0,0 +1,43 @@ +"use strict"; + +module("NovelArrowParser"); + +QUnit.test("parses story metadata and chapter links", function(assert) { + let dom = new DOMParser().parseFromString(NovelArrowStorySample, "text/html"); + let parser = new NovelArrowParser(); + + let chapters = parser.getChapterUrls(dom); + assert.equal(chapters.length, 2); + assert.equal(chapters[0].sourceUrl, "https://novelarrow.com/chapter/i-sell-gacha-jars-in-one-piece/chapter-1-the-jar-merchant"); + assert.equal(chapters[0].title, "Chapter 1: The Jar Merchant"); + assert.equal(chapters[1].title, "Chapter 2: Little Luffy"); + + assert.equal(parser.extractTitleImpl(dom).textContent.trim(), "I Sell Gacha Jars in One Piece"); + assert.equal(parser.extractAuthor(dom), "ElvenKing20"); + assert.equal(parser.extractSubject(dom), "Action, Adventure"); + assert.equal(parser.findCoverImageUrl(dom), "https://images.novelarrow.com/novel_480_720/i-sell-gacha-jars-in-one-piece.jpg"); +}); + +QUnit.test("recognises Cloudflare challenge responses", function(assert) { + let parser = new NovelArrowParser(); + assert.true(parser.isCustomError({responseXML: {title: "Just a moment..."}})); + assert.false(parser.isCustomError({responseXML: {title: "Normal page"}})); +}); + +let NovelArrowStorySample = ` + + + I Sell Gacha Jars in One Piece + + +
+

I Sell Gacha Jars in One Piece

+ cover + ElvenKing20 + Action + Adventure + Chapter 1: The Jar Merchant + Chapter 2: Little Luffy +
+ +`; From 692b93596de3f0a89247820a63b6d6ba14b16acb Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Sat, 27 Jun 2026 14:49:25 +0000 Subject: [PATCH 2/5] fix error in image fetch & increase robustness --- plugin/js/parsers/NovelArrowParser.js | 46 +++++++++++++------ unitTest/UtestNovelArrowParser.js | 65 +++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/plugin/js/parsers/NovelArrowParser.js b/plugin/js/parsers/NovelArrowParser.js index 3610f692..b2404850 100644 --- a/plugin/js/parsers/NovelArrowParser.js +++ b/plugin/js/parsers/NovelArrowParser.js @@ -59,22 +59,27 @@ class NovelArrowParser extends Parser { } extractAuthor(dom) { - let authorLink = [...dom.querySelectorAll("a")] - .find(link => link.href.includes("/author/")); - return authorLink?.textContent?.trim() || super.extractAuthor(dom); + let authorLink = dom.querySelector("a[href*='/author/']"); + let author = authorLink?.textContent?.trim(); + if (!author && authorLink) { + let href = authorLink.getAttribute("href") || authorLink.href || ""; + let segments = href.split("/").filter(Boolean); + author = segments.pop() || ""; + } + return author || dom.querySelector("meta[name='author']")?.getAttribute("content")?.trim() || super.extractAuthor(dom); } extractSubject(dom) { - let genres = [...dom.querySelectorAll("a")] - .filter(link => link.href.includes("/genre/")) + return [...dom.querySelectorAll("a[href*='/genre/']")] .map(link => link.textContent.trim()) - .filter(text => text.length > 0); - return genres.slice(0, 6).join(", "); + .filter(Boolean) + .slice(0, 6) + .join(", "); } extractDescription(dom) { - let metaDescription = dom.querySelector("meta[name='description']")?.getAttribute("content"); - return metaDescription?.trim() || super.extractDescription(dom); + return dom.querySelector("meta[name='description']")?.getAttribute("content")?.trim() + || super.extractDescription(dom); } extractPublisher() { @@ -82,10 +87,25 @@ class NovelArrowParser extends Parser { } findCoverImageUrl(dom) { - let img = dom.querySelector("meta[property='og:image']")?.getAttribute("content") - || dom.querySelector("img[src*='novelarrow.com']") - || dom.querySelector("img"); - return img?.getAttribute("content") || img?.getAttribute("src") || null; + if (!dom) { + return null; + } + + let metaImage = [ + "meta[property='og:image']", + "meta[property='og:image:secure_url']", + "meta[name='twitter:image']", + "meta[name='image']" + ] + .map(selector => dom.querySelector(selector)?.getAttribute("content")?.trim()) + .find(Boolean); + + if (metaImage) { + return metaImage; + } + + return dom.querySelector("img[src*='novelarrow.com']")?.src + || util.getFirstImgSrc(dom, "body"); } findChapterTitle(dom) { diff --git a/unitTest/UtestNovelArrowParser.js b/unitTest/UtestNovelArrowParser.js index 8449c939..777fdaa3 100644 --- a/unitTest/UtestNovelArrowParser.js +++ b/unitTest/UtestNovelArrowParser.js @@ -18,6 +18,28 @@ QUnit.test("parses story metadata and chapter links", function(assert) { assert.equal(parser.findCoverImageUrl(dom), "https://images.novelarrow.com/novel_480_720/i-sell-gacha-jars-in-one-piece.jpg"); }); +QUnit.test("finds cover image from og:image meta", function(assert) { + let dom = new DOMParser().parseFromString(NovelArrowOgImageStorySample, "text/html"); + let parser = new NovelArrowParser(); + + assert.equal(parser.findCoverImageUrl(dom), "https://images.novelarrow.com/novel_480_720/i-sell-gacha-jars-in-one-piece.jpg"); +}); + +QUnit.test("uses meta[name=author] when author link is absent", function(assert) { + let dom = new DOMParser().parseFromString(NovelArrowAuthorMetaSample, "text/html"); + let parser = new NovelArrowParser(); + + assert.equal(parser.extractAuthor(dom), "ElvenKing20"); +}); + +QUnit.test("gracefully handles missing author and cover image fields", function(assert) { + let dom = new DOMParser().parseFromString(NovelArrowMissingMetadataSample, "text/html"); + let parser = new NovelArrowParser(); + + assert.equal(parser.extractAuthor(dom), ""); + assert.equal(parser.findCoverImageUrl(dom), null); +}); + QUnit.test("recognises Cloudflare challenge responses", function(assert) { let parser = new NovelArrowParser(); assert.true(parser.isCustomError({responseXML: {title: "Just a moment..."}})); @@ -41,3 +63,46 @@ let NovelArrowStorySample = ` `; + +let NovelArrowOgImageStorySample = ` + + + I Sell Gacha Jars in One Piece + + + +
+

I Sell Gacha Jars in One Piece

+ ElvenKing20 + Action + Adventure +
+ +`; + +let NovelArrowMissingMetadataSample = ` + + + I Sell Gacha Jars in One Piece + + +
+

I Sell Gacha Jars in One Piece

+ Action + Adventure +
+ +`; + +let NovelArrowAuthorMetaSample = ` + + + I Sell Gacha Jars in One Piece + + + +
+

I Sell Gacha Jars in One Piece

+
+ +`; From fc6768cd832b9a8219336b909a4c4611331b44fc Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Sat, 27 Jun 2026 15:07:43 +0000 Subject: [PATCH 3/5] Fix chapter list in parsing to be complete --- plugin/js/parsers/NovelArrowParser.js | 92 +++++++++++++++++++++++---- unitTest/UtestNovelArrowParser.js | 28 ++++++++ 2 files changed, 108 insertions(+), 12 deletions(-) diff --git a/plugin/js/parsers/NovelArrowParser.js b/plugin/js/parsers/NovelArrowParser.js index b2404850..7e4a7cbb 100644 --- a/plugin/js/parsers/NovelArrowParser.js +++ b/plugin/js/parsers/NovelArrowParser.js @@ -9,20 +9,84 @@ class NovelArrowParser extends Parser { } async getChapterUrls(dom) { - let seen = new Set(); - let chapters = []; - for (let link of [...dom.querySelectorAll("a")]) { - if (!this.isChapterLink(link)) { - continue; - } - let chapter = util.hyperLinkToChapter(link); - let key = util.normalizeUrlForCompare(chapter.sourceUrl); - if (!seen.has(key)) { - seen.add(key); - chapters.push(chapter); + let chapters = this.getChapterUrlsFromInitialChapterList(dom); + return chapters.length > 0 + ? chapters + : util.hyperlinksToChapterList(dom, link => this.isChapterLink(link) && !this.isReadNowLink(link)); + } + + getChapterUrlsFromInitialChapterList(dom) { + if (!dom) { + return []; + } + + let script = [...dom.querySelectorAll("script")] + .find(scriptElement => (scriptElement.textContent || scriptElement.innerText || "").includes("initialChapterList")); + if (!script) { + return []; + } + + try { + return this.findInitialChapterList(JSON.parse(script.textContent || script.innerText || ""), dom); + } catch (e) { + return []; + } + } + + findInitialChapterList(value, dom) { + if (Array.isArray(value?.initialChapterList)) { + return value.initialChapterList + .filter(chapter => chapter && typeof chapter === "object") + .map(chapter => ({ + sourceUrl: this.getChapterUrl(chapter.chapter_id || chapter.slug || chapter.id, dom), + title: chapter.chapter_name || chapter.name || chapter.title || "", + newArc: null + })); + } + + if (value && typeof value === "object") { + for (let child of Object.values(value)) { + let chapters = this.findInitialChapterList(child, dom); + if (chapters.length > 0) { + return chapters; + } } } - return chapters; + return []; + } + + getChapterUrl(path, dom) { + if (!path) { + return ""; + } + if (/^https?:\/\//i.test(path)) { + return path; + } + + let baseUrl = this.getChapterBaseUrl(dom); + let slug = this.getNovelSlug(dom); + return new URL(path.startsWith("/") ? path : (slug ? `/chapter/${slug}/${path}` : path), baseUrl).href; + } + + getChapterBaseUrl(dom) { + let url = dom?.querySelector("link[rel='canonical']")?.href + || dom?.querySelector("meta[property='og:url']")?.getAttribute("content") + || dom?.baseURI + || "https://novelarrow.com"; + try { + return new URL(url).origin; + } catch (e) { + return "https://novelarrow.com"; + } + } + + getNovelSlug(dom) { + let url = dom?.querySelector("link[rel='canonical']")?.href + || dom?.querySelector("meta[property='og:url']")?.getAttribute("content") + || dom?.baseURI + || ""; + let match = url.match(/\/novel\/([^/?#]+)/); + return match ? match[1] : ""; } isChapterLink(link) { @@ -30,6 +94,10 @@ class NovelArrowParser extends Parser { return href.includes("/chapter/") && !href.includes("/genre/") && !href.includes("/author/"); } + isReadNowLink(link) { + return (link.textContent || "").trim().toLowerCase() === "read now"; + } + findContent(dom) { let selectors = [ "article", diff --git a/unitTest/UtestNovelArrowParser.js b/unitTest/UtestNovelArrowParser.js index 777fdaa3..c3f95f4e 100644 --- a/unitTest/UtestNovelArrowParser.js +++ b/unitTest/UtestNovelArrowParser.js @@ -40,6 +40,17 @@ QUnit.test("gracefully handles missing author and cover image fields", function( assert.equal(parser.findCoverImageUrl(dom), null); }); +QUnit.test("prefers embedded chapter names over generic Read Now links", function(assert) { + let dom = new DOMParser().parseFromString(NovelArrowEmbeddedChapterListSample, "text/html"); + let parser = new NovelArrowParser(); + + let chapters = parser.getChapterUrls(dom); + assert.equal(chapters.length, 2); + assert.equal(chapters[0].title, "Chapter 1: The Jar Merchant"); + assert.equal(chapters[0].sourceUrl, "https://novelarrow.com/chapter/i-sell-gacha-jars-in-one-piece/chapter-1-the-jar-merchant"); + assert.equal(chapters[1].title, "Chapter 2: Little Luffy"); +}); + QUnit.test("recognises Cloudflare challenge responses", function(assert) { let parser = new NovelArrowParser(); assert.true(parser.isCustomError({responseXML: {title: "Just a moment..."}})); @@ -106,3 +117,20 @@ let NovelArrowAuthorMetaSample = ` `; + +let NovelArrowEmbeddedChapterListSample = ` + + + I Sell Gacha Jars in One Piece + + + + +
+ Read Now + Chapter 2: Little Luffy +
+ +`; From e3353bbe46a0cb26c8837290189d3e5c61ffa922 Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Sat, 27 Jun 2026 15:18:10 +0000 Subject: [PATCH 4/5] Fix missing c1 issue --- plugin/js/parsers/NovelArrowParser.js | 61 ++++++++++++++++++++++++++- unitTest/UtestNovelArrowParser.js | 4 +- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/plugin/js/parsers/NovelArrowParser.js b/plugin/js/parsers/NovelArrowParser.js index 7e4a7cbb..af26fa28 100644 --- a/plugin/js/parsers/NovelArrowParser.js +++ b/plugin/js/parsers/NovelArrowParser.js @@ -27,12 +27,71 @@ class NovelArrowParser extends Parser { } try { - return this.findInitialChapterList(JSON.parse(script.textContent || script.innerText || ""), dom); + return this.findInitialChapterList(this.extractJsonObject(script.textContent || script.innerText || ""), dom); } catch (e) { return []; } } + extractJsonObject(scriptText) { + let markerIndex = scriptText.indexOf("initialChapterList"); + if (markerIndex < 0) { + return null; + } + + for (let index = markerIndex; index >= 0; --index) { + if (scriptText[index] !== "{") { + continue; + } + + let candidate = this.extractBalancedJson(scriptText, index); + if (candidate == null) { + continue; + } + + try { + return JSON.parse(candidate); + } catch (e) { + // Try the next brace if this one is not a valid JSON object. + } + } + + return null; + } + + extractBalancedJson(text, startIndex) { + let depth = 0; + let isString = false; + let isEscaped = false; + + for (let index = startIndex; index < text.length; ++index) { + let char = text[index]; + if (isString) { + if (isEscaped) { + isEscaped = false; + } else if (char === "\\") { + isEscaped = true; + } else if (char === '"') { + isString = false; + } + continue; + } + + if (char === '"') { + isString = true; + } else if (char === "{") { + ++depth; + } else if (char === "}") { + --depth; + if (depth === 0) { + return text.substring(startIndex, index + 1); + } + } + } + + return null; + } + findInitialChapterList(value, dom) { if (Array.isArray(value?.initialChapterList)) { return value.initialChapterList diff --git a/unitTest/UtestNovelArrowParser.js b/unitTest/UtestNovelArrowParser.js index c3f95f4e..6e054113 100644 --- a/unitTest/UtestNovelArrowParser.js +++ b/unitTest/UtestNovelArrowParser.js @@ -123,8 +123,8 @@ let NovelArrowEmbeddedChapterListSample = ` I Sell Gacha Jars in One Piece - From bc6e3e384b852f5b76aaeae414cbf7337e1806ab Mon Sep 17 00:00:00 2001 From: Rahul Jain Date: Sat, 27 Jun 2026 15:27:49 +0000 Subject: [PATCH 5/5] fix eslint issue --- plugin/js/parsers/NovelArrowParser.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/js/parsers/NovelArrowParser.js b/plugin/js/parsers/NovelArrowParser.js index af26fa28..f3fb481a 100644 --- a/plugin/js/parsers/NovelArrowParser.js +++ b/plugin/js/parsers/NovelArrowParser.js @@ -71,13 +71,13 @@ class NovelArrowParser extends Parser { isEscaped = false; } else if (char === "\\") { isEscaped = true; - } else if (char === '"') { + } else if (char === "\"") { isString = false; } continue; } - if (char === '"') { + if (char === "\"") { isString = true; } else if (char === "{") { ++depth;