From 6497200afab424806aae26019f95cd832857c018 Mon Sep 17 00:00:00 2001 From: Dominique Hazael-Massieux Date: Mon, 22 Jan 2018 11:56:37 +0100 Subject: [PATCH 1/2] Tersify with ES2017 --- lib/node-linkchecker.js | 136 ++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 83 deletions(-) diff --git a/lib/node-linkchecker.js b/lib/node-linkchecker.js index bb2f0bc..ca550f9 100755 --- a/lib/node-linkchecker.js +++ b/lib/node-linkchecker.js @@ -2,23 +2,19 @@ 'use strict'; // Pseudo-constants: -var DEFAULT_OPTIONS = { +const DEFAULT_OPTIONS = { schemes: ["http:", "https:"], userAgent: "node-linchecker", robotExclusion: true, fragments: true }; -var ua = require("superagent"), +const ua = require("superagent"), whacko = require("whacko"), url = require("url"), - chalk = require("chalk"), - Promise = require('promise'), - options = JSON.parse(JSON.stringify(DEFAULT_OPTIONS)), - list, - result; + Promise = require('promise'); -var linksAttr = { +const linksAttr = { background: ['body'], cite: ['blockquote', 'del', 'ins', 'q'], data: ['object'], @@ -29,72 +25,50 @@ var linksAttr = { poster: ['video'], pluginspage: ['embed'], pluginurl: ['embed'], - src: ['audio', 'embed', 'frame', 'iframe', 'img', 'input', 'script', 'source', 'track', 'video'], + src: ['audio', 'embed', 'frame', 'iframe', 'img', 'input', 'script', 'source', 'track', 'video'] }; -function isSchemeAllowed(url) { - for (var i = 0 ; i < options.schemes.length; i++) { - var scheme = options.schemes[i]; - if (url.protocol === scheme) { - return true; - } - } - return false; -} - -function sortURLs(a, b) { - if (a.href < b.href) return -1 - else if (a.href > b.href) return 1 - else return 0; -} - -// sort array of URL and remove duplicates -function sortUniq(arr) { - return arr.sort(sortURLs).filter(function(item, pos, a) { - return !pos || item.href != a[pos - 1].href; - }); -} - -function extract(baseURL, $) { - for (var attr in linksAttr) { - var elements = linksAttr[attr].map(function(tag) {return tag+'['+attr+']';}).join(','); +function extract(baseURL, $, options) { + const list = { + links : new Set(), + fragments: new Set() + }; + for (let attr in linksAttr) { + const elements = linksAttr[attr].map(tag => `${tag}[${attr}]`).join(','); $(elements).each(function() { if ($(this) !== undefined) { var resolvedUrl = url.parse(url.resolve(baseURL, $(this).attr(attr))); - if (isSchemeAllowed(resolvedUrl)) { + if (options.schemes.includes(resolvedUrl.protocol)) { if (resolvedUrl.hash === null) { - list.links.push(resolvedUrl); + list.links.add(resolvedUrl); } else { - list.fragments.push(resolvedUrl); + list.fragments.add(resolvedUrl); } } } }); } + return list; } -function checkLink(link, method) { +function checkLink(link, method, options) { var req = (method==='get') ? ua.get(link.href) : ua.head(link.href); + req.redirects(3); + const result = { + brokenLinks : [] + }; + return new Promise(function(resolve, reject) { req.set("User-Agent", options.userAgent) .on('error', function(err) { reject(err); }) .end(function(err, res) { - if (!res) { - result.brokenLinks.push({link: link.href, status: 'unknown'}); - } - else { - if (res.headers.location) { // redirect - // superagent doesn't follow the redirect when it's doing a HEAD - // https://github.com/visionmedia/superagent/issues/669 - checkLink(link, 'get'); - } - else if (res.status !== 200) { - result.brokenLinks.push({link: link.href, status: res.status}); - } + const status = res ? res.status : 'unknown'; + if (status !== 200) { + result.brokenLinks.push({link: link.href, status}); } - resolve(); + resolve(result); }); }); } @@ -106,14 +80,18 @@ function hashEscaper(hash) { }); } -function checkFragmentsList(list) { - var fragmentsList = {}; - list.forEach(function(link) { - var fragmentLessURL = link.protocol + '//' + link.host + link.pathname; +function checkFragmentsList(list, options) { + const fragmentsList = {}; + const result = { + brokenFragments: [] + }; + + for (const link of list) { + const fragmentLessURL = link.protocol + '//' + link.host + link.pathname; if (!fragmentsList[fragmentLessURL]) fragmentsList[fragmentLessURL] = []; fragmentsList[fragmentLessURL].push(link.hash); - }); + } var keys = Object.keys(fragmentsList); return new Promise(function(resolve, reject) { @@ -135,31 +113,15 @@ function checkFragmentsList(list) { processLink(index + 1); }); } else { - resolve(); + resolve(result); } - } + }; processLink(0); }); } - - exports.check = function(url, opts) { - options = JSON.parse(JSON.stringify(DEFAULT_OPTIONS)); - if (opts) { - if (opts.hasOwnProperty("userAgent")) options.userAgent = opts.userAgent; - if (opts.hasOwnProperty("schemes")) options.schemes = opts.schemes; - if (opts.hasOwnProperty("robotExclusion")) options.robotExclusion = opts.robotExclusion; - if (opts.hasOwnProperty("fragments")) options.fragments = opts.fragments; - } - list = { - links : [], - fragments: [] - }, - result = { - brokenLinks : [], - brokenFragments: [] - }; + const options = {...DEFAULT_OPTIONS, ...opts}; return new Promise(function(resolve, reject) { ua.get(url) @@ -170,19 +132,27 @@ exports.check = function(url, opts) { .end(function(err, res) { var $ = whacko.load(res.text), baseURL = (res.redirects.length > 0) ? res.redirects[res.redirects.length - 1] : url; - extract(baseURL, $); + const list = extract(baseURL, $, options); var p = []; // links - sortUniq(list.links).forEach(function(link) { - p.push(checkLink(link)); - }); + for (const link of list.links) { + p.push(checkLink(link, 'head', options)); + } // fragments if (options.fragments) { - p.push(checkFragmentsList(sortUniq(list.fragments))); + p.push(checkFragmentsList(list.fragments, options)); } - Promise.all(p).then(function() { - resolve(result); + Promise.all(p).then(function(results) { + const flatResults = results.reduce( + (a,b) => + { return { + brokenLinks: a.brokenLinks.concat(b.brokenLinks || []), + brokenFragments: a.brokenFragments.concat(b.brokenFragments || []) + }; + }, + {brokenLinks:[], brokenFragments: []}); + resolve(flatResults); }); }); }); From bbb1a4f763c0566c0b0d6a74a6941e667aedc23c Mon Sep 17 00:00:00 2001 From: Dominique Hazael-Massieux Date: Mon, 22 Jan 2018 13:56:27 +0100 Subject: [PATCH 2/2] Replace unmaintained whacko dependency by jsdom --- lib/node-linkchecker.js | 34 ++++++++++++++++------------------ package.json | 4 ++-- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/lib/node-linkchecker.js b/lib/node-linkchecker.js index ca550f9..6b8ec91 100755 --- a/lib/node-linkchecker.js +++ b/lib/node-linkchecker.js @@ -10,7 +10,7 @@ const DEFAULT_OPTIONS = { }; const ua = require("superagent"), - whacko = require("whacko"), + {JSDOM} = require('jsdom'), url = require("url"), Promise = require('promise'); @@ -28,25 +28,23 @@ const linksAttr = { src: ['audio', 'embed', 'frame', 'iframe', 'img', 'input', 'script', 'source', 'track', 'video'] }; -function extract(baseURL, $, options) { +function extract(baseURL, doc, options) { const list = { links : new Set(), fragments: new Set() }; for (let attr in linksAttr) { - const elements = linksAttr[attr].map(tag => `${tag}[${attr}]`).join(','); - $(elements).each(function() { - if ($(this) !== undefined) { - var resolvedUrl = url.parse(url.resolve(baseURL, $(this).attr(attr))); - if (options.schemes.includes(resolvedUrl.protocol)) { - if (resolvedUrl.hash === null) { - list.links.add(resolvedUrl); - } else { - list.fragments.add(resolvedUrl); - } + const elementSel = linksAttr[attr].map(tag => `${tag}[${attr}]`).join(','); + for (let el of doc.querySelectorAll(elementSel)) { + const resolvedUrl = url.parse(url.resolve(baseURL, el.getAttribute(attr))); + if (options.schemes.includes(resolvedUrl.protocol)) { + if (resolvedUrl.hash === null) { + list.links.add(resolvedUrl); + } else { + list.fragments.add(resolvedUrl); } } - }); + } } return list; } @@ -103,10 +101,10 @@ function checkFragmentsList(list, options) { reject(err); }) .end(function(err, res) { - var $ = whacko.load(res.text); + const dom = new JSDOM(res.text); fragmentsList[keys[index]].forEach(function(hash) { - var $el = $(hashEscaper(hash).join(",")).first(); - if (!$el.length) { + var el = dom.window.document.querySelector(hashEscaper(hash).join(",")); + if (!el) { result.brokenFragments.push({link: keys[index] + hash, status: res.status}); } }); @@ -130,9 +128,9 @@ exports.check = function(url, opts) { reject(err); }) .end(function(err, res) { - var $ = whacko.load(res.text), + const dom = new JSDOM(res.text), baseURL = (res.redirects.length > 0) ? res.redirects[res.redirects.length - 1] : url; - const list = extract(baseURL, $, options); + const list = extract(baseURL, dom.window.document, options); var p = []; // links for (const link of list.links) { diff --git a/package.json b/package.json index 04f3899..db5f287 100644 --- a/package.json +++ b/package.json @@ -18,10 +18,10 @@ }, "dependencies": { "chalk": "^1.1.1", + "jsdom": "^11.6.0", "promise": "^7.1.1", "resolve-url": "^0.2.1", - "superagent": "^1.7.2", - "whacko": "^0.19.1" + "superagent": "^1.7.2" }, "devDependencies": { "chai": "3.5",