From cebe4a95be4d05382577abfb89e50ce342f2ad70 Mon Sep 17 00:00:00 2001 From: Mohammad Umer Alam Date: Thu, 11 Jul 2024 14:51:47 -0400 Subject: [PATCH 1/4] Sanitizing html in web provider --- package.json | 10 +- pnpm-lock.yaml | 237 ++++++++++++++++++++++++++++++++++++++++-- provider/web/index.ts | 18 +++- 3 files changed, 252 insertions(+), 13 deletions(-) diff --git a/package.json b/package.json index 285c6a01..8082feb7 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,9 @@ "@storybook/html-vite": "^7.6.7", "@storybook/react": "^7.6.7", "@storybook/react-vite": "^8.1.1", + "@types/dompurify": "^3.0.5", "@types/js-yaml": "^4.0.9", + "@types/jsdom": "^21.1.7", "@types/node": "^20", "@types/semver": "^7.5.6", "@vitejs/plugin-react": "^4.2.1", @@ -49,7 +51,9 @@ "vitest": "^1.6.0" }, "stylelint": { - "extends": ["./.config/stylelintrc.json"] + "extends": [ + "./.config/stylelintrc.json" + ] }, "pnpm": { "packageExtensions": { @@ -59,5 +63,9 @@ } } } + }, + "dependencies": { + "dompurify": "^3.1.6", + "jsdom": "^24.1.0" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index df4e6e02..540e0a8c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,6 +9,13 @@ packageExtensionsChecksum: 62a717adeb096411cd21ec10eca96ccf importers: .: + dependencies: + dompurify: + specifier: ^3.1.6 + version: 3.1.6 + jsdom: + specifier: ^24.1.0 + version: 24.1.0 devDependencies: '@biomejs/biome': specifier: 1.5.3 @@ -28,9 +35,15 @@ importers: '@storybook/react-vite': specifier: ^8.1.1 version: 8.1.1(prettier@3.2.5)(react-dom@18.2.0)(react@18.2.0)(typescript@5.4.5)(vite@5.2.11) + '@types/dompurify': + specifier: ^3.0.5 + version: 3.0.5 '@types/js-yaml': specifier: ^4.0.9 version: 4.0.9 + '@types/jsdom': + specifier: ^21.1.7 + version: 21.1.7 '@types/node': specifier: ^20 version: 20.10.0 @@ -72,7 +85,7 @@ importers: version: 5.2.11(@types/node@20.10.0) vitest: specifier: ^1.6.0 - version: 1.6.0(@types/node@20.10.0) + version: 1.6.0(@types/node@20.10.0)(jsdom@24.1.0) bin: dependencies: @@ -6357,6 +6370,12 @@ packages: resolution: {integrity: sha512-eOIHzCUSH7SMfonMG1LsC2f8vxBFtho6NGBznK41R84YzPuvSBzrhEps33IsQiOW9+VL6NQ9DbjQJznk/S4uRA==} dev: true + /@types/dompurify@3.0.5: + resolution: {integrity: sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==} + dependencies: + '@types/trusted-types': 2.0.7 + dev: true + /@types/ejs@3.1.2: resolution: {integrity: sha512-ZmiaE3wglXVWBM9fyVC17aGPkLo/UgaOjEiI2FXQfyczrCefORPxIe+2dVmnmk3zkVIbizjrlQzmPGhSYGXG5g==} dev: true @@ -6474,6 +6493,14 @@ packages: resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==} dev: true + /@types/jsdom@21.1.7: + resolution: {integrity: sha512-yOriVnggzrnQ3a9OKOCxaVuSug3w3/SbOj5i7VwXWZEyUNl3bLF9V3MfxGbZKuwqJOQyRfqXyROBB1CoZLFWzA==} + dependencies: + '@types/node': 20.11.20 + '@types/tough-cookie': 4.0.5 + parse5: 7.1.2 + dev: true + /@types/json-schema@7.0.14: resolution: {integrity: sha512-U3PUjAudAdJBeC2pgN8uTIKgxrb4nlDF3SF0++EldXQvQBGkpFZMSnwQiIoDU77tv45VgNkl/L4ouD+rEomujw==} dev: true @@ -6626,6 +6653,14 @@ packages: '@types/node': 20.11.20 dev: true + /@types/tough-cookie@4.0.5: + resolution: {integrity: sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==} + dev: true + + /@types/trusted-types@2.0.7: + resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==} + dev: true + /@types/tsscmp@1.0.2: resolution: {integrity: sha512-cy7BRSU8GYYgxjcx0Py+8lo5MthuDhlyu076KUcYzVNXL23luYgRHkMG2fIFEc6neckeh/ntP82mw+U4QjZq+g==} dev: false @@ -6765,7 +6800,7 @@ packages: std-env: 3.7.0 strip-literal: 2.0.0 test-exclude: 6.0.0 - vitest: 1.6.0(@types/node@20.10.0) + vitest: 1.6.0(@types/node@20.10.0)(jsdom@24.1.0) transitivePeerDependencies: - supports-color dev: true @@ -8003,6 +8038,12 @@ packages: resolution: {integrity: sha512-FAaLDaplstoRsDR8XGYH51znUN0UY7nMc6Z9/fvE8EXGwvJE9hu7W2vHwx1+bd6gCYnln9nLbzxFTrcO9YQDZw==} dev: false + /cssstyle@4.0.1: + resolution: {integrity: sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==} + engines: {node: '>=18'} + dependencies: + rrweb-cssom: 0.6.0 + /csstype@3.1.0: resolution: {integrity: sha512-uX1KG+x9h5hIJsaKR9xHUeUraxf8IODOwq9JLNPq6BwB04a/xgpq3rcx47l5BZu5zBPlgD342tdke3Hom/nJRA==} @@ -8013,6 +8054,13 @@ packages: type: 1.2.0 dev: true + /data-urls@5.0.0: + resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} + engines: {node: '>=18'} + dependencies: + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + /data-view-buffer@1.0.1: resolution: {integrity: sha512-0lht7OugA5x3iJLOWFhWK/5ehONdprk0ISXqVFn/NFrDu+cuc8iADFrGQz5BnRK7LLU3JmkbXSxaqX+/mXYtUA==} engines: {node: '>= 0.4'} @@ -8103,6 +8151,9 @@ packages: engines: {node: '>=10'} dev: true + /decimal.js@10.4.3: + resolution: {integrity: sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==} + /decode-named-character-reference@1.0.2: resolution: {integrity: sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==} dependencies: @@ -8376,6 +8427,10 @@ packages: dependencies: domelementtype: 2.3.0 + /dompurify@3.1.6: + resolution: {integrity: sha512-cTOAhc36AalkjtBpfG6O8JimdTMWNXjiePT2xQH/ppBGi/4uIpmj8eKyIkMJErXWARyINV/sB38yf8JCLF5pbQ==} + dev: false + /domutils@3.1.0: resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==} dependencies: @@ -9210,7 +9265,6 @@ packages: asynckit: 0.4.0 combined-stream: 1.0.8 mime-types: 2.1.35 - dev: false /forwarded@0.2.0: resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} @@ -9789,6 +9843,12 @@ packages: lru-cache: 6.0.0 dev: true + /html-encoding-sniffer@4.0.0: + resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} + engines: {node: '>=18'} + dependencies: + whatwg-encoding: 3.1.1 + /html-escaper@2.0.0: resolution: {integrity: sha512-a4u9BeERWGu/S8JiWEAQcdrg9v4QArtP9keViQjGMdff20fBdd8waotXaNmODqBe6uZ3Nafi7K/ho4gCQHV3Ig==} dev: true @@ -9866,6 +9926,15 @@ packages: - supports-color dev: true + /http-proxy-agent@7.0.2: + resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} + engines: {node: '>= 14'} + dependencies: + agent-base: 7.1.0 + debug: 4.3.4(supports-color@8.1.1) + transitivePeerDependencies: + - supports-color + /https-proxy-agent@4.0.0: resolution: {integrity: sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==} engines: {node: '>= 6.0.0'} @@ -9895,6 +9964,15 @@ packages: transitivePeerDependencies: - supports-color + /https-proxy-agent@7.0.5: + resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==} + engines: {node: '>= 14'} + dependencies: + agent-base: 7.1.0 + debug: 4.3.4(supports-color@8.1.1) + transitivePeerDependencies: + - supports-color + /human-signals@2.1.0: resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==} engines: {node: '>=10.17.0'} @@ -9911,6 +9989,12 @@ packages: dependencies: safer-buffer: 2.1.2 + /iconv-lite@0.6.3: + resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} + engines: {node: '>=0.10.0'} + dependencies: + safer-buffer: 2.1.2 + /ieee754@1.2.1: resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} requiresBuild: true @@ -10247,6 +10331,9 @@ packages: resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==} engines: {node: '>=0.10.0'} + /is-potential-custom-element-name@1.0.1: + resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==} + /is-promise@2.2.2: resolution: {integrity: sha512-+lP4/6lKUBfQjZ2pdxThZvLUAafmZb8OAxFb8XXtiQmS35INgr85hdOGoEs124ez1FCnZJt6jau/T+alh58QFQ==} dev: true @@ -10573,6 +10660,41 @@ packages: - supports-color dev: true + /jsdom@24.1.0: + resolution: {integrity: sha512-6gpM7pRXCwIOKxX47cgOyvyQDN/Eh0f1MeKySBV2xGdKtqJBLj8P25eY3EVCWo2mglDDzozR2r2MW4T+JiNUZA==} + engines: {node: '>=18'} + peerDependencies: + canvas: ^2.11.2 + peerDependenciesMeta: + canvas: + optional: true + dependencies: + cssstyle: 4.0.1 + data-urls: 5.0.0 + decimal.js: 10.4.3 + form-data: 4.0.0 + html-encoding-sniffer: 4.0.0 + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.5 + is-potential-custom-element-name: 1.0.1 + nwsapi: 2.2.12 + parse5: 7.1.2 + rrweb-cssom: 0.7.1 + saxes: 6.0.0 + symbol-tree: 3.2.4 + tough-cookie: 4.1.4 + w3c-xmlserializer: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-encoding: 3.1.1 + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + ws: 8.18.0 + xml-name-validator: 5.0.0 + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + /jsesc@0.5.0: resolution: {integrity: sha512-uZz5UnB7u4T9LvwmFqXii7pZSouaRPorGs5who1Ip7VO0wxanFvBL7GkM6dTHlgX+jhBApRetaWpnDabOeTcnA==} hasBin: true @@ -11905,6 +12027,9 @@ packages: dependencies: boolbase: 1.0.0 + /nwsapi@2.2.12: + resolution: {integrity: sha512-qXDmcVlZV4XRtKFzddidpfVP4oMSGhga+xdMc25mv8kaLUHtgzCDhUxkrN8exkGdTlLNaXj7CV3GtON7zuGZ+w==} + /object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -12204,7 +12329,6 @@ packages: resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==} dependencies: entities: 4.5.0 - dev: true /parseurl@1.3.3: resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} @@ -12611,6 +12735,9 @@ packages: /proxy-from-env@1.1.0: resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + /psl@1.9.0: + resolution: {integrity: sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==} + /pump@2.0.1: resolution: {integrity: sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==} dependencies: @@ -12636,7 +12763,10 @@ packages: /punycode@2.3.0: resolution: {integrity: sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==} engines: {node: '>=6'} - dev: true + + /punycode@2.3.1: + resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} + engines: {node: '>=6'} /puppeteer-core@2.1.1: resolution: {integrity: sha512-n13AWriBMPYxnpbb6bnaY5YoY6rGj8vPLrz6CZF3o0qJNEwlcfJVxBzYZ0NJsQ21UbdJoijPCDrM++SUVEz7+w==} @@ -12697,6 +12827,9 @@ packages: dependencies: side-channel: 1.0.4 + /querystringify@2.2.0: + resolution: {integrity: sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==} + /queue-tick@1.0.1: resolution: {integrity: sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==} dev: true @@ -13163,6 +13296,9 @@ packages: engines: {node: '>=0.10.0'} dev: true + /requires-port@1.0.0: + resolution: {integrity: sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==} + /resolve-from@4.0.0: resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==} engines: {node: '>=4'} @@ -13288,6 +13424,12 @@ packages: fsevents: 2.3.3 dev: false + /rrweb-cssom@0.6.0: + resolution: {integrity: sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw==} + + /rrweb-cssom@0.7.1: + resolution: {integrity: sha512-TrEMa7JGdVm0UThDJSx7ddw5nVm3UJS9o9CCIZ72B1vSyEZoziDqBYP3XIoi/12lKrJR8rE3jeFHMok2F/Mnsg==} + /run-applescript@7.0.0: resolution: {integrity: sha512-9by4Ij99JUr/MCFBUkDKLWK3G9HVXmabKz9U5MlIAIuvuzkiOicRYs8XJLxX+xahD+mLiiCYDqF9dKAgtzKP1A==} engines: {node: '>=18'} @@ -13343,6 +13485,12 @@ packages: resolution: {integrity: sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==} dev: true + /saxes@6.0.0: + resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==} + engines: {node: '>=v12.22.7'} + dependencies: + xmlchars: 2.2.0 + /scheduler@0.23.0: resolution: {integrity: sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==} dependencies: @@ -13943,6 +14091,9 @@ packages: - utf-8-validate dev: true + /symbol-tree@3.2.4: + resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} + /synchronous-promise@2.0.17: resolution: {integrity: sha512-AsS729u2RHUfEra9xJrE39peJcc2stq2+poBXX8bcM08Y6g9j/i/PUzwNQqkaJde7Ntg1TO7bSREbR5sdosQ+g==} dev: true @@ -14177,9 +14328,24 @@ packages: engines: {node: '>=6'} dev: false + /tough-cookie@4.1.4: + resolution: {integrity: sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==} + engines: {node: '>=6'} + dependencies: + psl: 1.9.0 + punycode: 2.3.0 + universalify: 0.2.0 + url-parse: 1.5.10 + /tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + /tr46@5.0.0: + resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==} + engines: {node: '>=18'} + dependencies: + punycode: 2.3.1 + /tree-kill@1.2.2: resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==} hasBin: true @@ -14528,6 +14694,10 @@ packages: resolution: {integrity: sha512-yCzhz6FN2wU1NiiQRogkTQszlQSlpWaw8SvVegAc+bDxbzHgh1vX8uIe8OYyMH6DwH+sdTJsgMl36+mSMdRJIQ==} dev: false + /universalify@0.2.0: + resolution: {integrity: sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==} + engines: {node: '>= 4.0.0'} + /universalify@1.0.0: resolution: {integrity: sha512-rb6X1W158d7pRQBg5gkR8uPaSfiids68LTJQYOtEUhoJUWBdaQHsuT/EUduxXYxcrt4r5PJ4fuHW1MHT6p0qug==} engines: {node: '>= 10.0.0'} @@ -14587,6 +14757,12 @@ packages: resolution: {integrity: sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==} dev: true + /url-parse@1.5.10: + resolution: {integrity: sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==} + dependencies: + querystringify: 2.2.0 + requires-port: 1.0.0 + /url-template@2.0.8: resolution: {integrity: sha512-XdVKMF4SJ0nP/O7XIPB0JwAEuT9lDIYnNsK8yGVe43y0AWoKeJNdv3ZNWh7ksJ6KqQFjOO6ox/VEitLnaVNufw==} dev: false @@ -14837,12 +15013,12 @@ packages: vitest: '>=0.16.0' dependencies: cross-fetch: 3.1.8 - vitest: 1.6.0(@types/node@20.10.0) + vitest: 1.6.0(@types/node@20.10.0)(jsdom@24.1.0) transitivePeerDependencies: - encoding dev: true - /vitest@1.6.0(@types/node@20.10.0): + /vitest@1.6.0(@types/node@20.10.0)(jsdom@24.1.0): resolution: {integrity: sha512-H5r/dN06swuFnzNFhq/dnz37bPXnq8xB2xB5JOVk8K09rUtoeNN+LHWkoQ0A/i3hvbUKKcCei9KpbxqHMLhLLA==} engines: {node: ^18.0.0 || >=20.0.0} hasBin: true @@ -14877,6 +15053,7 @@ packages: chai: 4.3.10 debug: 4.3.4(supports-color@8.1.1) execa: 8.0.1 + jsdom: 24.1.0 local-pkg: 0.5.0 magic-string: 0.30.7 pathe: 1.1.2 @@ -14905,6 +15082,12 @@ packages: /w3c-keyname@2.2.8: resolution: {integrity: sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==} + /w3c-xmlserializer@5.0.0: + resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==} + engines: {node: '>=18'} + dependencies: + xml-name-validator: 5.0.0 + /walker@1.0.8: resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} dependencies: @@ -14932,6 +15115,10 @@ packages: /webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + /webidl-conversions@7.0.0: + resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==} + engines: {node: '>=12'} + /webpack-sources@3.2.3: resolution: {integrity: sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w==} engines: {node: '>=10.13.0'} @@ -14941,6 +15128,23 @@ packages: resolution: {integrity: sha512-poXpCylU7ExuvZK8z+On3kX+S8o/2dQ/SVYueKA0D4WEMXROXgY8Ez50/bQEUmvoSMMrWcrJqCHuhAbsiwg7Dg==} dev: true + /whatwg-encoding@3.1.1: + resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} + engines: {node: '>=18'} + dependencies: + iconv-lite: 0.6.3 + + /whatwg-mimetype@4.0.0: + resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} + engines: {node: '>=18'} + + /whatwg-url@14.0.0: + resolution: {integrity: sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==} + engines: {node: '>=18'} + dependencies: + tr46: 5.0.0 + webidl-conversions: 7.0.0 + /whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} dependencies: @@ -15103,6 +15307,22 @@ packages: optional: true dev: true + /ws@8.18.0: + resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==} + engines: {node: '>=10.0.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: '>=5.0.2' + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + + /xml-name-validator@5.0.0: + resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==} + engines: {node: '>=18'} + /xml2js@0.5.0: resolution: {integrity: sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==} engines: {node: '>=4.0.0'} @@ -15116,6 +15336,9 @@ packages: engines: {node: '>=4.0'} dev: true + /xmlchars@2.2.0: + resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + /xss@1.0.14: resolution: {integrity: sha512-og7TEJhXvn1a7kzZGQ7ETjdQVS2UfZyTlsEdDOqvQF7GoxNfY+0YLCzBy1kPdsDDx4QuNAonQPddpsn6Xl/7sw==} engines: {node: '>= 0.10.0'} diff --git a/provider/web/index.ts b/provider/web/index.ts index c6b925a2..f9a49913 100644 --- a/provider/web/index.ts +++ b/provider/web/index.ts @@ -6,7 +6,8 @@ import type { MetaResult, Provider, } from '@openctx/provider' - +import { JSDOM } from 'jsdom'; +import DOMPurify from 'dompurify'; /** * An OpenCtx provider that fetches the content of a URL and provides it as an item. */ @@ -101,13 +102,20 @@ async function fetchContentForURLContextItem( .replace(/\s(?:class|style)=["'][^"']*["']/gi, '') .replace(/\sdata-[\w-]+(=["'][^"']*["'])?/gi, '') - // TODO(sqs): Arbitrarily trim the response text to avoid overflowing the context window for the + // Create a JSDOM instance to use with DOMPurify +const window = new JSDOM('').window; +const DOMPurifyInstance = DOMPurify(window); + +// Sanitize the content using DOMPurify +const sanitizedContent = DOMPurifyInstance.sanitize(bodyWithoutTags); + + // TODO(sqs): Arbitrarily trim the response text to avoid overflowing the context window for the // LLM. Ideally we would make the prompt builder prioritize this context item over other context // because it is explicitly from the user. const MAX_LENGTH = 14000 - return bodyWithoutTags.length > MAX_LENGTH - ? `${bodyWithoutTags.slice(0, MAX_LENGTH)}... (web page content was truncated)` - : bodyWithoutTags + return sanitizedContent.length > MAX_LENGTH + ? `${sanitizedContent.slice(0, MAX_LENGTH)}... (web page content was truncated)` + : sanitizedContent } /** From 3a4cad76e18b8c0c2f3c7cbeaca6fb0e06ba31f9 Mon Sep 17 00:00:00 2001 From: Mohammad Umer Alam Date: Mon, 22 Jul 2024 11:27:05 -0400 Subject: [PATCH 2/4] reordering import --- provider/web/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/web/index.ts b/provider/web/index.ts index f9a49913..fa12e9e9 100644 --- a/provider/web/index.ts +++ b/provider/web/index.ts @@ -6,8 +6,8 @@ import type { MetaResult, Provider, } from '@openctx/provider' -import { JSDOM } from 'jsdom'; import DOMPurify from 'dompurify'; +import { JSDOM } from 'jsdom'; /** * An OpenCtx provider that fetches the content of a URL and provides it as an item. */ From 0d10e0686e3014c6d7793ce4e96b3c63c699ab02 Mon Sep 17 00:00:00 2001 From: Mohammad Umer Alam Date: Mon, 22 Jul 2024 11:39:47 -0400 Subject: [PATCH 3/4] formatting --- provider/web/index.ts | 212 ++++++++++++++++++++++-------------------- 1 file changed, 112 insertions(+), 100 deletions(-) diff --git a/provider/web/index.ts b/provider/web/index.ts index fa12e9e9..ed7af3b8 100644 --- a/provider/web/index.ts +++ b/provider/web/index.ts @@ -1,121 +1,133 @@ import type { - ItemsParams, - ItemsResult, - MentionsParams, - MentionsResult, - MetaResult, - Provider, -} from '@openctx/provider' -import DOMPurify from 'dompurify'; -import { JSDOM } from 'jsdom'; + ItemsParams, + ItemsResult, + MentionsParams, + MentionsResult, + MetaResult, + Provider, +} from "@openctx/provider"; +import DOMPurify from "dompurify" +import { JSDOM } from "jsdom" /** * An OpenCtx provider that fetches the content of a URL and provides it as an item. */ const urlFetcher: Provider = { - meta(): MetaResult { - return { - name: 'Web URLs', - mentions: { label: 'Paste a URL...' }, - annotations: { selectors: [] }, - } - }, + meta(): MetaResult { + return { + name: "Web URLs", + mentions: { label: "Paste a URL..." }, + annotations: { selectors: [] }, + }; + }, - async mentions(params: MentionsParams): Promise { - const [item] = await fetchItem({ message: params.query }, 2000) - if (!item) { - return [] - } + async mentions(params: MentionsParams): Promise { + const [item] = await fetchItem({ message: params.query }, 2000); + if (!item) { + return []; + } - return [{ title: item.title, uri: item.url || '', data: { content: item.ai?.content } }] - }, + return [ + { + title: item.title, + uri: item.url || "", + data: { content: item.ai?.content }, + }, + ]; + }, - async items(params: ItemsParams): Promise { - return fetchItem(params) - }, -} + async items(params: ItemsParams): Promise { + return fetchItem(params); + }, +}; -async function fetchItem(params: ItemsParams, timeoutMs?: number): Promise { - if (typeof params.mention?.data?.content === 'string') { - return [ - { - ...params.mention, - url: params.mention.uri, - ui: { hover: { text: `Fetched from ${params.mention.uri}` } }, - ai: { content: params.mention.data.content }, - }, - ] - } +async function fetchItem( + params: ItemsParams, + timeoutMs?: number +): Promise { + if (typeof params.mention?.data?.content === "string") { + return [ + { + ...params.mention, + url: params.mention.uri, + ui: { hover: { text: `Fetched from ${params.mention.uri}` } }, + ai: { content: params.mention.data.content }, + }, + ]; + } - const url = params.message || params.mention?.uri - if (!url) { - return [] - } - try { - const content = await fetchContentForURLContextItem( - url, - timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined - ) + const url = params.message || params.mention?.uri; + if (!url) { + return []; + } + try { + const content = await fetchContentForURLContextItem( + url, + timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined + ); - if (content === null) { - return [] - } - return [ - { - url, - title: tryGetHTMLDocumentTitle(content) ?? url, - ui: { hover: { text: `Fetched from ${url}` } }, - ai: { content: content }, - }, - ] - } catch (error) { - // Suppress errors because the user might be typing a URL that is not yet valid. - return [] + if (content === null) { + return []; } + return [ + { + url, + title: tryGetHTMLDocumentTitle(content) ?? url, + ui: { hover: { text: `Fetched from ${url}` } }, + ai: { content: content }, + }, + ]; + } catch (error) { + // Suppress errors because the user might be typing a URL that is not yet valid. + return []; + } } async function fetchContentForURLContextItem( - urlStr: string, - signal?: AbortSignal + urlStr: string, + signal?: AbortSignal ): Promise { - const url = new URL(urlStr) - if (url.protocol !== 'http:' && url.protocol !== 'https:') { - return null - } - if (!/(localhost|\.\w{2,})$/.test(url.hostname)) { - return null - } + const url = new URL(urlStr); + if (url.protocol !== "http:" && url.protocol !== "https:") { + return null; + } + if (!/(localhost|\.\w{2,})$/.test(url.hostname)) { + return null; + } - const resp = await fetch(urlStr, { signal }) - if (!resp.ok) { - return null - } - const body = await resp.text() + const resp = await fetch(urlStr, { signal }); + if (!resp.ok) { + return null; + } + const body = await resp.text(); - // HACK(sqs): Rudimentarily strip HTML tags, script, and other unneeded elements from body using - // regexp. This is NOT intending to be a general-purpose HTML parser and is NOT sanitizing the - // value for security. - const bodyWithoutTags = body - .replace(/)<[^<]*)*<\/script>/gi, '') - .replace(/)<[^<]*)*<\/style>/gi, '') - .replace(/)<[^<]*)*<\/svg>/gi, '') - .replace(//gs, '') - .replace(/\s(?:class|style)=["'][^"']*["']/gi, '') - .replace(/\sdata-[\w-]+(=["'][^"']*["'])?/gi, '') + // HACK(sqs): Rudimentarily strip HTML tags, script, and other unneeded elements from body using + // regexp. This is NOT intending to be a general-purpose HTML parser and is NOT sanitizing the + // value for security. + const bodyWithoutTags = body + .replace(/)<[^<]*)*<\/script>/gi, "") + .replace(/)<[^<]*)*<\/style>/gi, "") + .replace(/)<[^<]*)*<\/svg>/gi, "") + .replace(//gs, "") + .replace(/\s(?:class|style)=["'][^"']*["']/gi, "") + .replace(/\sdata-[\w-]+(=["'][^"']*["'])?/gi, ""); - // Create a JSDOM instance to use with DOMPurify -const window = new JSDOM('').window; -const DOMPurifyInstance = DOMPurify(window); + // Create a JSDOM instance to use with DOMPurify + const window = new JSDOM("").window + const DOMPurifyInstance = DOMPurify(window) -// Sanitize the content using DOMPurify -const sanitizedContent = DOMPurifyInstance.sanitize(bodyWithoutTags); + // Sanitize the content using DOMPurify + const sanitizedContent = DOMPurifyInstance.sanitize(bodyWithoutTags) - // TODO(sqs): Arbitrarily trim the response text to avoid overflowing the context window for the + // TODO(sqs): Arbitrarily trim the response text to avoid overflowing the context window for the // LLM. Ideally we would make the prompt builder prioritize this context item over other context // because it is explicitly from the user. - const MAX_LENGTH = 14000 - return sanitizedContent.length > MAX_LENGTH - ? `${sanitizedContent.slice(0, MAX_LENGTH)}... (web page content was truncated)` - : sanitizedContent + const MAX_LENGTH = 14000; + return sanitizedContent.length > MAX_LENGTH + ? `${sanitizedContent.slice( + 0, + MAX_LENGTH + )}... (web page content was truncated)` + : sanitizedContent; } /** @@ -123,10 +135,10 @@ const sanitizedContent = DOMPurifyInstance.sanitize(bodyWithoutTags); * this feature is experimental and we don't need robustness yet). */ function tryGetHTMLDocumentTitle(html: string): string | undefined { - return html - .match(/(?<title>[^<]+)<\/title>/) - ?.groups?.title.replaceAll(/\s+/gm, ' ') - .trim() + return html + .match(/<title>(?<title>[^<]+)<\/title>/) + ?.groups?.title.replaceAll(/\s+/gm, " ") + .trim(); } -export default urlFetcher +export default urlFetcher; From 2d28ee6674459b05e7046a70e99a6c11c93738fd Mon Sep 17 00:00:00 2001 From: Mohammad Umer Alam <mohammad.alam@sourcegraph.com> Date: Tue, 23 Jul 2024 17:47:02 -0400 Subject: [PATCH 4/4] running biome formatting --- provider/web/index.ts | 208 ++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 107 deletions(-) diff --git a/provider/web/index.ts b/provider/web/index.ts index ed7af3b8..34129d6f 100644 --- a/provider/web/index.ts +++ b/provider/web/index.ts @@ -1,118 +1,115 @@ import type { - ItemsParams, - ItemsResult, - MentionsParams, - MentionsResult, - MetaResult, - Provider, -} from "@openctx/provider"; -import DOMPurify from "dompurify" -import { JSDOM } from "jsdom" + ItemsParams, + ItemsResult, + MentionsParams, + MentionsResult, + MetaResult, + Provider, +} from '@openctx/provider' +import DOMPurify from 'dompurify' +import { JSDOM } from 'jsdom' /** * An OpenCtx provider that fetches the content of a URL and provides it as an item. */ const urlFetcher: Provider = { - meta(): MetaResult { - return { - name: "Web URLs", - mentions: { label: "Paste a URL..." }, - annotations: { selectors: [] }, - }; - }, + meta(): MetaResult { + return { + name: 'Web URLs', + mentions: { label: 'Paste a URL...' }, + annotations: { selectors: [] }, + } + }, - async mentions(params: MentionsParams): Promise<MentionsResult> { - const [item] = await fetchItem({ message: params.query }, 2000); - if (!item) { - return []; - } + async mentions(params: MentionsParams): Promise<MentionsResult> { + const [item] = await fetchItem({ message: params.query }, 2000) + if (!item) { + return [] + } - return [ - { - title: item.title, - uri: item.url || "", - data: { content: item.ai?.content }, - }, - ]; - }, + return [ + { + title: item.title, + uri: item.url || '', + data: { content: item.ai?.content }, + }, + ] + }, - async items(params: ItemsParams): Promise<ItemsResult> { - return fetchItem(params); - }, -}; + async items(params: ItemsParams): Promise<ItemsResult> { + return fetchItem(params) + }, +} -async function fetchItem( - params: ItemsParams, - timeoutMs?: number -): Promise<ItemsResult> { - if (typeof params.mention?.data?.content === "string") { - return [ - { - ...params.mention, - url: params.mention.uri, - ui: { hover: { text: `Fetched from ${params.mention.uri}` } }, - ai: { content: params.mention.data.content }, - }, - ]; - } +async function fetchItem(params: ItemsParams, timeoutMs?: number): Promise<ItemsResult> { + if (typeof params.mention?.data?.content === 'string') { + return [ + { + ...params.mention, + url: params.mention.uri, + ui: { hover: { text: `Fetched from ${params.mention.uri}` } }, + ai: { content: params.mention.data.content }, + }, + ] + } - const url = params.message || params.mention?.uri; - if (!url) { - return []; - } - try { - const content = await fetchContentForURLContextItem( - url, - timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined - ); + const url = params.message || params.mention?.uri + if (!url) { + return [] + } + try { + const content = await fetchContentForURLContextItem( + url, + timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined + ) - if (content === null) { - return []; + if (content === null) { + return [] + } + return [ + { + url, + title: tryGetHTMLDocumentTitle(content) ?? url, + ui: { hover: { text: `Fetched from ${url}` } }, + ai: { content: content }, + }, + ] + } catch (error) { + // Suppress errors because the user might be typing a URL that is not yet valid. + return [] } - return [ - { - url, - title: tryGetHTMLDocumentTitle(content) ?? url, - ui: { hover: { text: `Fetched from ${url}` } }, - ai: { content: content }, - }, - ]; - } catch (error) { - // Suppress errors because the user might be typing a URL that is not yet valid. - return []; - } } async function fetchContentForURLContextItem( - urlStr: string, - signal?: AbortSignal + urlStr: string, + signal?: AbortSignal ): Promise<string | null> { - const url = new URL(urlStr); - if (url.protocol !== "http:" && url.protocol !== "https:") { - return null; - } - if (!/(localhost|\.\w{2,})$/.test(url.hostname)) { - return null; - } + const url = new URL(urlStr) + if (url.protocol !== 'http:' && url.protocol !== 'https:') { + return null + } + if (!/(localhost|\.\w{2,})$/.test(url.hostname)) { + return null + } - const resp = await fetch(urlStr, { signal }); - if (!resp.ok) { - return null; - } - const body = await resp.text(); + const resp = await fetch(urlStr, { signal }) + if (!resp.ok) { + return null + } + const body = await resp.text() - // HACK(sqs): Rudimentarily strip HTML tags, script, and other unneeded elements from body using - // regexp. This is NOT intending to be a general-purpose HTML parser and is NOT sanitizing the - // value for security. - const bodyWithoutTags = body - .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "") - .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "") - .replace(/<svg\b[^<]*(?:(?!<\/svg>)<[^<]*)*<\/svg>/gi, "") - .replace(/<!--.*?-->/gs, "") - .replace(/\s(?:class|style)=["'][^"']*["']/gi, "") - .replace(/\sdata-[\w-]+(=["'][^"']*["'])?/gi, ""); + // HACK(sqs): Rudimentarily strip HTML tags, script, and other unneeded elements from body using + // regexp. This is NOT intending to be a general-purpose HTML parser and is NOT sanitizing the + // value for security. + const bodyWithoutTags = body + .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '') + .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '') + .replace(/<svg\b[^<]*(?:(?!<\/svg>)<[^<]*)*<\/svg>/gi, '') + .replace(/<!--.*?-->/gs, '') + .replace(/\s(?:class|style)=["'][^"']*["']/gi, '') + .replace(/\sdata-[\w-]+(=["'][^"']*["'])?/gi, '') // Create a JSDOM instance to use with DOMPurify - const window = new JSDOM("").window + const window = new JSDOM('').window const DOMPurifyInstance = DOMPurify(window) // Sanitize the content using DOMPurify @@ -121,13 +118,10 @@ async function fetchContentForURLContextItem( // TODO(sqs): Arbitrarily trim the response text to avoid overflowing the context window for the // LLM. Ideally we would make the prompt builder prioritize this context item over other context // because it is explicitly from the user. - const MAX_LENGTH = 14000; - return sanitizedContent.length > MAX_LENGTH - ? `${sanitizedContent.slice( - 0, - MAX_LENGTH - )}... (web page content was truncated)` - : sanitizedContent; + const MAX_LENGTH = 14000 + return sanitizedContent.length > MAX_LENGTH + ? `${sanitizedContent.slice(0, MAX_LENGTH)}... (web page content was truncated)` + : sanitizedContent } /** @@ -135,10 +129,10 @@ async function fetchContentForURLContextItem( * this feature is experimental and we don't need robustness yet). */ function tryGetHTMLDocumentTitle(html: string): string | undefined { - return html - .match(/<title>(?<title>[^<]+)<\/title>/) - ?.groups?.title.replaceAll(/\s+/gm, " ") - .trim(); + return html + .match(/<title>(?<title>[^<]+)<\/title>/) + ?.groups?.title.replaceAll(/\s+/gm, ' ') + .trim() } -export default urlFetcher; +export default urlFetcher