From 9d6a4d0d399e040d7fcc871ae6d074108e69b115 Mon Sep 17 00:00:00 2001 From: Awvyyy Date: Sun, 5 Apr 2026 15:39:39 +0300 Subject: [PATCH] feat: improve cookie clicker example --- scrapeOtherComics.js | 159 ++++++++++++++++++++++++++++++++++ src/components/LeafletMap.vue | 61 ++++++++++--- src/pages/CookieClicker.vue | 137 +++++++++++++++++++++-------- src/pages/Leaflet.vue | 28 ++++-- 4 files changed, 332 insertions(+), 53 deletions(-) create mode 100644 scrapeOtherComics.js diff --git a/scrapeOtherComics.js b/scrapeOtherComics.js new file mode 100644 index 0000000..c95794d --- /dev/null +++ b/scrapeOtherComics.js @@ -0,0 +1,159 @@ +import axios from "axios"; +import * as cheerio from 'cheerio'; +import fs from 'fs'; +import md5 from "md5"; + +const sleep = function(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +if (!fs.existsSync('cache')) { + fs.mkdirSync('cache'); +} + +const cacheGet = (name) => { + if (fs.existsSync('cache/' + name + '.html')) { + return fs.readFileSync('cache/' + name + '.html', 'utf8'); + } + + return false; +} + +const cacheSet = (name, value) => { + fs.writeFileSync('cache/' + name + '.html', value); +} + +const getHtml = async (url) => { + let data = cacheGet(md5(url)); + + if (!data) { + await sleep(1000); + console.log('!!!!! LIVE DATA', url); + let res = await axios.get(url, { + headers: { + 'User-Agent': 'Mozilla/5.0' + } + }); + data = res.data; + cacheSet(md5(url), data); + } + + return data; +} + +const absoluteUrl = (baseUrl, url) => { + if (!url) { + return ''; + } + + return new URL(url, baseUrl).href; +} + +const cleanText = (value = '') => { + return value.replace(/\s+/g, ' ').trim(); +} + +const findComicImage = ($, baseUrl) => { + const imageSelectors = [ + 'meta[property="og:image"]', + 'meta[name="twitter:image"]', + '#comic img', + '#strip', + '.entry-content img', + '.post-content img', + 'article img', + '.comic img', + '.entry img', + ]; + + for (let selector of imageSelectors) { + let element = $(selector).first(); + + if (!element.length) { + continue; + } + + if (element.is('meta')) { + let src = element.attr('content'); + if (src) { + return { + src: absoluteUrl(baseUrl, src), + alt: '', + }; + } + } + + let src = element.attr('src'); + if (src) { + return { + src: absoluteUrl(baseUrl, src), + alt: cleanText(element.attr('alt')), + }; + } + } + + let fallback = $('img').filter((index, element) => { + let src = $(element).attr('src') || ''; + + return src && + !src.includes('logo') && + !src.includes('rss') && + !src.includes('facebook') && + !src.includes('twitter') && + !src.includes('patreon'); + }).first(); + + return { + src: absoluteUrl(baseUrl, fallback.attr('src')), + alt: cleanText(fallback.attr('alt')), + }; +} + +const scrapeButtersafe = async () => { + console.log('--- BUTTERSAFE ---'); + + let url = 'https://buttersafe.com/'; + + for (let i = 0; i < 10 && url; i++) { + let data = await getHtml(url); + let $ = cheerio.load(data); + let image = findComicImage($, url); + let title = cleanText($('h2 a, h2, article h1, h1').first().text()) || cleanText($('title').text()); + let prevUrl = $('a[rel="prev"]').attr('href') || $('a.prev').attr('href') || $('a').filter((index, element) => cleanText($(element).text()) === '«').first().attr('href'); + + console.log(image.src, title, image.alt); + + url = absoluteUrl(url, prevUrl); + } +} + +const scrapeQuestionableContent = async () => { + console.log('--- QUESTIONABLE CONTENT ---'); + + const archiveUrl = 'https://www.questionablecontent.net/archive.php'; + const archiveData = await getHtml(archiveUrl); + const $ = cheerio.load(archiveData); + const comicLinks = []; + + $('a[href*="view.php?comic="]').each((index, element) => { + if (comicLinks.length >= 10) { + return false; + } + + comicLinks.push({ + url: absoluteUrl(archiveUrl, $(element).attr('href')), + title: cleanText($(element).text()), + }); + }); + + for (let comic of comicLinks) { + let data = await getHtml(comic.url); + let $comic = cheerio.load(data); + let image = findComicImage($comic, comic.url); + + console.log(image.src, comic.title, image.alt); + } +} + +await scrapeButtersafe(); +await scrapeQuestionableContent(); diff --git a/src/components/LeafletMap.vue b/src/components/LeafletMap.vue index 1b14ba2..f5a46b2 100644 --- a/src/components/LeafletMap.vue +++ b/src/components/LeafletMap.vue @@ -1,29 +1,62 @@