Files
2026-06-26 14:30:45 +02:00

58 lines
2.0 KiB
JavaScript

#!/usr/bin/env node
// Usage: node inspect.js <url> -- Dumps alt text, nearest class attribute, and nearest off-site link origin for up to 60 <img alt> elements on the page
const puppeteer = require('puppeteer-extra');
puppeteer.use(require('puppeteer-extra-plugin-stealth')());
/**
 * Script entry point.
 *
 * Loads the URL given as the first CLI argument in Chrome and prints one line
 * per <img alt="..."> element (at most 60, in document order) containing:
 *   - the trimmed alt text (first 80 chars; alts shorter than 2 chars are skipped),
 *   - the origin of the first off-site http(s) link found while walking up the
 *     image's ancestors (or '-' when none is found below <body>),
 *   - the class attribute of the closest element that has one (this can be the
 *     image itself), truncated to 80 chars when printed.
 *
 * The Chrome binary defaults to the macOS install path and can be overridden
 * with the PUPPETEER_EXECUTABLE_PATH environment variable.
 * Exits with a non-zero status on a missing/invalid URL or on any runtime error.
 */
(async () => {
  const DEFAULT_CHROME = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
  const SETTLE_MS = 4000; // fixed pause so late-loading content can render
  const MAX_ROWS = 60;

  const url = process.argv[2];
  if (!url) { console.log('Usage: node inspect.js <url>'); process.exit(1); }

  // Parse with the URL API rather than string slicing so the value matches
  // what URL.hostname yields inside the page: lowercased, without port or
  // userinfo. Only a leading "www." is dropped.
  let siteHost;
  try {
    siteHost = new URL(url).hostname.replace(/^www\./, '');
  } catch (err) {
    console.error('Invalid URL: ' + url);
    process.exit(1);
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || DEFAULT_CHROME,
    args: ['--no-sandbox'],
  });

  // try/finally guarantees the Chrome process is shut down even when
  // navigation or evaluation throws.
  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36');
    console.log('Loading:', url);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    await new Promise((resolve) => setTimeout(resolve, SETTLE_MS));
    console.log('Title:', (await page.title()).substring(0, 80), '\n');

    // Everything inside this callback is serialized and runs in the page, so
    // it must be self-contained (no references to Node-side variables).
    const rows = await page.evaluate((host, limit) => {
      const stripWww = (name) => name.replace(/^www\./, '');

      // Walks up from the image's parent (stopping before <body>) and returns
      // the origin of the first http(s) link under an ancestor whose host
      // differs from the inspected site, or null when there is none.
      const findOffSiteOrigin = (img) => {
        for (let node = img.parentElement; node && node !== document.body; node = node.parentElement) {
          for (const anchor of node.querySelectorAll('a[href]')) {
            let target;
            try {
              // Read the attribute: on SVG <a> elements the .href property is
              // an object, not a string.
              target = new URL(anchor.getAttribute('href'), document.baseURI);
            } catch (e) {
              continue; // unparsable href: best-effort, ignore this link
            }
            // mailto:, tel:, javascript: etc. have an empty hostname and a
            // "null" origin; they are not off-site web links.
            if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;
            if (stripWww(target.hostname) !== host) return target.origin;
          }
        }
        return null;
      };

      const found = [];
      for (const img of document.querySelectorAll('img[alt]')) {
        if (found.length >= limit) break; // later images would be discarded anyway
        const alt = (img.alt || '').trim();
        if (alt.length < 2) continue;
        // closest() starts at the image itself, so this may be the image's own class.
        const classed = img.closest('[class]');
        const cls = classed ? classed.getAttribute('class').substring(0, 120) : '';
        found.push({ alt: alt.substring(0, 80), ext: findOffSiteOrigin(img) || '-', cls: cls });
      }
      return found;
    }, siteHost, MAX_ROWS);

    rows.forEach((row, i) => {
      console.log(`${i} | alt="${row.alt}" | ext=${row.ext} | cls=${row.cls.substring(0, 80)}`);
    });
  } finally {
    // Closing the browser also closes every page it owns.
    await browser.close();
  }
})().catch((err) => {
  // Report failures explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});