#!/usr/bin/env node
// Usage: node inspect.js <url>
//
// Loads <url> in headless Chrome and prints one line per <img alt="..."> found
// in the document (at most MAX_RESULTS): the alt text, the origin of the
// nearest off-site http(s) link found while walking up the image's ancestors,
// and the class of the closest element (the image itself included) that
// carries a class attribute.
//
// Set the CHROME_PATH environment variable to use a Chrome binary other than
// the default macOS install location.
'use strict';

const puppeteer = require('puppeteer-extra');
puppeteer.use(require('puppeteer-extra-plugin-stealth')());

const DEFAULT_CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36';
const NAV_TIMEOUT_MS = 60000;
// Fixed pause after DOMContentLoaded before the DOM is inspected.
const SETTLE_DELAY_MS = 4000;
const MAX_RESULTS = 60;

/**
 * Returns the lower-cased hostname of `rawUrl` without a leading "www.".
 *
 * @param {string} rawUrl - Absolute URL, including its scheme.
 * @returns {string} Hostname used for the same-site comparison.
 * @throws {TypeError} If `rawUrl` is not a valid absolute URL.
 */
function siteHost(rawUrl) {
  // Anchor the pattern: only a *leading* "www." is a prefix worth ignoring.
  return new URL(rawUrl).hostname.replace(/^www\./, '');
}

/**
 * Runs inside the page (passed to `page.evaluate`), so it must be
 * self-contained: it may only use its arguments and browser globals.
 *
 * @param {string} host - Site hostname without a leading "www.".
 * @param {number} limit - Maximum number of rows to return.
 * @returns {{alt: string, ext: string, cls: string}[]} One row per image:
 *   `alt` (max 80 chars), `ext` (off-site link origin, or '-' when none was
 *   found) and `cls` (class attribute, max 120 chars).
 */
function collectImages(host, limit) {
  const stripWww = (name) => name.replace(/^www\./, '');
  const rows = [];

  for (const img of document.querySelectorAll('img[alt]')) {
    // Stop as soon as enough rows exist instead of scanning every image and
    // discarding the surplus afterwards.
    if (rows.length >= limit) break;

    const alt = (img.alt || '').trim();
    if (alt.length < 2) continue;

    // Read the attribute rather than `className`: on SVG elements `className`
    // is an SVGAnimatedString and stringifies to "[object SVGAnimatedString]".
    const classed = img.closest('[class]');
    const cls = classed ? (classed.getAttribute('class') || '').substring(0, 120) : '';

    // Walk up the ancestors; at each level look for the first link that
    // points to a different host.
    let ext = null;
    for (
      let node = img.parentElement;
      node && node !== document.body && !ext;
      node = node.parentElement
    ) {
      for (const anchor of node.querySelectorAll('a[href]')) {
        let target;
        try {
          target = new URL(anchor.getAttribute('href'), document.baseURI);
        } catch (e) {
          // Best effort: an unparsable href cannot be an off-site link.
          continue;
        }
        // mailto:, tel:, javascript: etc. have no hostname and would
        // otherwise be reported as off-site with the origin "null".
        if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;
        if (stripWww(target.hostname) !== host) {
          ext = target.origin;
          break;
        }
      }
    }

    rows.push({ alt: alt.substring(0, 80), ext: ext || '-', cls });
  }

  return rows;
}

/**
 * Entry point: validates the CLI argument, loads the page and prints the
 * report. The browser is closed even when navigation or evaluation fails.
 */
async function main() {
  const url = process.argv[2];
  if (!url) {
    console.error('Usage: node inspect.js <url>');
    process.exit(1);
  }

  let host;
  try {
    host = siteHost(url);
  } catch (err) {
    console.error(`Invalid URL: ${url}`);
    console.error('Usage: node inspect.js <url>');
    process.exit(1);
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: process.env.CHROME_PATH || DEFAULT_CHROME_PATH,
    args: ['--no-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);

    console.log('Loading:', url);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT_MS });
    await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));

    console.log('Title:', (await page.title()).substring(0, 80), '\n');

    const rows = await page.evaluate(collectImages, host, MAX_RESULTS);
    rows.forEach((row, index) => {
      console.log(`${index} | alt="${row.alt}" | ext=${row.ext} | cls=${row.cls.substring(0, 80)}`);
    });
  } finally {
    // Closing the browser also closes its pages; doing it here prevents a
    // stray Chrome process when anything above throws.
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});