58 lines
2.0 KiB
JavaScript
58 lines
2.0 KiB
JavaScript
#!/usr/bin/env node
|
|
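// Usage: node inspect.js <url> -- For up to 60 <img alt> elements on the page, dumps the alt text, the class of the closest element carrying a class attribute, and the origin of the nearest off-site link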
|
|
const puppeteer = require('puppeteer-extra');
|
|
puppeteer.use(require('puppeteer-extra-plugin-stealth')());
|
|
|
|
// Chrome binary used when the CHROME_PATH environment variable is not set.
const DEFAULT_CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
// Upper bound on the number of images reported per run.
const MAX_RESULTS = 60;
// Fixed pause after DOMContentLoaded before the page is inspected.
const SETTLE_DELAY_MS = 4000;

/**
 * Collects image info from the current document. This function is passed to
 * page.evaluate(), so it runs in the browser and must not reference anything
 * from the Node-side scope.
 *
 * @param {string} siteHost - Hostname of the inspected site, without a leading "www.".
 * @param {number} maxResults - Stop after this many images have been collected.
 * @returns {{alt: string, ext: string, cls: string}[]} One entry per image:
 *   `alt` is the trimmed alt text (max 80 chars), `ext` is the origin of the
 *   nearest off-site http(s) link or '-', and `cls` is the class attribute
 *   (max 120 chars) of the closest element that has one.
 */
function collectImages(siteHost, maxResults) {
  const stripWww = (hostname) => hostname.replace(/^www\./, '');

  // Walks up from the image (stopping before <body>) and returns the origin of
  // the first http(s) link in an ancestor's subtree that points to another host.
  const findOffSiteOrigin = (img) => {
    for (let node = img.parentElement; node && node !== document.body; node = node.parentElement) {
      for (const anchor of node.querySelectorAll('a[href]')) {
        let target;
        try {
          target = new URL(anchor.href, document.baseURI);
        } catch (err) {
          continue; // Best effort: an unparsable href is simply not a candidate.
        }
        // mailto:, tel:, javascript: etc. have no hostname and are not site links.
        if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;
        if (stripWww(target.hostname) !== siteHost) return target.origin;
      }
    }
    return null;
  };

  const found = [];
  for (const img of document.querySelectorAll('img[alt]')) {
    if (found.length >= maxResults) break;

    const alt = (img.alt || '').trim();
    if (alt.length < 2) continue;

    // closest() also matches the image itself, so this may be the img's own class.
    // getAttribute is used because className is not a string on SVG elements.
    const classed = img.closest('[class]');
    const cls = classed ? (classed.getAttribute('class') || '').substring(0, 120) : '';

    found.push({ alt: alt.substring(0, 80), ext: findOffSiteOrigin(img) || '-', cls });
  }
  return found;
}

/**
 * Entry point: loads the URL given as the first CLI argument in Chrome, then
 * prints the page title and one line per collected image. Exits with status 1
 * on a missing or invalid URL, and sets exit code 1 on any runtime failure.
 */
(async () => {
  const url = process.argv[2];
  if (!url) {
    console.error('Usage: node inspect.js <url>');
    process.exit(1);
  }

  // Parse with URL so the host excludes port/userinfo, matching URL.hostname in the page.
  let siteHost;
  try {
    siteHost = new URL(url).hostname.replace(/^www\./, '');
  } catch (err) {
    console.error('Invalid URL (include the scheme, e.g. https://):', url);
    process.exit(1);
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: process.env.CHROME_PATH || DEFAULT_CHROME_PATH,
    args: ['--no-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36');

    console.log('Loading:', url);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));
    console.log('Title:', (await page.title()).substring(0, 80), '\n');

    const rows = await page.evaluate(collectImages, siteHost, MAX_RESULTS);
    rows.forEach((row, index) => {
      console.log(`${index} | alt="${row.alt}" | ext=${row.ext} | cls=${row.cls.substring(0, 80)}`);
    });
  } finally {
    // Always shut Chrome down (this also closes its pages), even when navigation or evaluation fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|