Initial commit
This commit is contained in:
+57
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env node
|
||||
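// Usage: node inspect.js <url> -- For up to 60 <img alt> elements on the page, dumps the alt text, the class of the nearest element with a class attribute, and the origin of a nearby off-site link (if any)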
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
puppeteer.use(require('puppeteer-extra-plugin-stealth')());
|
||||
|
||||
// Fixed pause between DOMContentLoaded and inspecting the DOM
// (presumably to let client-side scripts finish rendering -- TODO confirm).
const SETTLE_DELAY_MS = 4000;

// Maximum number of image rows reported per page.
const MAX_ROWS = 60;

// Chrome binary used when PUPPETEER_EXECUTABLE_PATH is not set in the environment.
const DEFAULT_CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';

/**
 * Collects one row per `<img alt>` element whose trimmed alt text has at
 * least two characters.
 *
 * This function is handed to `page.evaluate`, which serializes it and runs it
 * inside the page, so it must not reference anything from the Node-side scope.
 *
 * @param {string} siteHost - Hostname of the inspected site, lower-case and
 *   without a leading "www.".
 * @param {number} limit - Maximum number of rows to return.
 * @returns {{alt: string, ext: string, cls: string}[]} `alt` is capped at 80
 *   characters, `cls` at 120; `ext` is the origin of the first off-site
 *   http(s) link found while walking up from the image, or '-' if none.
 */
function collectImages(siteHost, limit) {
  const rows = [];

  for (const img of document.querySelectorAll('img[alt]')) {
    // Stop scanning once the cap is reached instead of collecting everything and slicing.
    if (rows.length >= limit) break;

    const alt = (img.alt || '').trim();
    if (alt.length < 2) continue;

    // `closest` starts at the <img> itself, so an image with its own class
    // attribute reports that class. Read the attribute rather than
    // `className`, which is an SVGAnimatedString object on SVG elements.
    const classed = img.closest('[class]');
    const cls = classed ? (classed.getAttribute('class') || '').substring(0, 120) : '';

    // Walk up the ancestors (stopping below <body>) until one of them contains
    // a link that points at a different site.
    let ext = null;
    for (let node = img.parentElement; node && node !== document.body && !ext; node = node.parentElement) {
      for (const anchor of node.querySelectorAll('a[href]')) {
        let target;
        try {
          target = new URL(anchor.getAttribute('href'), document.baseURI);
        } catch (err) {
          // An unparsable href cannot identify an off-site link; skip it.
          continue;
        }
        // mailto:, tel:, javascript: etc. have an empty hostname and a "null"
        // origin; they are not off-site links.
        if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;
        if (target.hostname.replace(/^www\./, '') !== siteHost) {
          ext = target.origin;
          break;
        }
      }
    }

    rows.push({ alt: alt.substring(0, 80), ext: ext || '-', cls: cls });
  }

  return rows;
}

/**
 * Entry point: loads the URL given as the first CLI argument in headless
 * Chrome, prints the page title, then prints one line per collected image.
 * Exits with status 1 when the URL is missing or cannot be parsed, and sets
 * exit status 1 when loading or inspection fails.
 */
(async () => {
  const url = process.argv[2];
  if (!url) { console.log('Usage: node inspect.js <url>'); process.exit(1); }

  // Parse with URL so the key matches what the in-page code compares against
  // (`URL.hostname`: lower-case, no port, no credentials, no query).
  let siteHost;
  try {
    siteHost = new URL(url).hostname.replace(/^www\./, '');
  } catch (err) {
    console.error('Invalid URL:', url);
    process.exit(1);
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || DEFAULT_CHROME_PATH,
    args: ['--no-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36');

    console.log('Loading:', url);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));
    console.log('Title:', (await page.title()).substring(0, 80), '\n');

    const rows = await page.evaluate(collectImages, siteHost, MAX_ROWS);
    rows.forEach((row, index) => {
      console.log(`${index} | alt="${row.alt}" | ext=${row.ext} | cls=${row.cls.substring(0, 80)}`);
    });
  } finally {
    // Closing the browser also closes its pages; doing it here guarantees the
    // Chrome process is not left running when navigation or evaluation throws.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user