139 lines
5.0 KiB
JavaScript
139 lines
5.0 KiB
JavaScript
#!/usr/bin/env node
// Site Inspector - compact DOM pattern discovery for casino listing pages.
// Prints card-like containers, the most frequent image alt texts, and
// selector hints to stdout.
// Usage: node site-inspector.js "<url>"
const puppeteer = require('puppeteer-extra');
|
|
puppeteer.use(require('puppeteer-extra-plugin-stealth')());
|
|
|
|
// Chrome binary used when the PUPPETEER_EXECUTABLE_PATH environment variable
// is not set (keeps the original macOS default).
const DEFAULT_CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';

// Fixed pause after DOMContentLoaded before the DOM is inspected.
const SETTLE_DELAY_MS = 4000;

/**
 * Page-side collector for "card-like" containers: elements that hold at least
 * one <img> with a usable alt text. For each card it also records off-site
 * link origins and whether bonus-looking text is present.
 *
 * NOTE: this function is handed to page.evaluate(), so it runs inside the
 * browser page and must not reference anything from the Node.js scope.
 *
 * @param {string} siteHost - Host of the inspected site without a leading "www.".
 * @returns {Array<{cls: string, tag: string, alts: string[], ext: string[], bonus: boolean}>}
 *   At most 20 card descriptors, in candidate order.
 */
function collectCards(siteHost) {
  const MAX_CARDS = 20;
  const MAX_ORIGINS_SCANNED = 5;
  const SOCIAL_HOSTS = /google|facebook|twitter|instagram/i;

  const root =
    document.querySelector('main, article, .content, .container, [class*="wrapper"]') ||
    document.body;

  // Start with the classed direct children of the main container; when there
  // are too few, widen to every classed element. A Set keeps document order
  // while preventing the direct children from being inspected twice.
  const candidates = new Set(root.querySelectorAll(':scope > [class]'));
  if (candidates.size < 3) {
    document.querySelectorAll('[class]').forEach((el) => candidates.add(el));
  }

  const cards = [];
  for (const card of candidates) {
    if (cards.length >= MAX_CARDS) break;

    const alts = [];
    card.querySelectorAll('img[alt]').forEach((img) => {
      const alt = (img.alt || '').trim();
      if (alt.length >= 2 && alt.length < 80) alts.push(alt);
    });
    if (alts.length === 0) continue;

    // Off-site links inside the card, one origin per distinct host.
    const extOrigins = [];
    const seenHosts = new Set();
    for (const link of card.querySelectorAll('a[href]')) {
      if (extOrigins.length >= MAX_ORIGINS_SCANNED) break;

      let target;
      try {
        target = new URL(link.getAttribute('href'), document.baseURI);
      } catch (e) {
        continue; // unparsable href: best-effort scan, skip it
      }
      // mailto:, tel:, javascript: etc. have no host and an origin of "null";
      // they are not external sites.
      if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;

      const host = target.hostname.replace(/^www\./, '');
      if (host === siteHost || seenHosts.has(host) || SOCIAL_HOSTS.test(host)) continue;

      seenHosts.add(host);
      extOrigins.push(target.origin);
    }

    // "N free spins" in any letter case, a standalone "FS" token (also "50FS"),
    // or a currency amount such as "£10".
    const text = card.textContent || '';
    const hasBonusText =
      /\d+\s*free\s*spins/i.test(text) || /(?:^|[^A-Za-z])FS\b|[£€$]\d+/.test(text);

    cards.push({
      // getAttribute works for SVG elements too, where className is not a string.
      cls: (card.getAttribute('class') || '').substring(0, 80),
      tag: card.tagName,
      alts: alts.slice(0, 6),
      ext: extOrigins.slice(0, 3),
      bonus: hasBonusText
    });
  }
  return cards;
}

/**
 * Page-side collector that counts how often each image alt text occurs.
 * Runs inside the browser page via page.evaluate(); must stay self-contained.
 *
 * @returns {Array<[string, number]>} Up to 25 [altText, count] pairs, most
 *   frequent first.
 */
function collectAltCounts() {
  // A Map (not a plain object) so alt texts such as "constructor" cannot
  // collide with inherited Object.prototype properties.
  const counts = new Map();
  document.querySelectorAll('img[alt]').forEach((img) => {
    const alt = (img.alt || '').trim();
    if (alt.length >= 2 && alt.length < 80) counts.set(alt, (counts.get(alt) || 0) + 1);
  });
  return Array.from(counts.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 25);
}

/**
 * Prints the detected card containers, or a placeholder when there are none.
 *
 * @param {Array<{cls: string, tag: string, alts: string[], ext: string[], bonus: boolean}>} cards
 */
function printCards(cards) {
  if (cards.length === 0) {
    console.log('(no card containers detected)\n');
    return;
  }
  console.log('=== CARD CONTAINERS ===\n');
  cards.forEach((card, index) => {
    console.log(`Card ${index + 1}: <${card.tag} class="${card.cls}">`);
    console.log(` imgs: ${JSON.stringify(card.alts)}`);
    if (card.ext.length) console.log(` ext: ${card.ext.join(', ')}`);
    if (card.bonus) console.log(' bonus: text found');
    console.log('');
  });
}

/**
 * Prints the most frequent alt texts, flagging those that mention "casino".
 *
 * @param {Array<[string, number]>} altCounts - [altText, count] pairs.
 */
function printAltPatterns(altCounts) {
  if (altCounts.length === 0) return;
  console.log('=== ALT TEXT PATTERNS ===\n');
  for (const [text, count] of altCounts) {
    const mark = /casino/i.test(text) ? ' *CASINO*' : '';
    console.log(` x${String(count).padStart(3, ' ')} "${text}"${mark}`);
  }
}

/**
 * Prints selector hints derived from the first card whose alt texts mention
 * "casino": its first class name, whether it has off-site links, and the
 * brand name obtained by cutting the alt text at " casino".
 *
 * @param {Array<{cls: string, alts: string[], ext: string[]}>} cards
 */
function printHints(cards) {
  console.log('\n=== HINTS ===\n');

  const mentionsCasino = (alt) => /casino/i.test(alt);
  const card = cards.find((candidate) => candidate.alts.some(mentionsCasino));
  if (!card) {
    console.log('No casino-card containers found.');
    console.log('Check alt-text list for patterns.');
    return;
  }

  const classNames = card.cls.trim().split(/\s+/).filter(Boolean);
  // An element can match [class] with an empty attribute; avoid printing a
  // bare "." as if it were a usable selector.
  console.log('Card class hint: ' + (classNames.length ? '.' + classNames[0] : '(none)'));
  if (classNames.length > 1) console.log('Full class: ' + card.cls);
  console.log('Has ext links: ' + (card.ext.length > 0));

  const sampleAlt = card.alts.find(mentionsCasino);
  console.log(`Sample alt: "${sampleAlt}"`);
  // Suggested brand: everything before the first whitespace-separated "casino".
  const brand = sampleAlt.replace(/\s+casino\b.*/i, '');
  console.log(`Brand would be: "${brand}"`);
}

/**
 * Entry point: loads the URL from argv[2] in headless Chrome, collects card
 * containers and alt-text statistics from the page, and prints the report to
 * stdout (progress and errors go to stderr). Exits with status 1 on a missing
 * or invalid URL, and sets a non-zero exit code when the inspection fails.
 */
(async () => {
  const url = process.argv[2];
  if (!url) {
    console.log('Usage: node site-inspector.js "<url>"');
    process.exit(1);
  }

  // Parse with URL so ports, credentials, letter case and query-only URLs do
  // not leak into the host that links are compared against.
  let siteHost;
  try {
    siteHost = new URL(url).hostname.replace(/^www\./, '');
  } catch (err) {
    console.error('Invalid URL (expected e.g. "https://example.com/page"): ' + url);
    process.exit(1);
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || DEFAULT_CHROME_PATH,
    args: ['--no-sandbox']
  });

  // Always shut the browser down, even when navigation or evaluation throws;
  // otherwise the Chrome process is left running.
  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36');

    console.error('\n>> Loading: ' + url);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));

    const title = await page.title();
    console.log('\n=== Title: ' + title.substring(0, 80) + '\n');

    // --- Phase 1: card-like containers with imgs + off-site links ---
    const cards = await page.evaluate(collectCards, siteHost);

    // --- Phase 2: top alt-text patterns across the page ---
    const altCounts = await page.evaluate(collectAltCounts);

    // --- Output ---
    printCards(cards);
    printAltPatterns(altCounts);
    printHints(cards);
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});