Initial commit
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env node
|
||||
// Site Inspector - compact DOM pattern discovery for casino listing pages
|
||||
// Usage: node site-inspector.js "<url>"
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
puppeteer.use(require('puppeteer-extra-plugin-stealth')());
|
||||
|
||||
/**
 * Derive the bare host of the inspected site from the URL given on the
 * command line, so it can be compared against `URL.hostname` of page links.
 *
 * The host is taken from a parsed URL rather than by string splitting, so a
 * port, credentials, query string or fragment never leak into the result.
 * Only a *leading* "www." is removed. When the input has no scheme
 * (e.g. "example.com/list") it is re-parsed with "http://" prepended.
 *
 * @param {string} rawUrl - URL as typed by the user.
 * @returns {string} Lower-case hostname without a leading "www.", or '' when
 *   no host can be determined.
 */
function siteHostOf(rawUrl) {
  const text = String(rawUrl).trim();
  for (const candidate of [text, 'http://' + text]) {
    try {
      const hostname = new URL(candidate).hostname;
      if (hostname) return hostname.toLowerCase().replace(/^www\./, '');
    } catch (err) {
      // Not parseable in this form; fall through to the next candidate.
    }
  }
  return '';
}

/**
 * Suggest a brand name from an image alt text by cutting the text at the
 * first whitespace-preceded word "casino" (case-insensitive).
 *
 * @param {string} altText - Alt text such as "Lucky Casino logo".
 * @returns {string} Text before " casino…", or the input unchanged when the
 *   pattern does not occur.
 */
function brandFromAlt(altText) {
  return altText.replace(/\s+casino\b.*/i, '');
}

/**
 * Phase 1 (runs inside the page via `page.evaluate`): find card-like
 * containers that hold images with alt text, and summarise each one.
 *
 * This function is serialized and executed in the browser, so it must stay
 * self-contained: it may only use its argument and page globals.
 *
 * @param {string} siteHost - Host of the inspected site (no leading "www."),
 *   used to tell off-site links from on-site ones.
 * @returns {Array<{cls: string, tag: string, alts: string[], ext: string[], bonus: boolean}>}
 *   At most 20 card summaries, in document order of the candidates.
 */
function findCardContainers(siteHost) {
  const MAX_CARDS = 20;
  const MAX_ORIGINS_SCANNED = 5;
  const IGNORED_HOSTS = /google|facebook|twitter|instagram/i;

  const main =
    document.querySelector('main, article, .content, .container, [class*="wrapper"]') || document.body;

  // Prefer the direct classed children of the main container as candidates.
  let candidates = Array.from(main.querySelectorAll(':scope > [class]'));
  if (candidates.length < 3) {
    // Too few direct children: widen to every classed element. The Set keeps
    // an element that is already a candidate from being reported twice.
    const everyClassed = Array.from(document.querySelectorAll('[class]'));
    candidates = Array.from(new Set(candidates.concat(everyClassed)));
  }

  const results = [];
  for (const card of candidates) {
    if (results.length >= MAX_CARDS) break;

    const alts = Array.from(card.querySelectorAll('img[alt]'))
      .map(function (img) { return (img.alt || '').trim(); })
      .filter(function (alt) { return alt.length >= 2 && alt.length < 80; });
    if (!alts.length) continue;

    // Collect distinct off-site origins linked from inside the card.
    const extOrigins = [];
    const seenHosts = new Set();
    for (const link of Array.from(card.querySelectorAll('a[href]'))) {
      if (extOrigins.length >= MAX_ORIGINS_SCANNED) break;

      let target;
      try {
        target = new URL(link.href, document.baseURI);
      } catch (err) {
        continue; // Best effort: a malformed href is simply not counted.
      }
      // mailto:, tel:, javascript: etc. have no web origin ("null"); skip them.
      if (target.protocol !== 'http:' && target.protocol !== 'https:') continue;

      const host = target.hostname.replace(/^www\./, '');
      if (host === siteHost || IGNORED_HOSTS.test(host) || seenHosts.has(host)) continue;
      seenHosts.add(host);
      extOrigins.push(target.origin);
    }

    // "free spins" is matched case-insensitively ("100 Free Spins");
    // the "FS" abbreviation and currency amounts stay case-sensitive.
    const text = card.textContent || '';
    const hasBonusText = /\d+\s*free\s*spins/i.test(text) || /FS|[£€$]\d+/.test(text);

    results.push({
      // getAttribute avoids "[object SVGAnimatedString]" for SVG elements.
      cls: (card.getAttribute('class') || '').substring(0, 80),
      tag: card.tagName,
      alts: alts.slice(0, 6),
      ext: extOrigins.slice(0, 3),
      bonus: hasBonusText
    });
  }
  return results;
}

/**
 * Phase 2 (runs inside the page via `page.evaluate`): count how often each
 * image alt text occurs on the page.
 *
 * A Map is used for counting so alt texts such as "constructor" cannot
 * collide with object prototype members.
 *
 * @returns {Array<[string, number]>} Up to 25 `[altText, count]` pairs,
 *   most frequent first.
 */
function countAltTexts() {
  const counts = new Map();
  Array.from(document.querySelectorAll('img[alt]')).forEach(function (img) {
    const alt = (img.alt || '').trim();
    if (alt.length >= 2 && alt.length < 80) counts.set(alt, (counts.get(alt) || 0) + 1);
  });
  return Array.from(counts.entries())
    .sort(function (a, b) { return b[1] - a[1]; })
    .slice(0, 25);
}

/**
 * Print the card containers, the alt-text frequency list and the selector
 * hints to stdout.
 *
 * @param {Array<{cls: string, tag: string, alts: string[], ext: string[], bonus: boolean}>} cards
 *   Result of `findCardContainers`.
 * @param {Array<[string, number]>} allAlts - Result of `countAltTexts`.
 * @returns {void}
 */
function printReport(cards, allAlts) {
  const mentionsCasino = function (text) { return /casino/i.test(text); };

  if (cards.length) {
    console.log('=== CARD CONTAINERS ===\n');
    cards.forEach(function (card, index) {
      console.log('Card ' + (index + 1) + ': <' + card.tag + ' class="' + card.cls + '">');
      console.log('  imgs: ' + JSON.stringify(card.alts));
      if (card.ext.length) console.log('  ext:  ' + card.ext.join(', '));
      if (card.bonus) console.log('  bonus: text found');
      console.log('');
    });
  } else {
    console.log('(no card containers detected)\n');
  }

  if (allAlts.length) {
    console.log('=== ALT TEXT PATTERNS ===\n');
    allAlts.forEach(function (entry) {
      const text = entry[0];
      const count = entry[1];
      const mark = mentionsCasino(text) ? '  *CASINO*' : '';
      console.log('  x' + String(count).padStart(3, ' ') + '  "' + text + '"' + mark);
    });
  }

  // Selector hints are based on the first card whose images mention "casino".
  console.log('\n=== HINTS ===\n');
  const firstCasinoCard = cards.find(function (card) { return card.alts.some(mentionsCasino); });
  if (!firstCasinoCard) {
    console.log('No casino-card containers found.');
    console.log('Check alt-text list for patterns.');
    return;
  }

  const classNames = firstCasinoCard.cls.trim().split(/\s+/);
  console.log('Card class hint: .' + classNames[0]);
  if (classNames.length > 1) console.log('Full class: ' + firstCasinoCard.cls);
  console.log('Has ext links: ' + Boolean(firstCasinoCard.ext.length));

  const sampleAlt = firstCasinoCard.alts.find(mentionsCasino) || '';
  if (sampleAlt) {
    console.log('Sample alt: "' + sampleAlt + '"');
    console.log('Brand would be: "' + brandFromAlt(sampleAlt) + '"');
  }
}

// Entry point: load the URL from argv[2], inspect the page, print the report.
(async () => {
  const url = process.argv[2];
  if (!url) {
    console.log('Usage: node site-inspector.js "<url>"');
    // exitCode + return (instead of process.exit) lets stdout flush first.
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch({
    headless: 'new',
    // PUPPETEER_EXECUTABLE_PATH overrides the macOS default on other systems.
    executablePath:
      process.env.PUPPETEER_EXECUTABLE_PATH ||
      '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    args: ['--no-sandbox']
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36');

    console.error('\n>> Loading: ' + url);
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // Fixed grace period after DOMContentLoaded before the DOM is inspected.
    await new Promise(function (resolve) { setTimeout(resolve, 4000); });

    const title = await page.title();
    console.log('\n=== Title: ' + title.substring(0, 80) + '\n');

    const cards = await page.evaluate(findCardContainers, siteHostOf(url));
    const allAlts = await page.evaluate(countAltTexts);
    printReport(cards, allAlts);
  } finally {
    // Always shut the browser down (this also closes its pages), even when
    // navigation or evaluation throws, so no Chrome process is left behind.
    await browser.close();
  }
})().catch(function (err) {
  console.error('site-inspector failed: ' + (err && err.message ? err.message : String(err)));
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user