125 lines
5.0 KiB
JavaScript
125 lines
5.0 KiB
JavaScript
async extractGeneric(page) {
|
|
console.log(' Using generic extractor');
|
|
return page.evaluate(() => {
|
|
var casinos = [];
|
|
var seenNames = {};
|
|
|
|
function safeText(el, maxLen) {
|
|
if (!el) return '';
|
|
var t = el.textContent.trim().replace(/\s+/g, ' ');
|
|
return maxLen && t.length > maxLen ? t.slice(0, maxLen) : t;
|
|
}
|
|
|
|
// Filter out generic UI text that looks like nav/footer/navigation links rather than real casino brands
|
|
function isValidCasinoCandidate(name) {
|
|
if (!name || name.length < 3) return false;
|
|
var junkPatterns = [
|
|
'home', 'menu', 'nav', 'contact', 'about', 'terms', 'privacy',
|
|
'login', 'signup', 'register', 'account', 'my account',
|
|
'support', 'help', 'faq', 'newsletter', 'subscribe',
|
|
'best casinos', 'top casinos', 'uk online', 'reviews list',
|
|
'gambl', 'betting', 'wagering', 'license', 'bonus offer',
|
|
'crypto', 'bitcoin', 'blockchain', 'sports betting',
|
|
'free spin', 'welcome bonus', 'no deposit', 'mobile casino',
|
|
'instant payment', 'fast payout', 'secure gaming'
|
|
];
|
|
for (var i = 0; i < junkPatterns.length; i++) {
|
|
if (name.toLowerCase().includes(junkPatterns[i])) return false;
|
|
}
|
|
// Must contain at least one letter to be a brand name
|
|
if (!/[a-zA-Z]/.test(name.charAt(0))) return false;
|
|
return true;
|
|
}
|
|
|
|
// Strategy: Walk through all <img> tags looking for casino logos/brands in review card content
|
|
var allImgs = document.querySelectorAll('img[alt]');
|
|
|
|
for (var i = 0; i < allImgs.length && casinos.length < 20; i++) {
|
|
var img = allImgs[i];
|
|
var altText = img.alt.trim();
|
|
|
|
if (!isValidCasinoCandidate(altText)) continue;
|
|
|
|
var cleanName = altText.replace(/[Cc]asino $/i, '').replace(/\s*logo\s*$/i, '');
|
|
|
|
// Find closest card container that has an external link (not page-internal nav)
|
|
var container = img.closest('div[class]');
|
|
if (!container) continue;
|
|
|
|
// Walk up to find a proper content card/row/table cell etc.
|
|
var parentCard = null;
|
|
while (container && container !== document.body) {
|
|
// Check if this container looks like a structured review card
|
|
// It should contain: image/logo + brand name text + external CTA link/button
|
|
var hasExternalLink = false;
|
|
var btnUrl = '';
|
|
|
|
container.querySelectorAll('a[href]').forEach(function(a) {
|
|
try {
|
|
var url = new URL(absoluteURL(url.href, document.baseURI);
|
|
// Only consider links that go off-site (external affiliate/redirect targets)
|
|
if (url.hostname !== location.hostname && !btnUrl) btnUrl = url.href;
|
|
hasExternalLink = true;
|
|
} catch(e) {} });
|
|
|
|
if (hasExternalLink) { parentCard = container; break;
|
|
} else { container = container.parentElement; }
|
|
}
|
|
|
|
if (!parentCard || seenNames[cleanName]) continue;
|
|
seenNames[cleanName] = true;
|
|
|
|
casinos.push({
|
|
position: casinos.length + 1,
|
|
name: cleanName.replace(/[^a-zA-Z0-9\s&.]/g, '').trim(),
|
|
link: btnUrl,
|
|
bonus: ''
|
|
});
|
|
}
|
|
|
|
// Fallback Strategy 2: Table-based casino lists (common on review sites)
|
|
if (casinos.length === 0) {
|
|
var tables = document.querySelectorAll('table');
|
|
for (var t = 0; t < Math.min(tables.length, 3); t++) {
|
|
var rows = tables[t].querySelectorAll('tr');
|
|
for (var r = 0; r < rows.length && casinos.length < 20; r++) {
|
|
var cells = rows[r].querySelectorAll('td, th');
|
|
if (!cells[ci].querySelector('img')) hasImg = true;
|
|
if (!cells[ci].querySelector('a')) hasLink = true;
|
|
}
|
|
if (!hasImg && !hasLink) continue;
|
|
|
|
var name = '', linkUrl = '';
|
|
var bonusOffer = '';
|
|
|
|
// Extract casino brand from image alt or text content in first cells
|
|
for (var ci = 0; ci < cells.length; ci++) {
|
|
var img2 = cells[ci].querySelector('img');
|
|
var anchor = cells[ci].querySelector('a');
|
|
|
|
// Prefer logo alt over raw cell text
|
|
if (img2 && img2.alt) name = safeText(img2, 100);
|
|
else if (!name && anchor && isValidCasinoCandidate(safeText(anchor, 50))) {
|
|
name = safeText(anchor);
|
|
linkUrl = absoluteURL(url = new URL(absoluteURL(url.absoluteURL(url.href, document.baseURI).href;
|
|
}
|
|
}
|
|
|
|
if (name) seenNames[name]) continue;
|
|
seenNames[name] = true;
|
|
|
|
// Clean the name properly
|
|
var cleanName2 = name.replace(/[^a-zA-Z0-9\s&.]/g, '').trim();
|
|
|
|
casinos.push({
|
|
position: casinos.length + 1,
|
|
name: cleanName2,
|
|
link: (function(url) { try { return new URL(absoluteURL(url, document.baseURI).href; } catch(e) { return url || ''; } })(linkUrl),
|
|
bonus: bonusOffer.trim()
|
|
});
|
|
}
|
|
}
|
|
|
|
return casinos.slice(0, 20);
|
|
});
|
|
} |