Initial commit
This commit is contained in:
@@ -0,0 +1,125 @@
|
||||
async extractGeneric(page) {
|
||||
console.log(' Using generic extractor');
|
||||
return page.evaluate(() => {
|
||||
var casinos = [];
|
||||
var seenNames = {};
|
||||
|
||||
function safeText(el, maxLen) {
|
||||
if (!el) return '';
|
||||
var t = el.textContent.trim().replace(/\s+/g, ' ');
|
||||
return maxLen && t.length > maxLen ? t.slice(0, maxLen) : t;
|
||||
}
|
||||
|
||||
// Filter out generic UI text that looks like nav/footer/navigation links rather than real casino brands
|
||||
function isValidCasinoCandidate(name) {
|
||||
if (!name || name.length < 3) return false;
|
||||
var junkPatterns = [
|
||||
'home', 'menu', 'nav', 'contact', 'about', 'terms', 'privacy',
|
||||
'login', 'signup', 'register', 'account', 'my account',
|
||||
'support', 'help', 'faq', 'newsletter', 'subscribe',
|
||||
'best casinos', 'top casinos', 'uk online', 'reviews list',
|
||||
'gambl', 'betting', 'wagering', 'license', 'bonus offer',
|
||||
'crypto', 'bitcoin', 'blockchain', 'sports betting',
|
||||
'free spin', 'welcome bonus', 'no deposit', 'mobile casino',
|
||||
'instant payment', 'fast payout', 'secure gaming'
|
||||
];
|
||||
for (var i = 0; i < junkPatterns.length; i++) {
|
||||
if (name.toLowerCase().includes(junkPatterns[i])) return false;
|
||||
}
|
||||
// Must contain at least one letter to be a brand name
|
||||
if (!/[a-zA-Z]/.test(name.charAt(0))) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Strategy: Walk through all <img> tags looking for casino logos/brands in review card content
|
||||
var allImgs = document.querySelectorAll('img[alt]');
|
||||
|
||||
for (var i = 0; i < allImgs.length && casinos.length < 20; i++) {
|
||||
var img = allImgs[i];
|
||||
var altText = img.alt.trim();
|
||||
|
||||
if (!isValidCasinoCandidate(altText)) continue;
|
||||
|
||||
var cleanName = altText.replace(/[Cc]asino $/i, '').replace(/\s*logo\s*$/i, '');
|
||||
|
||||
// Find closest card container that has an external link (not page-internal nav)
|
||||
var container = img.closest('div[class]');
|
||||
if (!container) continue;
|
||||
|
||||
// Walk up to find a proper content card/row/table cell etc.
|
||||
var parentCard = null;
|
||||
while (container && container !== document.body) {
|
||||
// Check if this container looks like a structured review card
|
||||
// It should contain: image/logo + brand name text + external CTA link/button
|
||||
var hasExternalLink = false;
|
||||
var btnUrl = '';
|
||||
|
||||
container.querySelectorAll('a[href]').forEach(function(a) {
|
||||
try {
|
||||
var url = new URL(absoluteURL(url.href, document.baseURI);
|
||||
// Only consider links that go off-site (external affiliate/redirect targets)
|
||||
if (url.hostname !== location.hostname && !btnUrl) btnUrl = url.href;
|
||||
hasExternalLink = true;
|
||||
} catch(e) {} });
|
||||
|
||||
if (hasExternalLink) { parentCard = container; break;
|
||||
} else { container = container.parentElement; }
|
||||
}
|
||||
|
||||
if (!parentCard || seenNames[cleanName]) continue;
|
||||
seenNames[cleanName] = true;
|
||||
|
||||
casinos.push({
|
||||
position: casinos.length + 1,
|
||||
name: cleanName.replace(/[^a-zA-Z0-9\s&.]/g, '').trim(),
|
||||
link: btnUrl,
|
||||
bonus: ''
|
||||
});
|
||||
}
|
||||
|
||||
// Fallback Strategy 2: Table-based casino lists (common on review sites)
|
||||
if (casinos.length === 0) {
|
||||
var tables = document.querySelectorAll('table');
|
||||
for (var t = 0; t < Math.min(tables.length, 3); t++) {
|
||||
var rows = tables[t].querySelectorAll('tr');
|
||||
for (var r = 0; r < rows.length && casinos.length < 20; r++) {
|
||||
var cells = rows[r].querySelectorAll('td, th');
|
||||
if (!cells[ci].querySelector('img')) hasImg = true;
|
||||
if (!cells[ci].querySelector('a')) hasLink = true;
|
||||
}
|
||||
if (!hasImg && !hasLink) continue;
|
||||
|
||||
var name = '', linkUrl = '';
|
||||
var bonusOffer = '';
|
||||
|
||||
// Extract casino brand from image alt or text content in first cells
|
||||
for (var ci = 0; ci < cells.length; ci++) {
|
||||
var img2 = cells[ci].querySelector('img');
|
||||
var anchor = cells[ci].querySelector('a');
|
||||
|
||||
// Prefer logo alt over raw cell text
|
||||
if (img2 && img2.alt) name = safeText(img2, 100);
|
||||
else if (!name && anchor && isValidCasinoCandidate(safeText(anchor, 50))) {
|
||||
name = safeText(anchor);
|
||||
linkUrl = absoluteURL(url = new URL(absoluteURL(url.absoluteURL(url.href, document.baseURI).href;
|
||||
}
|
||||
}
|
||||
|
||||
if (name) seenNames[name]) continue;
|
||||
seenNames[name] = true;
|
||||
|
||||
// Clean the name properly
|
||||
var cleanName2 = name.replace(/[^a-zA-Z0-9\s&.]/g, '').trim();
|
||||
|
||||
casinos.push({
|
||||
position: casinos.length + 1,
|
||||
name: cleanName2,
|
||||
link: (function(url) { try { return new URL(absoluteURL(url, document.baseURI).href; } catch(e) { return url || ''; } })(linkUrl),
|
||||
bonus: bonusOffer.trim()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return casinos.slice(0, 20);
|
||||
});
|
||||
}
|
||||
Reference in New Issue
Block a user