const { execFile } = require('child_process');
const fs = require('fs');

const CSV_OUTPUT = './casino_affiliate_sites.csv';
const CP_FILE = './.dir_crawl.json';

// Known casino affiliate review / directory sites (confirmed to link 5+ casinos each).
// We scrape these for their content AND extract all outbound links to find more affiliates.
const SEED_SITES = [
  // UK/Global major affiliates
  { url: 'https://www.casino.org/', title: 'Casino.org' },
  { url: 'https://www.casinoreviews.net/', title: 'CasinoReviews.net' },
  { url: 'https://www.gambling.com/', title: 'Gambling.com' },
  { url: 'https://casino.guru/', title: 'Casino Guru' },
  { url: 'https://chipy.com/', title: 'Chipy' },
  { url: 'https://www.racingpost.com/online-casino/best-sites/', title: 'Racing Post Casinos' },
  { url: 'https://www.betting.co.uk/casino/', title: 'Betting.co.uk Casino' },
  { url: 'https://www.pokerlistings.com/casino-sites', title: 'PokerListings' },
  { url: 'https://slotcatalog.com/en/best-online-casinos', title: 'SlotCatalog' },
  { url: 'https://www.whichbingo.co.uk/casino-sites/', title: 'WhichBingo Casino' },
  { url: 'https://www.livecasinocomparer.com/online-casino/', title: 'LiveCasinoComparer' },
  { url: 'https://first.com/casino/best-casinos', title: 'FIRST.com Casinos' },
  { url: 'https://www.oddschecker.com/casino-bonus', title: 'Oddschecker Casino' },
  { url: 'https://next.io/online-casinos-uk/', title: 'NEXT.io UK Casinos' },
  { url: 'https://www.askgamblers.com/online-casinos/reviews', title: 'AskGamblers' },
  { url: 'https://www.freebets.com/casino/', title: 'FreeBets Casino' },
  { url: 'https://www.gamblinginsider.com/uk/online-casinos', title: 'GamblingInsider UK' },
  { url: 'https://gg.co.uk/online-casinos/top-20/', title: 'GG.co.uk Top 20' },
  { url: 'https://www.casino.com/uk/', title: 'Casino.com UK' },
  { url: 'https://www.telegraph.co.uk/betting/casino/', title: 'Telegraph Casino' },
  // US affiliates
  { url: 'https://www.reeluxcasino.com/', title: 'Reelux Casino' },
  { url: 'https://www.casinotopo.com/', title: 'CasinoTopo' },
];

// Non-relevant domains to ignore when extracting outbound links.
// Entries are written without a leading "www." because getDomain() strips it.
//   - an entry ending in "." (e.g. 'wikipedia.') matches that label under any TLD
//   - any other entry matches that exact host or any of its subdomains
const SKIP = [
  'youtube.com', 'youtu.be', 'reddit.com', 'facebook.com', 'twitter.com', 'x.com',
  'linkedin.com', 'tiktok.com', 'wikipedia.', 'pinterest.', 'instagram.', 'medium.',
  'forbes.com', 'nytimes.com', 'amazon.', 'ebay.', 'google.', 'play.google.com',
  'web.archive.org', 'duckduckgo.', 'startpage.', 't.co', 'imgur.', 'flickr.',
  'github.', 'stackoverflow.', 'apple.com', 'microsoft.', 'cdnjs.cloudflare.',
  'fonts.googleapis.', 'ajax.googleapis.', 'gravatar.', 'open.graph.facebook.',
  'casino.org', 'casinoreviews.net', 'casino.guru', 'chipy.com',
];

// Casino keyword patterns for outbound link detection.
const CASINO_KW = [
  'casino', 'bet365', 'betfair', '888.', 'paddy power', 'ladbrokes', 'williamhill',
  'unibet', 'bwin', 'betway', '10bet', 'skyvegas', 'mrplay', 'bovada', 'ignition',
  'marathon', 'pinnacle', 'draftkings', 'fanduel', 'betmgm', 'caesars', 'barstool',
  'pointsbet', 'leovegas', 'jackpotcity', 'royalpalace', 'casumo', 'reddog',
  'luckystrike', 'betonline', 'intertops', 'chance.com', 'betsson', 'betclic',
  '22bet', '1xbet', 'stake.', 'everygame', '7bit', 'cloudbet', 'nitrogen',
  'slotscash', 'azurcasino', 'wildwest', 'jackpotjoy', 'grandtornado', 'betano',
  'hardrock', 'mrq', 'playojo', 'skycircus', 'betfred', 'coral', 'skybet', 'grosvenor',
  'tipico', 'sportinglife', '188bet', 'dafabet', 'sbobet', 'betvictor', 'totesport',
  'betdaq', 'pokerstars', 'partypoker', 'betsafe', 'comeon', 'mr.green', 'casino.',
];

// Keywords of 3 characters or fewer are never matched (same rule as the original
// per-link `kw.length > 3` test); filtered and lower-cased once instead of per link.
const ACTIVE_CASINO_KW = CASINO_KW
  .filter((kw) => kw.length > 3)
  .map((kw) => kw.toLowerCase());

// Substrings that mark a domain as a likely casino affiliate / review site.
const AFFILIATE_HINTS = [
  'casino', 'gambl', 'bet', 'poker', 'slot', 'oddscheck',
  'racing', 'casumo', 'chipy', 'bonus', 'jackpot',
];

// Phase 2 tuning: sites fetched concurrently, and how often progress is logged.
const BATCH_SIZE = 3;
const PROGRESS_EVERY = 50;

/**
 * Resolve after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Extract the lower-cased hostname of a URL, without a leading "www.".
 * @param {string} url - Absolute URL.
 * @returns {string} The hostname, or the lower-cased input when it cannot be parsed as a URL.
 */
function getDomain(url) {
  try {
    const { hostname } = new URL(url);
    const host = hostname.startsWith('www.') ? hostname.slice(4) : hostname;
    return host.toLowerCase();
  } catch {
    return url.toLowerCase();
  }
}

/**
 * Tell whether a domain is on the SKIP list.
 *
 * Matching is done on host boundaries rather than raw substrings, so that
 * 't.co' does not swallow every "...t.com" host (e.g. unibet.com) and
 * 'x.com' does not swallow "betfox.com".
 *
 * @param {string} d - Domain as returned by getDomain().
 * @returns {boolean} True when links to the domain should be ignored.
 */
function isSkip(d) {
  const host = d.toLowerCase();
  return SKIP.some((pattern) => {
    if (pattern.endsWith('.')) {
      // Label pattern: 'google.' matches google.com, google.co.uk, play.google.com.
      return host.startsWith(pattern) || host.includes(`.${pattern}`);
    }
    // Host pattern: the exact host or any subdomain of it.
    return host === pattern || host.endsWith(`.${pattern}`);
  });
}

/**
 * Fetch a page's HTML by shelling out to curl.
 *
 * Best-effort: curl's error/exit status is ignored and whatever was written to
 * stdout is returned, so a failed request yields '' instead of throwing.
 * curl is limited to 8 seconds per transfer and the child process to 12 seconds.
 * NOTE(review): `--max-filesize 50000` makes curl refuse downloads it knows to be
 * larger than ~50 KB; such pages come back empty. Confirm that limit is intended.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {Promise<string>} Response body, or '' on failure.
 */
async function fetchHtml(url) {
  const ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36';
  const args = ['-sL', '--max-time', '8', '-A', ua, '--max-filesize', '50000', url];
  return new Promise((resolve) => {
    try {
      execFile('curl', args, { timeout: 12000 }, (_err, stdout) => resolve(stdout || ''));
    } catch {
      // execFile throws synchronously on invalid arguments; treat as "no content".
      resolve('');
    }
  });
}

/**
 * Analyze HTML: collect outbound domains and the subset that look like casino links.
 *
 * @param {string} html - Raw page HTML.
 * @param {string} selfDomain - Domain of the page itself (as from getDomain()); links to it are ignored.
 * @returns {{allOutbound: Set<string>, casinoLinks: Set<string>}} Outbound domains not on the
 *   SKIP list, and those with at least one link whose URL contains a casino keyword.
 */
function analyzePage(html, selfDomain) {
  // NOTE(review): both patterns were empty in the reviewed copy of this file (which is a
  // syntax error); they are restored here as script/style block strippers so that hrefs
  // inside inline JS/CSS are not counted. Confirm against the intended original.
  const stripped = html
    .replace(/<script\b[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style\b[\s\S]*?<\/style>/gi, ' ');

  const allOutbound = new Set();
  const casinoLinks = new Set();

  for (const match of stripped.matchAll(/href=["'](https?:\/\/[^"'\s>]+)["']/gi)) {
    const link = match[1];
    const domain = getDomain(link);
    if (domain === selfDomain || isSkip(domain)) continue;
    allOutbound.add(domain);

    // The keyword is searched in the whole URL (host and path), not just the host.
    const lowerLink = link.toLowerCase();
    if (ACTIVE_CASINO_KW.some((kw) => lowerLink.includes(kw))) {
      casinoLinks.add(domain);
    }
  }
  return { allOutbound, casinoLinks };
}

/**
 * Heuristic: is a domain likely a casino affiliate/review site?
 * @param {string} domain - Domain name.
 * @returns {boolean} True when the domain contains one of the affiliate hint substrings.
 */
function looksLikeCasinoAffiliate(domain) {
  const lower = domain.toLowerCase();
  return AFFILIATE_HINTS.some((hint) => lower.includes(hint));
}

/**
 * Persist the crawl checkpoint to CP_FILE as JSON.
 * @param {object} data - Checkpoint state.
 */
function saveCP(data) {
  fs.writeFileSync(CP_FILE, JSON.stringify(data));
}

/**
 * Load the crawl checkpoint from CP_FILE.
 * @returns {object|null} Parsed checkpoint, or null when the file is missing or unreadable.
 */
function loadCP() {
  if (!fs.existsSync(CP_FILE)) return null;
  try {
    return JSON.parse(fs.readFileSync(CP_FILE, 'utf8'));
  } catch (err) {
    console.warn(`Ignoring unreadable checkpoint ${CP_FILE}: ${err.message}`);
    return null;
  }
}

/**
 * Build a well-formed checkpoint from whatever loadCP() returned, so that older
 * or partially written checkpoints cannot crash the run.
 * @param {*} saved - Value returned by loadCP().
 * @returns {{seedsDone: number, allSites: object[], verified: object[], checked: string[]}}
 */
function normalizeCP(saved) {
  const src = saved !== null && typeof saved === 'object' ? saved : {};
  return {
    seedsDone: Number.isInteger(src.seedsDone) && src.seedsDone >= 0 ? src.seedsDone : 0,
    allSites: Array.isArray(src.allSites) ? src.allSites : [],
    verified: Array.isArray(src.verified) ? src.verified : [],
    // Domains already fetched in Phase 2 (absent in checkpoints written by older versions).
    checked: Array.isArray(src.checked) ? src.checked : [],
  };
}

/**
 * Quote a value as a CSV text field, doubling embedded double quotes.
 * @param {*} value - Value to write; null/undefined become an empty field.
 * @returns {string} The quoted field.
 */
function csvField(value) {
  const text = String(value == null ? '' : value);
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Phase 1: scrape each remaining seed site, record it as verified and collect
 * its outbound domains into cp.allSites. The checkpoint is saved after every seed.
 * @param {object} cp - Checkpoint state (mutated).
 * @returns {Promise<void>}
 */
async function scrapeSeeds(cp) {
  const knownDomains = new Set(cp.allSites.map((s) => s.domain));

  for (let si = cp.seedsDone; si < SEED_SITES.length; si++) {
    const seed = SEED_SITES[si];
    const seedDomain = getDomain(seed.url);
    console.log(`[${si + 1}/${SEED_SITES.length}] Scraping: ${seed.title} (${seedDomain})`);

    const html = await fetchHtml(seed.url);
    if (!html || html.length < 2000) {
      console.log(` Skipped — too little content\n`);
      cp.seedsDone = si + 1;
      saveCP(cp);
      await sleep(1500);
      continue;
    }

    const { allOutbound, casinoLinks } = analyzePage(html, seedDomain);
    const count = casinoLinks.size;
    console.log(` Outbound casino links: ${count}`);

    // Record this seed as verified (it's a known affiliate).
    cp.verified.push({
      url: seed.url,
      title: seed.title || seedDomain,
      domain: seedDomain,
      casinoLinks: count,
    });

    // Queue every new outbound domain for Phase 2.
    for (const d of allOutbound) {
      if (knownDomains.has(d)) continue;
      knownDomains.add(d);
      cp.allSites.push({
        url: `https://${d}`,
        title: d,
        domain: d,
        isAffiliateCandidate: looksLikeCasinoAffiliate(d),
      });
    }

    cp.seedsDone = si + 1;
    saveCP(cp);
    await sleep(2500); // longer delay between seeds so we don't look like a bot
  }
}

/**
 * Phase 2: fetch the collected outbound domains in small batches and add the
 * ones that link to enough casinos to cp.verified. Domains already verified or
 * already checked (per the checkpoint) are not fetched again.
 * @param {object} cp - Checkpoint state (mutated).
 * @returns {Promise<void>}
 */
async function verifyOutbound(cp) {
  const done = new Set([...cp.verified.map((v) => v.domain), ...cp.checked]);
  const toVerify = cp.allSites.filter((s) => !done.has(s.domain));
  let checked = 0;

  for (let i = 0; i < toVerify.length; i += BATCH_SIZE) {
    const batch = toVerify.slice(i, i + BATCH_SIZE);
    // fetchHtml never rejects, so Promise.all cannot fail fast here.
    const results = await Promise.all(batch.map(async (site) => {
      const html = await fetchHtml(site.url);
      if (!html || html.length < 500) return { site, casino: 0 };
      return { site, casino: analyzePage(html, site.domain).casinoLinks.size };
    }));

    for (const { site, casino } of results) {
      // Lower bar for domains whose name already looks like an affiliate site.
      const threshold = site.isAffiliateCandidate ? 2 : 5;
      if (casino >= threshold) {
        // The real link count is stored (previously inflated by 2 for lower-bar sites).
        cp.verified.push({
          url: site.url,
          title: site.title || site.domain,
          domain: site.domain,
          casinoLinks: casino,
        });
      }
      cp.checked.push(site.domain);
    }

    const before = checked;
    checked += batch.length;
    // Log whenever a multiple of PROGRESS_EVERY is crossed; with a batch size that
    // does not divide it, an exact `% === 0` test would rarely fire.
    if (Math.floor(checked / PROGRESS_EVERY) > Math.floor(before / PROGRESS_EVERY)) {
      console.log(`Phase 2: ${checked}/${toVerify.length} → ${cp.verified.length}`);
    }
    saveCP(cp);
    await sleep(1000); // throttle between batches
  }
}

/**
 * Write the verified sites to CSV_OUTPUT.
 * @param {object[]} verified - Records with url, title, domain and casinoLinks.
 */
function writeCsv(verified) {
  const header = 'url,title,domain,casino_links';
  const rows = verified.map(
    (v) => `${csvField(v.url)},${csvField(v.title)},${csvField(v.domain)},${v.casinoLinks}`,
  );
  fs.writeFileSync(CSV_OUTPUT, [header, ...rows].join('\n'), 'utf8');
}

// ═══════ MAIN ═══════
/**
 * Run the crawl: resume from the checkpoint if present, scrape the seeds,
 * verify their outbound domains, then write the CSV.
 * @returns {Promise<void>}
 */
async function main() {
  console.log('═══ Directory Scraper — scrape known affiliates, expand transitively ═══\n');

  const saved = loadCP();
  const cp = normalizeCP(saved);
  if (!saved) {
    console.log('Fresh start\n');
  } else {
    console.log(`Resume: seed ${cp.seedsDone}/${SEED_SITES.length}, collected ${cp.allSites.length} domains, verified ${cp.verified.length}\n`);
  }

  // ── Phase 1: Scrape each seed site + extract outbound casino links ──────
  await scrapeSeeds(cp);
  console.log(`\nPhase 1: scraped ${SEED_SITES.length} seeds → found ${cp.allSites.length} outbound domains\n`);

  // ── Phase 2: Now check those outbound domains — fetch & verify ────────
  await verifyOutbound(cp);
  console.log(`\nPhase 2 done: ${cp.checked.length} checked, ${cp.verified.length} passed\n`);

  // ── Write CSV ───────
  writeCsv(cp.verified);
  console.log(`══════════ ${cp.verified.length} sites → ${CSV_OUTPUT} ══════════`);
}

// Start the crawl only when this file is executed directly, so the helpers can be
// loaded without triggering network and file I/O.
if (require.main === module) {
  main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
}