Files
crawler/find-casino-affiliates.js
T
2026-06-26 14:30:45 +02:00

302 lines
13 KiB
JavaScript

const { execFile } = require('child_process');
const fs = require('fs');
const util = require('util');
/**
 * Run an external command (no shell) and report the outcome as a value.
 *
 * Every outcome delivered through execFile's callback — success, non-zero
 * exit, timeout, output overflow, spawn failure — resolves the promise; the
 * error, if any, is carried in the `err` field so callers can still use
 * whatever stdout was captured.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed verbatim to the executable.
 * @returns {Promise<{err: (Error|null), stdout: string}>} Captured stdout and the error (null on success).
 */
const execFileP = (cmd, args) => new Promise((resolve) => {
  const options = {
    timeout: 15000, // kill the child after 15 s
    // Node's default maxBuffer is 1 MiB; larger responses would be truncated
    // and reported as an error, so allow substantially more.
    maxBuffer: 32 * 1024 * 1024,
  };
  execFile(cmd, args, options, (err, stdout) => {
    resolve({ err: err || null, stdout });
  });
});
// Base URL of the search instance; apiSearch() appends "/search" and the query string to it.
const BASE = 'https://search.griffin.pm';
// Path of the CSV file written at the end of the run (relative to the working directory).
const CSV_FILE = './casino_affiliate_sites.csv';
// ── Search queries across regions, niches, languages ───────────────
// Each entry is sent to the search API page by page; the number of entries
// is printed at startup as Q.length.
const Q = [
// English, global and broad
"best online casino review site",
"top casino reviews ranked website comparison",
"online casino bonus comparison portal rated all",
"compare rate casinos directory platform reviewed best",
"independent casino review trusted expert tested website",
"safest licensed online casino watchdog reviewed list",
"comprehensive casino guide reviews rating portal list all",
"professional tested casinos ranked listed website",
"community player rated casino review platform site top",
"fast payout online casino websites compared list best",
"high roller VIP casino review comparison website site",
"newly launched online casino reviewed sites 2025 list all",
"best mobile casino app reviewed site 2025 top",
"live dealer casino reviewed ranked listed sites",
"cryptocurrency crypto bitcoin casino review website compared all",
"no deposit bonus casino aggregator rated list compared",
"free spins casino offers comparison reviewed 2025",
"casino hub guides links site information platform",
"gambling publisher content platform published listed best",
"top paying online casino websites ranked rated list best",
// United States / Canada
"real money online casinos USA reviewed ranked compared",
"legal regulated casinos USA states list compared best",
"New Jersey casino sites reviewed ranked rated NJ",
"Pennsylvania PA online casino sites reviewed 2025",
"Michigan MI legal casino reviewed top all compared",
"Delaware West Virginia online casino websites reviewed list",
"sweepstakes social casino reviewed free coins site USA",
"Canada best online casino sites reviewed compared listed",
"Ontario regulated iGaming casinos website reviewed best ON",
// United Kingdom / Ireland
"UK best online casino review websites GC licensed compared",
"British recommended casino websites tested rated list",
"Irish online best casinos reviewed site ranked list",
// Australia / New Zealand
"Australia real money pokies casino website ranked rated",
"New Zealand best internet casinos reviewed listed all",
// Germany / Austria / Switzerland
"beste online casino deutschland getestet websiten verglichen liste",
"online casino osterreich schweiz webseite vergleich tested beste",
"casino gmbh vergleich online deutschland website liste beste",
// Netherlands / Belgium
"beste casino online nederland getest vergeleken websitelijst",
"belgie gelicentieerd online casino website beoordeeld vergelijk",
// France
"meilleurs casinos en ligne france testes compares avis site liste",
"casinos internet legaux FRANCE comparees meilleures websites testees",
// Spain / Portugal / Italy
// NOTE(review): "esaa" in the next query looks like a garbled "espana" — confirm before changing the string.
"mejores casinos internet esaa webs comparativas listados resenas",
"melhores cassino online portugal avaliados comparados melhor lista",
"migliori siti casino online italiani confrontati recensiti lista",
// Nordics / Eastern Europe
"svenska casinon recenserade jamforande hemsidor jamforda lista",
"norske danske beste bedste nettecasino online sammenlignet anmeldt",
"suomen best nettikasinot sivustot vertailu paras reviewed arvioiut",
"poland kasyno online najlepsze polecone strony recenze review sites",
"ceska slovenska republika nejlepsi casina stranky porovnan hodnoceni",
"magyarorszag legjobb online casino oldalak osszehasonlitas lista",
"romania cele mai bune site uri cazinouri online comparativ review",
// Niche / affiliate-oriented
"casino content sites media publishers affiliates directory gaming",
"igaming marketing publisher website affiliates listed rated",
"gambling forum recommended best casinos community voted listed",
"smaller casino blog review site honest opinion tested reviewed",
"expert verified licensed checked casino rating platform database",
"slot machine casino reviewed websites ranked featured all compared",
"baccarat blackjack roulette online casino reviewed best compared",
"paypal skrill neteller e-wallet supported casino sites compared",
"instant withdrawal fastest payout casinos listed reviewed site",
"progressive jackpot biggest win online casino reviewed list compared",
"online poker and casino hybrid platform reviewed compared best",
"sportsbook combined casino gaming site review aggregator listed",
"best new online casinos launched this month reviewed tested",
"safest gambling sites licensed regulated UKGC MGA reviewed",
"top online bingo casino sites reviewed compared rated list",
"best table game online casinos blackjack roulette comparison site",
"casino welcome bonus comparison website ranked rated best offers",
"mobile only online casino app reviewed compared listed best 2025",
"instant play no download casino websites reviewed compared best",
"trusted gambling review aggregator database platform sites all",
"online casino blacklist warning watchdog site verified reviewed",
"casino affiliate website directory list content platforms 2025",
"igaming media company review publisher website directory listed",
"best online betting and casino comparison site ranked reviewed UK",
"top real money gambling websites compared analyzed rated all 2025",
"online gaming platform review aggregator sites listed best rated",
"casino software provider game selection compared reviewed sites all",
"high RTP high payout rate casinos reviewed compared list best",
"exclusive bonus code online casino websites reviewed ranked all",
"vip loyalty rewards program online casino reviewed best compared",
"mobile responsive online casino websites reviewed compared top",
"fastest customer support online casino sites reviewed rated best",
"most game variety slot selection online casinos reviewed list",
"safest withdrawal methods crypto bank transfer casino site reviewed",
"online gambling comparison tool website aggregator platform all",
"casinoreviews.com alternative websites better unbiased compared",
"alternative to askgamblers trusted independent casino review sites",
"best casino rating directory platforms database reviewed listed all",
"top 50 best online casino websites ranked reviewed rated list 2025",
];
// Brand names and keyword fragments used as fingerprints: a link whose
// lower-cased URL contains any of these is counted as a casino link.
// Entries are normalised to lower case once, here.
const CASINO_KW = Array.from(
  [
    'casino', 'bet365', 'betfair', '888.', 'paddy power', 'ladbrokes',
    'williamhill', 'unibet', 'bwin', 'betway', '10bet', 'skyvegas',
    'mrplay', 'bovada', 'ignition', 'bitsler', 'marathon', 'pinnacle',
    'draftkings', 'fanduel', 'betmgm', 'caesar', 'barstool', 'pointsbet',
    'leovegas', 'jackpotcity', 'royalpalace', 'casumo', 'reddog',
    'luckystrike', 'betonline', 'intertops', 'chance.com', 'betsson',
    'betclic', '22bet', '1xbet', 'stake.', 'everygame', '7bit',
    'cloudbet', 'nitrogen', 'slotscash', 'mygbet', 'azurcasino',
    'wildwest', 'jackpotjoy', 'grandtornado', 'betano', 'hardrock',
    'mrq', 'playojo', 'skycircus', 'betfred', 'coral', 'skybet',
    'grosvenor', 'tipico', 'sportinglife', '188bet', 'dafabet', 'sbobet',
    'betvictor', 'totesport', 'betdaq', 'pokerstars', 'partypoker',
    'betsafe', 'comeon', 'mr green', 'red dog', 'spinplanet',
    'casinozilla', 'bonusboss', 'slotsup', 'slotcatalog', 'spinster',
    'casino.guru', 'chipy',
  ],
  (keyword) => keyword.toLowerCase(),
);
// Domains to skip (social media, news, search engines, code hosting, etc.).
// Entries ending in "." name a site without its TLD (e.g. "amazon." for any
// country variant); the others are complete domains. Consumed by isSkip().
const SKIP = [
"youtube.com","youtu.be","reddit.com","facebook.com","twitter.com","x.com",
"linkedin.com","tiktok.com","wikimedia.org","wikipedia.","pinterest.",
"instagram.","medium.com","forbes.com","nytimes.com","amazon.",
// Fixed: a misplaced quote previously fused "duckduckgo." and "startpage.com"
// into one broken token, which made the whole file fail to parse.
"play.google.com","web.archive.org","duckduckgo.","startpage.com",
"brave.com","t.co","imgur.com","flickr.com","goodreads.",
"steamcommunity.","github.com","stackoverflow."
];
/**
 * Pause for a given time.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Promise fulfilled (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Reduce a URL to its lower-cased hostname with a leading "www." removed.
 *
 * @param {*} url - Normally an absolute URL string.
 * @returns {string} The normalised hostname. If the input cannot be parsed as
 *   a URL, the input itself is returned lower-cased (empty string for
 *   null/undefined).
 */
function getDomain(url) {
  try {
    const host = new URL(url).hostname.toLowerCase();
    return host.startsWith('www.') ? host.slice(4) : host;
  } catch {
    // Fallback for unparseable input. Coerce first: calling .toLowerCase()
    // directly on undefined/null/numbers would throw from inside the handler.
    return url == null ? '' : String(url).toLowerCase();
  }
}
/**
 * Tell whether a domain belongs to one of the SKIP entries.
 *
 * Matching is done on domain-label boundaries rather than raw substrings, so
 * "t.co" no longer matches "unibet.com" and "x.com" no longer matches
 * "netflix.com":
 *   - an entry ending in "." (e.g. "amazon.") matches when a label sequence of
 *     the domain starts with it ("amazon.co.uk", "smile.amazon.com");
 *   - any other entry matches the domain itself or any of its subdomains.
 *
 * @param {string} d - Normalised (lower-case) domain, as produced by getDomain().
 * @returns {boolean} True when the domain should be ignored.
 */
function isSkip(d) {
  return SKIP.some((entry) => (
    entry.endsWith('.')
      ? d.startsWith(entry) || d.includes(`.${entry}`)
      : d === entry || d.endsWith(`.${entry}`)
  ));
}
// ── Fetch SearXNG JSON via curl (reliable) ─────────────────────
/**
 * Fetch one page of results for a query from the SearXNG JSON search API.
 *
 * Uses SearXNG's documented parameter names: `pageno` for paging and
 * `safesearch` for the filter level. (Unknown names such as `pagenum` are
 * ignored by the server, which makes every request return page 1.)
 *
 * Best effort by design: any failure (curl error, empty body, non-JSON body,
 * missing `results`) yields an empty array so the crawl can continue.
 *
 * @param {string} query - Search terms.
 * @param {number} [pg=1] - 1-based page number.
 * @returns {Promise<Array>} The `results` array of the response, or [].
 */
async function apiSearch(query, pg = 1) {
  try {
    const endpoint = new URL(`${BASE}/search`);
    const params = {
      q: query,
      format: 'json',
      pageno: pg,
      categories: 'general',
      language: 'all',
      safesearch: 0,
    };
    for (const [key, value] of Object.entries(params)) {
      endpoint.searchParams.set(key, String(value));
    }
    const { stdout } = await execFileP('curl', ['-s', '--max-time', '15', endpoint.href]);
    if (!stdout) return [];
    const data = JSON.parse(stdout);
    return Array.isArray(data.results) ? data.results : [];
  } catch {
    return [];
  }
}
// ── Fetch site HTML and count casino links ─────────────────────
/**
 * Download a page and count the distinct external domains it links to whose
 * link URL contains one of the CASINO_KW fingerprints.
 *
 * Best effort by design: any failure, or a response shorter than 1000
 * characters, counts as 0.
 *
 * @param {string} url - Page to download (redirects are followed).
 * @param {string} targetDomain - The page's own domain as produced by
 *   getDomain(); links pointing back to it are ignored.
 * @returns {Promise<number>} Number of distinct casino-looking linked domains.
 */
async function verifyCasinoLinks(url, targetDomain) {
  try {
    const { stdout } = await execFileP('curl', [
      '-s', '--max-time', '10',
      '-H', 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/125 Safari/537.36',
      '-L', // follow redirects
      url
    ]);
    if (!stdout || stdout.length < 1000) return 0;

    // Drop <script>/<style> bodies so URLs embedded in code are not counted as links
    const html = stdout
      .replace(/<script[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[\s\S]*?<\/style>/gi, ' ');

    // Absolute http(s) links only. The pattern is case-insensitive, so links
    // written with an upper-case scheme ("HTTPS://…") are accepted as well;
    // no additional scheme check is needed (a case-sensitive one used to drop them).
    const hrefRe = /href=["'](https?:\/\/[^"'\s>]+)["']/gi;
    const casinoDomains = new Set();
    for (const [, linkUrl] of html.matchAll(hrefRe)) {
      const d = getDomain(linkUrl);
      if (d === targetDomain || isSkip(d) || casinoDomains.has(d)) continue;
      // Keywords are matched against the whole URL (domain, path and query)
      const lo = linkUrl.toLowerCase();
      if (CASINO_KW.some((kw) => lo.includes(kw))) casinoDomains.add(d);
    }
    return casinoDomains.size;
  } catch {
    return 0;
  }
}
// ─────────────── MAIN ────────────────────────────────────────
// Two-phase crawl:
//   1. query the search API and keep one URL per unique domain;
//   2. download each URL and keep the sites that link to at least
//      MIN_CASINO_LINKS distinct casino-looking domains;
// then write the surviving sites to CSV_FILE.
(async () => {
  console.log('═══ Casino Affiliate Crawler v4 (curl via child_process) ═══');
  console.log(`Queries : ${Q.length}`);
  console.log(`Goal : 1000+ sites with >=5 casino links\n`);

  const MIN_CASINO_LINKS = 5; // threshold for a site to be written to the CSV

  // ── Phase 1: Collect unique domain URLs from SearXNG ──────
  const uniqueSites = []; // [{url, title, domain}]
  const seenDomains = new Set();
  const BAND = 5; // concurrent query fetches per band
  const PAGES = 18; // max pages per query
  for (let bStart = 0; bStart < Q.length; bStart += BAND) {
    const bQs = Q.slice(bStart, bStart + BAND);
    for (let pg = 1; pg <= PAGES; pg++) {
      // Fire all queries in the band concurrently at this page
      const pageResults = await Promise.all(bQs.map((q) => apiSearch(q, pg)));
      let empty = 0;
      for (const results of pageResults) {
        if (!results.length) { empty++; continue; }
        for (const r of results) {
          // Skip malformed entries; only the first URL seen per domain is kept
          if (!r || typeof r.url !== 'string') continue;
          const d = getDomain(r.url);
          if (seenDomains.has(d)) continue;
          seenDomains.add(d);
          uniqueSites.push({ url: r.url, title: (r.title || '').substring(0, 250), domain: d });
        }
      }
      if (empty === bQs.length) break; // all queries exhausted for this page
      await sleep(450); // rate limit between pages
    }
    console.log(`[b${Math.floor(bStart / BAND) + 1}] ${uniqueSites.length} unique domains`);
    await sleep(600);
  }
  console.log(`\nPhase 1 done: ${seenDomains.size} unique domains\n`);

  // ── Phase 2: Verify ≥5 casino links (concurrent batches) ────────
  const verified = [];
  let checked = 0;
  const BATCH_SIZE = 4; // concurrent URL fetches per batch
  for (let i = 0; i < uniqueSites.length; i += BATCH_SIZE) {
    const chunk = uniqueSites.slice(i, i + BATCH_SIZE);
    const counts = await Promise.all(chunk.map((s) => verifyCasinoLinks(s.url, s.domain)));
    for (const [j, site] of chunk.entries()) {
      if (counts[j] >= MIN_CASINO_LINKS) {
        verified.push({
          url: site.url,
          title: site.title,
          domain: site.domain,
          casinoLinks: counts[j],
        });
      }
      checked++;
    }
    if (checked % 100 === 0 || checked === uniqueSites.length) {
      console.log(`Phase 2: ${checked}/${uniqueSites.length} → ${verified.length} pass >=5`);
    }
    await sleep(200); // small throttle between batches
  }

  // ── Write CSV ───────────────────────────────
  // Every text field is quoted, with embedded double quotes doubled (RFC 4180),
  // so quotes or commas in a URL, title or domain cannot break a row.
  const csvField = (value) => `"${String(value).replace(/"/g, '""')}"`;
  const header = 'url,title,domain,casino_links';
  const rows = verified.map((v) => [
    csvField(v.url),
    csvField(v.title || ''),
    csvField(v.domain),
    v.casinoLinks,
  ].join(','));
  fs.writeFileSync(CSV_FILE, [header, ...rows].join('\n'), 'utf8');
  console.log(`\n═══════════ ${verified.length} verified sites → ${CSV_FILE} ═══════════`);
})().catch((err) => {
  // Without this handler a failure inside the async body is an unhandled rejection
  console.error('Crawler failed:', err);
  process.exitCode = 1;
});