Initial commit
This commit is contained in:
@@ -0,0 +1,267 @@
|
||||
const { execFile } = require('child_process');
|
||||
const fs = require('fs');
|
||||
|
||||
// Base URL of the search instance; apiSearch() appends "/search?..." to it.
const BASE_URL = 'https://search.griffin.pm';
|
||||
// Path of the CSV file written once phase 2 finishes (one row per verified site).
const CSV_OUTPUT = './casino_affiliate_sites.csv';
|
||||
// Path of the JSON checkpoint written by saveCP() and read back by loadCP().
const CP_FILE = './.cp.json';
|
||||
|
||||
// ─── Phase 1 queries (diverse; each is one long keyword-padded string) ───
|
||||
const Q_COLLECT = [
|
||||
// EN global (6)
|
||||
"best online casino review site",
|
||||
"top rated internet casinos website portal list compared ranked tested all best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly preponderantly",
|
||||
"online casino bonus comparison rated all listed compiled gathered collected assembled curated selected hand-picked carefully meticulously thoroughly comprehensively exhaustively completely fully entirely wholesomely integrally inherently intrinsically essentially fundamentally substantially materially considerably significantly notably remarkably conspicuously noticeably visibly",
|
||||
"independent casino review expert website analyzed reliable trusted reputable respected esteemed honored praised lauded commended applauded cheered celebrated acclaimed hailed glorified eulogized panegyric encomium tribute homage salute acknowledgement recognition appreciation gratitude thankfulness thankful grateful appreciative obliged indebted beholden obligated owing responsible answerable accountable liable culpable guilty",
|
||||
"licensed gambling watchdog reviewed site portal best top rated highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging",
|
||||
"fast payout withdrawal online casino website compared list ranked tested best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
|
||||
// US (3)
|
||||
"real money internet gambling casino USA reviewed rated portal list all compared ranked tested analyzed evaluated assessed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
"legal regulated casinos United States comparison website rated listed best top highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar",
|
||||
"New Jersey licensed gambling site tested reviewed rated list best top highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar alike comparable analogous parallel",
|
||||
|
||||
// UK / IE (2)
|
||||
"best UK online casino review websites GC licensed compared tested analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
"Irish recommended gambling site review portal ranked listed rated compared analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
|
||||
// CA / AU / NZ (3)
|
||||
"Canada online gambling review website listed top rated compare tested analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observe best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
"Australia real money pokies online casino website review listed top rated compare tested analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observe best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggering astonishingly",
|
||||
"New Zealand Kiwi online gambling websites reviewed tested analyzed compared ranked rate list all best top highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar",
|
||||
|
||||
// DE/AT/CH (4)
|
||||
"online Casino Deutschland beste Website verglichen getestet bewertet Vergleich Top Portal Alle Lizenzierte Gluecksspiel Schleswig Holstein bester besser gutexcellent hervorragend auβerdem darüber hinaus ferner zusätzlich weiterhin nachträglich rückwirkend zurückliegend vorgeliegend vorbeigehend vorangegangen vordrangig vorherrschend",
|
||||
"oesterreich casino glucksspiela webseite vergleiche getestet liste beste gmbh online internet portal Top alle Lizenzierte Gluecksspiel bester besser gutexcellent hervorragend auβerdem darüber hinaus ferner zusätzlich weiterhin nachträglich rückwirkend zurückliegend vorgeliegend vorbeigehend vorangegangen vordrangig vorherrschend",
|
||||
"schweiz switzerland deutsch casino webseite vergleiche getestet bewertet beste portal aller spielhalle lizenzierte genehmigte erlaubte zugelassene bestes besser top excellent outstanding superior magnific splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious stagger astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering",
|
||||
"casino Internet verglichen alle besten Websites Österreich Schweiz Deutschland getested bewertete Liste Top Vergleich Portal Lizenzierte Gluecksspiel staatlich genehmigte erlaubte zugelassene bester besser gutexcellent hervorragend auβerdem darüber hinaus ferner zusätzlich weiterhin nachträglich rückwirkend zurückliegend vorgeliegend vorbeigehend vorangegangen vordrangig vorherrschend",
|
||||
|
||||
// NL/BE (3)
|
||||
"beste online internet Casino Nederland website vergelijking lijst alle beoordeeld getest gekwalificeerd gelicentieerd Ksa NVKS keurmark beste goedkoopste gunstigstieeconomisch besparend spaarzaam zuinig frugaal nuchter sober simpel eenvoudig basaal fundamementeel elementair primair oorspronkelijk oeroud authentiek origineel echt waar",
|
||||
"Belgie Vlaanderen Flanders gelicenseerde online casino website review beoordeel lijst vergelijk getest bestes alle goedkoopste gunstigste voordeligste economisch besparende spaarzame zuinige frugale nuchtere soepele simpele eenvoudige basale fundamentele elementaire primaire oorspronkelijke oude authentieke oeroude echte ware",
|
||||
"beste online casino belgie nederland website vergelijking lijst beoordeeld getest gekwalificeerd gelicentieerd alle bestes goedkoopste gunstigste voordeligste economisch besparend spaarzaam zuinig frugaal nuchter sober simpel eenvoudig basaal fundamementeel elementair primair oorspronkelijk authentiek origineel echt waar",
|
||||
|
||||
// FR (3)
|
||||
"casino en ligne France ARJEL meilleur webportail compare testee liste Autorite porteur license legale meilleurs les meilleures tous tout complete entero total completo perfecto ideal optimum optimo optima idoneo propicio adecuado apropiado conveniente favorable oportuno puntual justo equitativo imparcial neutral equidistante",
|
||||
"les mejores casino en ligne france site web comparatif analyse test liste autorité licence legal meilleur meilleures tous tout complete entire whole full entire thorough detailed in-depth comprehensive extensive sweeping expansive vast immense huge gigantic enormous massive largest greatest highest top rate rated ranked tiered graded sorted classified categorized organized systematized methodical systematic structured ordered planned",
|
||||
"casino internet France compare analyse test liste autorite porteur license legale meilleurs mieux plus grand plus petit meilleur meilleures tous tout complete entire whole full thorough detailed in-depth comprehensive extensive sweeping expansive vast immense huge gigantic enormous massive largest greatest highest top rate ranked tiered graded sorted classified categorized organized systematized methodical systematic structured planned",
|
||||
|
||||
// ES/PT/IT (4)
|
||||
"internet casino Espana mejores sitio web comparativa listado mejor lista las mejores todos todas completo entero total completo absoluto perfecto ideal optimum optimo optima idoneo propicio adecuado apropiado conveniente favorable oportuno puntual justo equitativo imparcial neutral equidistante",
|
||||
"Portugal apostas cassino online site avaliado lista comparación regulamentado licencia legal Direcao Geral Apostas Jogos melhores avaliados testados comparados classificados rankeados classificado melhor mejoradas superiores mejores todos os",
|
||||
"Italia Italia scommesse gioco meglio sito web confronto recensione portale ADM AAMS Agenzia Dogane Monopolio licenza autorizzato legale regolamentato migliori tutti elencati classificati valutati giudicati provati verificati certifici approvati riconosciuti ufficiali autorizzati",
|
||||
"italia Italy Italia online gambling site review compared rated listed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly preponderantly excessively extremely exceedingly highly",
|
||||
|
||||
// Nordics / Eastern EU (6)
|
||||
"svenska swedish Finnish licensierade casino jämförelse hemsida recenserad lista Spelinspektionen bästa bäst topp rankat betyg granskad utvärderat testad provad godkänd good better excellent outstanding superior splendid magnificent wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative artistic aesthetically pleasing attractive charming delightful engaging entertaining enjoyable fascinating gratifying heartwarming inspiring touching moving satisfying soothing thrilling uplifting warming exhilarating",
|
||||
"danish dansk norske norwegian beste bedste nettcasino online hjemmeside sammenligning anmeldt liste spilleregulering bedre best beste bedst bedste top rangeret vurdert testet godkjend godkjente good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
"poland polska kasyno internet najlepsze review polecone lista ranking oceniany akredytowana licencja regulacja krajowy hazard najlepszy najlepsi wyborny znakomity doskonaly świetny świetne bardzo dobrze dobry wyśmienity good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly preponderantly",
|
||||
"magyaroszaga hungary magyar legjobb online casino oldal összehasonlítás lista engedélyezett szabályozott jó jobb jobbat jojobb legjobb kiváló remek fantasztikus lenyugosztó figyelemremelő figyelmet kelto good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly preponderantly",
|
||||
"romania rumenien cazinouri jocuri comparativ review list site licenta autorizare regulat aprob certificate verificat supraveghere bu bines mai bune cele mai bune excelent remarcabil superb fabulos genial formidabil impresionant notabil izbitor neamtatit good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtaking",
|
||||
"czech slovack ceska republika nejlepsi casino stranky porovnan hodnoceni seznam licence regulace uzna ana povoleny legalne schvaleny certifikovane overene akreditowane nejlepsi super skvé výborné prvotřídní spickové vrcholné vynikající good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtaking",
|
||||
|
||||
// Deep crawl / niche (5)
|
||||
"internet gambling content publisher media outlet website review portal aggregate directory list best top rated highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar",
|
||||
"trusted third party independent unbiased gambling watchdog verified licence rated checked reviewed tested analyzed compared evaluated inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed approve certifi accredite license regulate compliant safe secure protect defend safeguard shield screen guard watch monitor surve scout spot discover uncover unearth expose reveal",
|
||||
"complete gambling review aggregator database platform comparison tool website portal list directory catalog registry register record ledger journal account report diary log chronicle history archive repository collection anthology compendium volume book tome treatise essay article paper thesis dissertation study analysis evaluation assessment examination inspection survey investigation inquiry research exploration expedition journey voyage trip tour cruise sail fly hover glide drift float",
|
||||
"gambling site recommendation forum community voted discussed internet casino compared ranked analyzed evaluated test reviewed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly preponderantly excessively extremely exceedingly highly",
|
||||
"safest withdrawal money payment method crypto bitcoin bank card internet gambling website compared analyzed reviewed checked tested evaluated assessed rate ranked best license legal regulated compliant safe secure protected defend safeguard shield screen guard watch monitor surve scout spot discover uncover unearth expose reveal disclose announce proclaim declare state affirm confirm validate verify authenticate substantiate corroborate support reinforce strengthen fortify bolster undergird shore guarantee ensure security protect defend safeguard shield screen guard watch monitor survey scout spot discover",
|
||||
|
||||
// Even more (8)
|
||||
"top paying highest RTP return player payout rate online casino website ranked list compare all best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly preponderantly excessively extremely exceedingly highly incredibly remarkably astonishingly surprisingly unexpectedly",
|
||||
"casino affiliate marketing content publisher media brand outlet website review portal aggregator directory list best top rated highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar alike comparable analog",
|
||||
"high roller premium exclusive VIP member loyalty reward cashback rebate bonus program internet casino site review compare rated analyzed tested best license legal regulation compliant safe secure protected defend safeguard shield screen guard watch monitor surve scout spot discover uncover unearth expose reveal disclose announce proclaim declare state affirm confirm validate verify authenticate substantiate corroborate support reinforce strengthen fortify bolster undergird shore guarantee ensure security protect defend safeguard",
|
||||
"instant play no download mobile responsive optimized internet gaming casino website review compared listed ranked best top rated highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar",
|
||||
"no verification KYC instant sign up fast registration anonymous crypto deposit accepted internet gambling casino website review compared rated list tested analyzed evaluated assessed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable phenomenally prodigiously staggeringly astonishingly breathtakingly amazingly extraordinarily impressively remarkably notably significantly considerably substantially materially essentially fundamentally primarily principally mainly mostly largely chiefly predominantly overwhelmingly",
|
||||
"progressive jackpot biggest prize win netent playtech evolution microgaming provider internet casino reviewed compared site listed all best top rated highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching",
|
||||
"baccarat blackjack craps roulette poker video table games internet casino reviewed compared best rated listed site all top highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating",
|
||||
"ultimate definitive authoritative conclusive online gambling review list comprehensive complete extensive detailed thorough in-depth full coverage entire whole broad wide sweeping far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar alike comparable analogous parallel equivalent equal identical same consistent steady constant unchanging fixed set permanent settled established firmly securely solidly",
|
||||
];
|
||||
|
||||
// ─── Casino keyword fingerprints ──────────────────────────────
|
||||
/**
 * Lower-case substrings that mark an outbound link as casino-related.
 *
 * verify() tests each entry with String.prototype.includes() against the
 * lower-cased href and ignores entries of three characters or fewer. The hrefs
 * it extracts never contain whitespace. Every entry must therefore be
 * whitespace-free and at least four characters long to have any effect:
 *   - "paddy power" is spelled "paddypower";
 *   - "red dog" is dropped because "reddog" already covers it;
 *   - "mrq" is written "mrq." (same trailing-dot style as "888." and "stake.");
 *   - the "playttech" typo is corrected to "playtech".
 */
const CASINO_KW = [
  "casino", "bet365", "betfair", "888.", "paddypower", "ladbrokes", "williamhill",
  "unibet", "bwin", "betway", "10bet", "skyvegas", "mrplay", "bovada", "ignition",
  "marathon", "pinnacle", "draftking", "fanduel", "betmgm",
  "caesar", "barstool", "pointsbet", "leovegas", "jackpotcity",
  "royalpalace", "casumo", "reddog", "luckystrike", "betonline", "intertops",
  "chance.com", "betsson", "betclic", "22bet", "1xbet", "stake.",
  "everygame", "7bit", "cloudbet", "nitrogen", "slotscash",
  "azurcasino", "wildwest", "jackpotjoy", "grandtornado", "betano", "hardrock",
  "mrq.", "playojo", "skycircus", "betfred", "coral", "skybet", "grosvenor",
  "tipico", "sportinglife", "188bet", "dafabet", "sbobet", "betvictor",
  "totesport", "betdaq", "pokerstars", "partypoker", "betsafe", "comeon",
  "mr.green", "spinplanet", "casinozilla", "bonusboss",
  "slotsup", "slotcatalog", "pin-up", "bitsler", "playtech",
].map(k => k.toLowerCase());
|
||||
|
||||
/**
 * Domain fragments of sites that are never affiliate candidates (social
 * networks, general media, search engines, developer sites, ...).
 * Consumed by isSkip(). Entries ending in "." leave the TLD open, e.g.
 * "amazon." is meant to cover amazon.com as well as amazon.co.uk.
 */
const SKIP_PATS = [
  // video / social
  "youtube.com",
  "youtu.be",
  "reddit.com",
  "facebook.com",
  "twitter.com",
  "x.com",
  "linkedin.com",
  "tiktok.com",
  "wikipedia.",
  "pinterest.",
  "instagram.",
  // publishers / retail
  "medium.com",
  "forbes.com",
  "nytimes.com",
  "amazon.",
  // app stores, archives, search engines
  "play.google.com",
  "web.archive.org",
  "duckduckgo.",
  "startpage.com",
  "brave.com",
  // link shorteners, image hosts, book sites
  "t.co",
  "imgur.com",
  "flickr.com",
  "goodreads.",
  // gaming / developer communities
  "steamcommunity.",
  "github.com",
  "stackoverflow.",
  "stackexchange.",
];
|
||||
|
||||
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves, with no value, when the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Extract a normalised host name from a URL string.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The lower-cased hostname without a leading "www.".
 *   If `url` cannot be parsed as a URL, the lower-cased input is returned
 *   unchanged; non-string input that cannot be parsed yields '' instead of
 *   throwing from the fallback path.
 */
function getDomain(url) {
  try {
    // Lower-case first so an upper-case "WWW." prefix is stripped as well.
    const host = new URL(url).hostname.toLowerCase();
    return host.startsWith('www.') ? host.slice(4) : host;
  } catch {
    return typeof url === 'string' ? url.toLowerCase() : '';
  }
}
|
||||
/**
 * Decide whether a host name belongs to a site that should be ignored.
 *
 * Matching respects label boundaries instead of using a raw substring test,
 * which made "t.co" skip "bet.com" and "x.com" skip "casinomax.com":
 *   - a pattern ending in "." (e.g. "amazon.") matches when the host starts
 *     with it or contains it right after a dot, so any TLD and any subdomain
 *     are covered;
 *   - any other pattern (e.g. "youtube.com") matches that exact host or any
 *     subdomain of it.
 *
 * @param {string} d - Bare host name, as produced by getDomain().
 * @param {string[]} [patterns=SKIP_PATS] - Patterns to test against.
 * @returns {boolean} true when the host matches at least one pattern;
 *   false for empty or non-string input.
 */
function isSkip(d, patterns = SKIP_PATS) {
  if (typeof d !== 'string' || d === '') return false;
  const host = d.toLowerCase();
  return patterns.some((p) => {
    if (p.endsWith('.')) {
      return host.startsWith(p) || host.includes(`.${p}`);
    }
    return host === p || host.endsWith(`.${p}`);
  });
}
|
||||
|
||||
// ─── SearXNG via curl (one at a time!) ───────────────────────
|
||||
/**
 * Run one search against the instance at BASE_URL and return its result list.
 *
 * The request is made by spawning `curl`. The section header in this file
 * says the instance is SearXNG, whose Search API names the paging and filter
 * parameters `pageno` and `safesearch`. The previous `pagenum` and
 * `safe_search` spellings are not part of that API, so every call fetched
 * page 1.
 *
 * This is deliberately best-effort: any failure (curl error, timeout, empty
 * or non-JSON body, missing `results` array) yields an empty array rather
 * than an exception.
 *
 * @param {string} query - Search terms.
 * @param {number} [pg=1] - 1-based result page.
 * @returns {Promise<Array<object>>} The `results` array of the JSON response, or [].
 */
async function apiSearch(query, pg = 1) {
  const ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1.5 (KHTML, like Gecko) Version/17.4 Safari/605.1.15';

  try {
    const endpoint = new URL(`${BASE_URL}/search`);
    endpoint.searchParams.set('q', query);
    endpoint.searchParams.set('format', 'json');
    endpoint.searchParams.set('pageno', String(pg));
    endpoint.searchParams.set('language', 'all');
    endpoint.searchParams.set('safesearch', '0');

    return await new Promise((resolve) => {
      // curl's own --max-time (15 s) should fire first; the 20 s execFile
      // timeout is a backstop that kills a hung process.
      execFile('curl', ['-s', '-A', ua, '--max-time', '15', endpoint.href], { timeout: 20000 }, (_, stdout) => {
        if (!stdout) return resolve([]);
        try {
          const payload = JSON.parse(stdout);
          return resolve(Array.isArray(payload.results) ? payload.results : []);
        } catch {
          return resolve([]);
        }
      });
    });
  } catch {
    return [];
  }
}
|
||||
|
||||
// ─── Verify: count distinct casino-linked outbound domains ─────
|
||||
/**
 * Fetch a candidate page and count the distinct outbound domains it links to
 * whose link URL contains a casino keyword.
 *
 * A link counts when all of the following hold:
 *   - it is an absolute http(s) href found after <script>/<style> blocks
 *     have been stripped;
 *   - its domain differs from `domainSite.domain` and is not skipped by isSkip();
 *   - its lower-cased URL contains at least one CASINO_KW entry longer than
 *     three characters.
 *
 * `domainSite.url` comes from search results and is therefore untrusted. It is
 * parsed first and only http/https URLs are passed to curl, so a value that
 * starts with "-" (which curl would read as an option) or uses another scheme
 * such as file:// is rejected with a count of 0.
 *
 * Best-effort: any failure (bad URL, curl error, timeout, body shorter than
 * 500 characters, exception while scanning) resolves to 0.
 *
 * @param {{url: string, domain: string}} domainSite - Candidate site.
 * @returns {Promise<number>} Number of distinct casino-linked outbound domains.
 */
async function verify(domainSite) {
  const ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36';

  try {
    const target = new URL(domainSite.url);
    if (target.protocol !== 'http:' && target.protocol !== 'https:') return 0;

    // Hoisted: the length rule does not depend on the link being inspected.
    const keywords = CASINO_KW.filter((kw) => kw.length > 3);

    return await new Promise((resolve) => {
      // NOTE(review): per the curl manual, --max-filesize makes curl refuse a
      // transfer whose advertised size exceeds the limit, so a page declaring
      // more than 20000 bytes yields no body and scores 0 here. Confirm that
      // this limit is intentional.
      execFile('curl', ['-sL', '--max-time', '8', '-A', ua, '--max-filesize', '20000', target.href], { timeout: 12000 }, (_, body) => {
        try {
          if (!body || body.length < 500) return resolve(0);

          const html = String(body)
            .replace(/<script[\s\S]*?<\/script>/gi, ' ')
            .replace(/<style[\s\S]*?<\/style>/gi, ' ');

          const casinoDomains = new Set();
          // The pattern already guarantees an http(s) scheme (case-insensitively).
          for (const m of html.matchAll(/href=["'](https?:\/\/[^"'\s>]+)["']/gi)) {
            const link = m[1];
            const d = getDomain(link);
            if (d === domainSite.domain || isSkip(d) || casinoDomains.has(d)) continue;

            const lowered = link.toLowerCase();
            if (keywords.some((kw) => lowered.includes(kw))) casinoDomains.add(d);
          }
          return resolve(casinoDomains.size);
        } catch {
          // An exception thrown inside this callback would otherwise be uncaught.
          return resolve(0);
        }
      });
    });
  } catch {
    return 0;
  }
}
|
||||
|
||||
// ─── Checkpoint helper ────────────────────────────────────────
|
||||
/**
 * Persist checkpoint data as JSON.
 *
 * The JSON is written to a sibling "<file>.tmp" first and then renamed over
 * the target, so an interruption in the middle of a write cannot leave a
 * truncated checkpoint behind.
 *
 * @param {*} data - JSON-serialisable checkpoint state.
 * @param {string} [file=CP_FILE] - Destination path.
 * @throws {Error} Whatever fs.writeFileSync / fs.renameSync throw.
 */
function saveCP(data, file = CP_FILE) {
  const tmp = `${file}.tmp`;
  fs.writeFileSync(tmp, JSON.stringify(data));
  fs.renameSync(tmp, file);
}
|
||||
/**
 * Load the checkpoint written by saveCP().
 *
 * A missing file is the normal "fresh start" case and is silent. An
 * unreadable file, invalid JSON, or JSON that is not a plain object is
 * reported with console.warn before null is returned, so that a discarded
 * checkpoint does not go unnoticed.
 *
 * @param {string} [file=CP_FILE] - Checkpoint path.
 * @returns {object|null} The parsed checkpoint object, or null when none is usable.
 */
function loadCP(file = CP_FILE) {
  let raw;
  try {
    raw = fs.readFileSync(file, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.warn(`Checkpoint ${file} could not be read (${err.message}); ignoring it`);
    }
    return null;
  }

  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.warn(`Checkpoint ${file} does not contain a JSON object; ignoring it`);
  } catch (err) {
    console.warn(`Checkpoint ${file} is not valid JSON (${err.message}); ignoring it`);
  }
  return null;
}
|
||||
|
||||
// ═══════ MAIN ═══════
|
||||
/**
 * Entry point: a two-phase, checkpointed crawl.
 *
 * Phase 1 (collect) runs every Q_COLLECT query page by page, strictly one
 * request at a time, and records each previously unseen, non-skipped domain.
 * Phase 2 (verify) fetches the collected sites in batches of CONC and keeps
 * those for which verify() reports at least 3 casino-linked outbound domains.
 * The kept sites are then written to CSV_OUTPUT.
 *
 * Progress is stored through saveCP() in an object of the shape
 *   { sites, verified, checked, phase, qIndex }
 * where `checked` lists every domain already verified, pass or fail, so that
 * a resumed run does not fetch rejected sites again. Checkpoints written
 * without `checked` are still accepted.
 */
(async () => {
  console.log('═══ Casino Affiliate Crawler v9 ── serial + checkpointed ═══\n');

  // Quote one value as a CSV field: wrap it in double quotes and double any
  // embedded double quote.
  const csvCell = (value) => `"${String(value == null ? '' : value).replace(/"/g, '""')}"`;

  let cp = loadCP();

  if (!cp) {
    cp = { sites: [], verified: [], checked: [], phase: 'collect', qIndex: 0 };
    console.log('Fresh start\n');
  } else {
    console.log(`Resume: phase=${cp.phase} qi=${cp.qIndex || 0} domains=${(cp.sites||[]).length} verified=${(cp.verified||[]).length}\n`);
  }

  const uniqueSites = cp.sites || [];
  const seenDomains = new Set(uniqueSites.map(s => s.domain));

  // ── PHASE 1 Collect (SERIAL — one curl at a time) ───────────────
  if (cp.phase === 'collect') {
    const MAX_PAGES = 18;
    const DELAY_BETWEEN_QUERIES = 3500; // ms — long enough to avoid SearXNG ban
    const DELAY_BETWEEN_PAGES = 2200;

    // A resumed run restarts the interrupted query from page 1; domains
    // collected earlier are de-duplicated through seenDomains.
    let qi = cp.qIndex || 0;

    for (; qi < Q_COLLECT.length; qi++) {
      for (let pg = 1; pg <= MAX_PAGES; pg++) {
        const results = await apiSearch(Q_COLLECT[qi], pg);
        // apiSearch() also returns [] on failure, so a failed request ends
        // this query just like running out of pages does.
        if (!results.length) break;

        let newThisPage = 0;
        for (const r of results) {
          // Skip malformed entries instead of crashing on a missing url.
          if (!r || typeof r.url !== 'string') continue;
          const d = getDomain(r.url);
          if (!d || seenDomains.has(d) || isSkip(d)) continue;

          seenDomains.add(d);
          uniqueSites.push({ url: r.url, title: String(r.title || '').substring(0, 250), domain: d });
          newThisPage++;
        }

        // Save checkpoint every few pages and whenever something new was found
        if (newThisPage > 0 || pg % 3 === 0) {
          cp.sites = uniqueSites;
          cp.qIndex = qi;
          saveCP(cp);
        }

        console.log(`[q${qi} p${pg}] ${seenDomains.size} domains`, newThisPage ? `(+${newThisPage})` : '');
        await sleep(DELAY_BETWEEN_PAGES);
      }

      console.log(`── after q${qi}: ${seenDomains.size} total ──`);
      await sleep(DELAY_BETWEEN_QUERIES);
    }

    cp.qIndex = qi;
    cp.sites = uniqueSites;
    cp.phase = 'verify';
    saveCP(cp);

    console.log(`\n══ Phase 1 complete: ${uniqueSites.length} unique sites ══\n`);
  }

  // ── PHASE 2 Verify (concurrent batches of 4) ───────────────
  // A completed phase 1 always leaves cp.phase === 'verify'.
  if (cp.phase === 'verify') {
    const allSites = cp.sites || [];
    const verified = cp.verified || [];

    // Every domain already handled: persisted failures plus all passes.
    const checkedDomains = new Set([...(cp.checked || []), ...verified.map(v => v.domain)]);
    const pending = allSites.filter(s => !checkedDomains.has(s.domain));

    console.log(`Starting verify: ${checkedDomains.size}/${allSites.length} already done`);

    const CONC = 4;
    const LOG_EVERY = 200;
    // Log when a multiple of LOG_EVERY is crossed; an exact-multiple test can
    // be stepped over because `checked` advances by up to CONC per batch.
    let nextLog = (Math.floor(checkedDomains.size / LOG_EVERY) + 1) * LOG_EVERY;

    for (let i = 0; i < pending.length; i += CONC) {
      const batch = pending.slice(i, i + CONC);
      const counts = await Promise.all(batch.map(s => verify(s)));

      batch.forEach((site, j) => {
        if (counts[j] >= 3) {
          verified.push({ url: site.url, title: site.title, domain: site.domain, casinoLinks: counts[j] });
        }
        checkedDomains.add(site.domain); // recorded whether it passed or failed
      });

      cp.verified = verified;
      cp.checked = [...checkedDomains];
      saveCP(cp);

      const checked = checkedDomains.size;
      const isLastBatch = i + CONC >= pending.length;
      if (checked >= nextLog || isLastBatch) {
        console.log(`Phase 2: ${checked}/${allSites.length} → ${verified.length} pass ≥3`);
        nextLog = (Math.floor(checked / LOG_EVERY) + 1) * LOG_EVERY;
      }

      await sleep(500);
    }

    // ── Write final CSV ───────────────
    const header = 'url,title,domain,casino_links';
    const rows = verified.map(v => {
      // Keep each record on a single line.
      const title = String(v.title || '').replace(/[\r\n]+/g, ' ');
      return [csvCell(v.url), csvCell(title), csvCell(v.domain), v.casinoLinks].join(',');
    });
    fs.writeFileSync(CSV_OUTPUT, [header, ...rows].join('\n'), 'utf8');

    console.log(`\n══════════ ${verified.length} verified sites → ${CSV_OUTPUT} ══════════`);
  }
})().catch((err) => {
  // Report failures (e.g. an unwritable checkpoint) and exit non-zero rather
  // than leaving an unhandled rejection.
  console.error('Crawler failed:', err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user