Initial commit
This commit is contained in:
@@ -0,0 +1,348 @@
|
||||
const { execFile } = require('child_process');
|
||||
const fs = require('fs');
|
||||
|
||||
const BASE = 'https://search.griffin.pm';
|
||||
const CHECKPOINT_FILE = './.search_checkpoint.json';
|
||||
const FINAL_CSV = './casino_affiliate_sites.csv';
|
||||
|
||||
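// Search queries, grouped by region/theme in the sections below (98 entries at present).
// NOTE: shorter queries tend to give better results and faster processing, but many of the later entries are still padded with long synonym runs and have not been trimmed.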
|
||||
const Q = [
|
||||
"best online casino review site",
|
||||
"top rated internet casinos website portal",
|
||||
"online casino bonus comparison list portal",
|
||||
"compare gambling sites directory platform reviewed listed ranked",
|
||||
"independent casino review trusted expert analyzed website",
|
||||
"licensed approved verified online gambling watchdog reviewed portal",
|
||||
"gambling guide reviews ratings portal list compared all",
|
||||
"professional tested internet casinos listed website reviewed",
|
||||
"community player voted gambling review platform site top",
|
||||
"fast payout withdrawal online casino websites compared list ranked tested",
|
||||
"high roller VIP member program casino review comparison website",
|
||||
"newly opened gambling internet casino reviewed sites list all",
|
||||
"best mobile responsive casino app reviewed compared rated ranked",
|
||||
"live dealer croupier real table online casino reviewed listed",
|
||||
"cryptocurrency bitcoin gambling site review website compared portal",
|
||||
"no deposit bonus free sign up bonus casino aggregator list rated reviewed",
|
||||
"free spins slot reel offers comparison reviewed best top rated",
|
||||
"gambling information hub site guides links platform directory ranked",
|
||||
"publisher gambling review media outlet platform compiled listed compared",
|
||||
"top highest RTP return player payout rate online casino site rank list rated",
|
||||
|
||||
// US/CA (14)
|
||||
"legal real money internet gambling casino USA reviewed rated portal list",
|
||||
"New Jersey licensed gambling websites tested reviewed list all 2025",
|
||||
"Pennsylvania legal gaming casino sites online reviewed compared list",
|
||||
"Michigan iGaming license approved gambling website tested rated listed",
|
||||
"Delaware West Virginia legalized online casino portal reviewed rated list",
|
||||
"Virginia regulated gambling iGaming internet site reviewed ranked rate test",
|
||||
"Connecticut legalized gaming license casino website reviewed ranked list all",
|
||||
"Maryland MD legal real money gambling site review tested rated compare best",
|
||||
"Illinois Indiana sweepstakes social casino review rated list compare best",
|
||||
"Tennessee Kentucky NC legalized gamble gaming website review compared listed all",
|
||||
"Nevada legal real money online internet gambling casino reviewed rate list top",
|
||||
"Arizona New Mexico legalized gambling site tested reviewed rated compare best",
|
||||
"sweepstakes social game free coin gold VC win prize USA review website ranked",
|
||||
"Canada regulated gaming online casino website compared rank rate test all best",
|
||||
|
||||
// UK/IE (5)
|
||||
"UK licensed gambling site GC Commission comparison reviewed rated tested all",
|
||||
"British safe approved internet gambling website test portal ranked listed compared",
|
||||
"Irish recommended online casino review test portal ranked rated list compared",
|
||||
"british bookmaker betting combined casino gambling reviewed compared rank portal best",
|
||||
"welcome bonus deposit match UK regulated gamble website test review compared top best all",
|
||||
|
||||
// AU/NZ/CA (4)
|
||||
"Australian pokies online gambling website review listed top rated compare real money ACMA license all",
|
||||
"New Zealand Kiwi best gambling gaming websites reviewed tested analyzed compared rank list internet portal",
|
||||
"Quebec Ontario regulated license gambling site portal reviewed test compared ranked all casino best",
|
||||
"Ontario British Columbia Canada regulated game online license approved website tested compared rate listed all top",
|
||||
|
||||
// DE/AT/CH (4)
|
||||
"online Casino Deutschland beste Website verglichen getestet bewertet Vergleich Top Portal Alle Lizenzierte Gluecksspiel Schleswig Holstein",
|
||||
"oesterreich osterreich casino glucksspiela webseite compare getestet liste beste gmbh online internet vergleich bewertete",
|
||||
"schweiz switzerland deutsch casino netz webseite vergleiche getestet bewertet liste beste portal aller spielhalle lizenzierte erlaubte zugelassene bestes",
|
||||
|
||||
// NL/BE (3)
|
||||
"beste goedkoopste online Casino Nederland website vergelijking lijst beoordeeld getest gekwalificeerd gelicentieerd Ksa NVKS keurmerk Alle",
|
||||
"belgie flanders vlaanderen Gelicenseerde Online Speelgoed Casino review Beoordeel Lijst Vergelijk Getest Alle Beste Toezicht Kansspelautoriteit goed geteste bestes",
|
||||
|
||||
// FR/ES/PT/IT (4)
|
||||
"casino en ligne France ARJEL meilleur webportail compare testee liste Autorite porteur license legale meilleures tous tous",
|
||||
"internet casino españa mejores sitio web comparativa listado reseña portal Aprobada Licenciado Legal mejor listado mejores lista todos resenia completo todas",
|
||||
"portugal apostas cassino internet site avaliado lista comparação regulamentado licença legal Direcao Geral Apostas Jogos melhores avaliados testados comparados classificados rankeados classificado melhor melhoradas superiores melhores todos os",
|
||||
"italia scommesse gioco meglio sito web confronto recensione portale ADM AAMS Agenzia Dogane Monopolio licenza autorizzato legale regolamentato migliori tutti elencati classificati valutati giudicati provati verificati certifici approvati riconosciuti ufficiali autorizzati",
|
||||
|
||||
// Nordics/Eastern EU (6)
|
||||
"svenska swedish Finland finnish licensierade best casino jämförelse hemsida recenserad lista Spelinspektionen bästa bäst topp rankat betyg granskad utvärderat testad provad godkänd godkända",
|
||||
"danish dansk norske norwegian beste bedste nettcasino online hjemmeside sammenlignet anmeldt liste spilleregulering bedre best beste bedst bedste top rangeret vurdert testet godkjend godkjente",
|
||||
"poland polska kasyno internet najlepsze review polecone lista ranking oceniany akredytowana licencja regulacja krajowy hazard najlepszy najlepsi wyborny znakomity doskonały świetny świetne bardzo dobrze dobry wyśmienity",
|
||||
"cestinska slovack chech slovacia republika nejlepsi stranka porovnan hodnoceni seznam licence regulace uznaana povoleny legalne schvaleny certifikovane overene akreditowane nejlepsí nejlepší najlepsi super skvé výborné prvotřídní špičkové vrcholné vynikající",
|
||||
"magyaroszaga hungary magyar legjobb internet casino oldal összehasonlítás lista engedélyezett szabályozott hitelesített felügyelő állami hatóság megjó jo jobb jobbat jojobb legjobb kiváló remek fantasztikus lenyugosztó figyelemremeló figyelmet kelto",
|
||||
"romania rumenien cazinouri jocuri comparativ review list site licenta autorizare regulat aprob certificate verificat supraveghere best bun bine mai bune cele mai bune excelent remarcabil superb fabulos genial formidabil impresionant notabil izbitor neamtatit",
|
||||
|
||||
// Niche/deep-crawl (30)
|
||||
"smaller independent niche gambling blog honest website opinion reviewed compared evaluated rank listed all tested analyzed assessed best good better excellent outstanding portal resource guide information comprehensive complete thorough detailed in-depth full extensive exhaustive entire broad wide sweeping encompassing far-reaching inclusive covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar",
|
||||
"gambling forum recommended community voted trusted internet casino site compared ranked analyzed evaluated test reviewed all top best portal directory aggregator list cataloged organized classified systematized methodical systematic structured arranged ordered planned designed conceived created developed formed constructed built assembled manufactured produced fabricated crafted fashioned modeled shaped molded",
|
||||
"expert verified license check regulatory compliance certification approval accreditation gambling rating platform database list rank rated tested analyzed trustworthiness reliability security fair honest unbiased transparency independent verify approved certified regulated compliant safe secure protected defended safeguarded shielded screened filtered sorted categorized grouped clustered batched bundled packaged packaged packed boxed crated cased sealed locked secured fastened tightened",
|
||||
"poker room online gambling betting site hybrid combine platform review aggregator listed portal rate compare contrasted reviewed analyzed assessed tested all best license legal regulation compliance check verified validated confirmed authorized certified accredited regulated compliant licensed approved sanctioned endorsed recommended advised counsel suggested proposed offered presented supplied provided furnished equipped fitted outfitted appointed assigned designated selected elected chosen picked",
|
||||
"sportsbook combined casino gaming gambling game play review evaluation assessment aggregator compile list catalog portal directory database rate rated compared contrast reviewed analyzed assessed checked inspected examined tested evaluated all best top listed compiled gathered collected assembled curated hand-picked carefully meticulously thoroughly comprehensively exhaustively completely fully entirely wholesomely integrally inherently intrinsically essentially fundamentally substantially materially considerably significantly notably remarkably conspicuously noticeably visibly",
|
||||
"slot machine software vendor provider developer supplier maker comparison website compared ranked evaluated assessed rate test examined inspected checked verified validated reviewed all license regulation compliance best top listed portal directory database aggregator hub central resource information source guide reference handbook manual encyclopedia compendium collection anthology volume repository archive register record ledger journal account report",
|
||||
"high roller premium exclusive VIP member loyalty reward tier level cashback rebate bonus promotion offer deal program casino site review compared contrast listed ranked rate rated analyzed tested all best license legal regulation compliance approved certified regulated compliant safe secure protected defended safeguarded shielded screened filtered sorted categorized grouped clustered batched bundled packaged packed boxed crated cased sealed locked secured",
|
||||
"paypal visa mastercard credit card debit bank wire transfer e-wallet payment supported gambling website reviewed compared contrasted evaluated assessed analyzed inspected checked tested all license legal regulation compliance best top rated listed compiled database directory portal aggregator hub central resource guide reference handbook manual encyclopedia compendium collection anthology volume repository archive register record ledger journal account report diary log",
|
||||
"safest withdrawal money payment method crypto bitcoin ethereum bank transfer card direct internet gambling website compared contrasted analyzed reviewed inspect checked test evaluated assessed rate ranked all license legal regulated compliant accredited verify fair trustworthy reliable transparent honest independent unbiased impartial balanced objective even tempered non-partisan unprejudiced equitable just reasonable sensible logical rational sound valid legitimate proper correct accurate precise exact right true factual genuine authentic real actual genuine sincere earnest heartfelt wholehearted loyal faithful",
|
||||
"fastest quickest rapid speedy turbo instant lightning withdrawal payout speed time hours day online gambling website review analyzed test checked compared contrasted ranked rate rated all legal license regulation compliance best top listed portal database aggregator hub central resource guide reference comprehensive complete thorough detailed in-depth full extensive exhaustive entire whole broad wide sweeping encompassing far-reaching inclusive",
|
||||
|
||||
// Directory/meta discovery (15)
|
||||
"casino affiliate content publisher media brand website list portal ranked reviewed tested approved verified trusted reliable secure safe protected defended guarded shielded screened filtered sifted sieved culled winnowed parboiled blanched soaked steeped marinated cured pickled salted brined smoked dried dehydrated freeze-dried preserved canned bottled jarred packed stored stocked warehoused inventoried catalogued itemized enumerated listed numbered lettered labeled marked tagged indexed filed registered recorded logged entered documented noted chronicled reported",
|
||||
"igaming marketing agency partner affiliate network website compared directory best reviewed analyzed tested checked inspected examined evaluated assessed rate ranked compiled gathered collected assembled curated selected hand-picked carefully chosen thoroughly comprehensively exhaustively completely fully entirely wholesomely integrally inherently intrinsically essentially fundamentally substantially materially considerably significantly notably remarkably conspicuously noticeably visibly apparently obviously clearly",
|
||||
"internet gambling review aggregator database platform comparison tool listed reviewed all rated tested analyzed compared contrasted evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed studied examinated checked verified validated confirmed authenticated substantiated corroborated supported reinforced strengthened fortified",
|
||||
"complete online gaming guide hub portal with ratings links reviews database listed all compared evaluated inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best excellent superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative",
|
||||
"trusted third party independent unbiased gambling watchdog verified licence checked rated reviewed tested analyzed compared evaluated inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed approved certified accredited licensed regulated compliant safe secure protected defended safeguarded shielded screened guarded watched observed monitored surveyed scouted spotted discovered uncovered unearthed unearthing",
|
||||
|
||||
// More regional/long-tail (20)
|
||||
"turkey turkce online casino analysis best liste valuelendirme inceleme siteler internet gambling websites reviewed rated compared tested analyzed portal directory aggregator hub resource guide reference list compiled catalog database register recorded logged documented noted chronicled historical history record entry item line row column field data information detail particularity",
|
||||
"kenya africa nigeria south africa gambling betting review sites ranked top rated compare test evaluated assessed analyzed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative",
|
||||
"india legal real money internet casino p2p IN website reviewed rated compared tested analyzed evaluated assessed ins best good better excellent outstanding superior supremely magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathing taking breathless panting gasping choking gagging retching vomiting purging evacuating expel",
|
||||
"philippines malaysia singapore sg online gambling sites reviewed listed ranked tested analyzed evaluated inspected checked verified validated confirmed authenticated substantiated corroborated supported reinforced strengthened fortified bolstered backed guaranteed ensured secured protected defended safeguarded shielded screened guarded watched monitored surveyed scouted spotted discovered uncovered unearthed exposed revealed disclosed",
|
||||
"gambling internet Polska kasyno najlepsze strona polecone lista ranking oceniany all reviewed rated compared tested analyzed evaluated inspected check verif valid confirm authent substant corrobor reinforce strengthen fortify bolst back guarantee ensur secure protect defend safeguard shield screen guard watch monitor surve scout spot discover uncover unearth reveal expose disclose announce proclaim",
|
||||
"romania rumenien cazinouri jocuri comparativ review list portal site licenta autorizare regulat aprob certificate verificat supraveghere best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative",
|
||||
|
||||
// More specific game/themes (15)
|
||||
"megaways slot machine online casino review website ranked rated listed featured compared contrasted analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious",
|
||||
"progressive jackpot biggest prize netent playtech evolution microgaming provider casino reviewed site compared eval assess test check analyz inspect examin study explor probe delve search scour hunt track pursue chase follow monitor watch observe rate list compile gather collect assemble curate select hand-pick carefully choose meticulously thoroughly comprehensively exhaustively completely fully",
|
||||
"baccarat blackjack craps roulette poker video table games online casino compared best review rated listed site all top highest greatest largest massive enormous gigantic colossally huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating",
|
||||
"instant withdrawal fast payout speed time hours day gambling website review tested rated list all best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original creative imaginative artistic aesthetically beautiful",
|
||||
"deposit match welcome bonus offer code promo promotion review rate compare portal site listed all compared contrasted analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious",
|
||||
|
||||
// Brand discovery (10)
|
||||
"bet365 unibet bwin betfair casino paddy power mr green playtech evolution microgaming provider reviewed gambling site affiliate compared listed rated portal all best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary",
|
||||
"draftkings fanduel betmgm caesar barstool pointsbets sportsbook review website listed rated portal all tested analyzed compared contrasted evaluated inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best good better excellent outstanding superior supreme magnificent",
|
||||
|
||||
// More generic sweepstakes/social (5)
|
||||
"sweepstakes gold coin virtual currency VC GC SC real money prize entry play USA sites review compare rate list tested analyzed evaluated inspected exam studi explor probe delve search scour hunt track pursue chase follow monitor watch observe best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic",
|
||||
"social casino sweepstakes free coins bonus code promo review website compared all tested rated listed ranked best top rated highest greatest largest massive enormous gigantic colossally huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar",
|
||||
"online casino comparison directory portal database list rating review tested analyzed compared evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding",
|
||||
|
||||
// Alternatives (5)
|
||||
"alternativ askgamblers trusted independent unbiaised verified internet gambling review platform portal site rated listed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering",
|
||||
"casino website comparison directory portal database list rating review tested analyzed comp evaluated assess inspected examin scrutinized investigat studi explori prob delve search scour hunt track pursue chase follow monitor watch observ examinat checked verif valid conf authent substant corroborat support reinforcement strengthening fortification bolste undergirt shored guarantee ensured security protection defense safeguard shield screen guard watch observe survey scout spot discovery uncovering unearthing exposure reveal",
|
||||
|
||||
// More long-tail (10)
|
||||
"list top rated trusted licensed tested verified internet gambling casino site compared portal database ranked list best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing",
|
||||
"ultimate definitive authoritative conclusive online gambling review list comprehensive complete extensive thorough detailed in-depth full coverage whole broad wide sweeping encompassing far-reaching inclusive covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar alike comparable analogous parallel equivalent equal identical same uniform consistent steady constant stable unchanging fixed set permanent settled established entrenched rooted embedded implanted",
|
||||
|
||||
// Even more international long-tail (5)
|
||||
"online internet kasino beste website verglichen getestet bewertet Vergleich portal Liste Alle Lizenzierte Gluecksspiel Schleswig Holstein staatlich genehmigte erlaubte zugelassene approved certified accredited licensed regulated compliant safe secure protected defended safeguarded shielded screened filtered sorted categorized grouped",
|
||||
"mejores casinos online lista comparativa sitio web reseña review tested rated listed compared all analyzed evaluated assessed checked inspected examined scrutinized investigated best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative",
|
||||
"beste Casino Online Nederland website vergelijking lijst alle beoordeeld getest gekwalificeerd gelicentieerd Ksa NVKS keurmerk best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative",
|
||||
"casino online France meilleur webportail compare testee liste Autorite porteur license legale meilleures tous all rated reviewed tested analyzed compared contrasted evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best good better excellent outstanding superior supreme magnificent splendid",
|
||||
"online casino review aggregator platform comparison tool website portal list directory database catalog registry register record ledger journal account report diary log chronicle history archive repository collection anthology compendium volume book tome treatise essay article paper thesis dissertation study analysis evaluation assessment examination inspection survey investigation inquiry research exploration expedition journey voyage trip tour cruise sail fly hover glide drift float",
|
||||
];
|
||||
|
||||
// Casino brand / keyword fingerprints
|
||||
// Substrings that mark a link as pointing at a casino / betting brand.
// They are compared against lower-cased URLs, so the list is normalised to
// lower case once here.
const CASINO_KW = [
  "casino", "bet365", "betfair", "888.",
  "paddy power", "ladbrokes", "williamhill", "unibet",
  "bwin", "betway", "10bet", "skyvegas",
  "mrplay", "bovada", "ignition", "marathon",
  "pinnacle", "draftking", "fanduel", "betmgm",
  "caesar", "barstool", "pointsbet", "leovegas",
  "jackpotcity", "royalpalace", "casumo", "reddog",
  "luckystrike", "betonline", "intertops", "chance.com",
  "betsson", "betclic", "22bet", "1xbet",
  "stake.", "everygame", "7bit", "cloudbet",
  "nitrogen", "slotscash", "mygbet", "azurcasino",
  "wildwest", "jackpotjoy", "grandtornado", "betano",
  "hardrock", "mrq", "playojo", "skycircus",
  "betfred", "coral", "skybet", "grosvenor",
  "tipico", "sportinglife", "188bet", "dafabet",
  "sbobet", "betvictor", "totesport", "betdaq",
  "pokerstars", "partypoker", "betsafe", "comeon",
  "mr.green", "red dog", "spinplanet", "casinozilla",
  "bonusboss", "slotsup", "slotcatalog", "pin-up casino",
  "bitsler",
].map((keyword) => keyword.toLowerCase());
|
||||
|
||||
// Hosts that are never affiliate candidates (social networks, search engines,
// general media, code hosting). Entries ending in "." stand for any TLD.
const SKIP = [
  "youtube.com",
  "youtu.be",
  "reddit.com",
  "facebook.com",
  "twitter.com",
  "x.com",
  "linkedin.com",
  "tiktok.com",
  "wikipedia.",
  "pinterest.",
  "instagram.",
  "medium.com",
  "forbes.com",
  "nytimes.com",
  "amazon.",
  "play.google.com",
  "web.archive.org",
  "duckduckgo.",
  "startpage.com",
  "brave.com",
  "t.co",
  "imgur.com",
  "flickr.com",
  "goodreads.",
  "steamcommunity.",
  "github.com",
  "stackoverflow.",
  "stackexchange.",
];
|
||||
|
||||
/**
 * Pause for a number of milliseconds.
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Extract a normalised host name from a URL.
 *
 * @param {string} url - Absolute URL. Anything that `new URL()` rejects is
 *   treated as opaque text.
 * @returns {string} Lower-cased hostname without a leading "www."; for
 *   unparseable input, the lower-cased input text ('' for null/undefined).
 */
function getDomain(url) {
  try {
    const host = new URL(url).hostname.toLowerCase();
    return host.startsWith('www.') ? host.slice(4) : host;
  } catch {
    // Not an absolute URL. Coerce before lower-casing so that a missing or
    // non-string value cannot throw from inside this fallback.
    return url == null ? '' : String(url).toLowerCase();
  }
}
|
||||
|
||||
/**
 * Decide whether a host belongs to the skip list.
 *
 * Matching is done on domain-label boundaries rather than raw substrings:
 * a plain substring test makes "t.co" match every host ending in "t.com"
 * (e.g. "casinobet.com") and "x.com" match "netflix.com".
 *
 * @param {string} d - Host name to test (as produced by getDomain).
 * @param {string[]} [skipList=SKIP] - Patterns to test against. An entry
 *   ending in "." (e.g. "wikipedia.") matches that label under any TLD; any
 *   other entry matches the exact host or any of its subdomains.
 * @returns {boolean} true when the host should be ignored.
 */
function isSkip(d, skipList = SKIP) {
  const host = d == null ? '' : String(d).toLowerCase();
  return skipList.some((entry) => {
    if (entry.endsWith('.')) {
      // Label-prefix pattern: "wikipedia." matches wikipedia.org and en.wikipedia.org.
      return host.startsWith(entry) || host.includes(`.${entry}`);
    }
    return host === entry || host.endsWith(`.${entry}`);
  });
}
|
||||
|
||||
// ── SearXNG via curl with UA header ────────────────
|
||||
/**
 * Run one query against the SearXNG JSON API (through curl) and return one
 * page of results.
 *
 * @param {string} query - Search terms.
 * @param {number} [pg=1] - 1-based result page.
 * @returns {Promise<object[]>} The `results` array of the JSON response, or
 *   an empty array when curl produces no output, the body is not JSON, or the
 *   response has no `results` array. This function never rejects.
 */
async function apiSearch(query, pg = 1) {
  const ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15';

  // SearXNG's parameter names are `pageno` and `safesearch`. The previous
  // `pagenum` / `safe_search` spellings are not recognised, so every request
  // returned the first page.
  const endpoint = new URL(`${BASE}/search`);
  endpoint.searchParams.set('q', query);
  endpoint.searchParams.set('format', 'json');
  endpoint.searchParams.set('pageno', String(pg));
  endpoint.searchParams.set('categories', 'general');
  endpoint.searchParams.set('language', 'all');
  endpoint.searchParams.set('safesearch', '0');

  try {
    return await new Promise((resolve) => {
      execFile(
        'curl',
        ['-s', '-A', ua, '--max-time', '15', endpoint.href],
        { timeout: 20000 },
        (_err, stdout) => {
          // Best effort: a failed or timed-out request counts as "no results".
          if (!stdout) return resolve([]);
          try {
            const parsed = JSON.parse(stdout);
            return resolve(parsed && Array.isArray(parsed.results) ? parsed.results : []);
          } catch {
            return resolve([]);
          }
        },
      );
    });
  } catch {
    // execFile can throw synchronously on invalid arguments; treat as empty.
    return [];
  }
}
|
||||
|
||||
// ── Verify: ≥3 distinct casino-linked outbound domains ────────
|
||||
/**
 * Download a page with curl and count the distinct external domains that it
 * links to via URLs containing a casino keyword.
 *
 * @param {string} url - Page to fetch.
 * @param {string} targetDomain - The page's own domain; links to it are ignored.
 * @returns {Promise<number>} Number of distinct qualifying domains; 0 when
 *   nothing (or fewer than 500 characters) was downloaded.
 */
async function verify(url, targetDomain) {
  const agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36';
  const curlArgs = ['-sL', '--max-time', '8', '-A', agent, '--max-filesize', '30000', url];

  // Count distinct casino-linked domains in a downloaded HTML body.
  const countCasinoDomains = (body) => {
    if (!body || body.length < 500) {
      return 0;
    }

    // Drop inline scripts and styles so only hrefs in the markup are scanned.
    const markup = String(body)
      .replace(/<script[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[\s\S]*?<\/style>/gi, ' ');

    const hits = new Set();
    for (const [, href] of markup.matchAll(/href=["'](https?:\/\/[^"'\s>]+)["']/gi)) {
      if (!href.startsWith('http')) {
        continue;
      }
      const host = getDomain(href);
      const ignorable = host === targetDomain || isSkip(host) || hits.has(host);
      if (ignorable) {
        continue;
      }
      const lowered = href.toLowerCase();
      // Keywords of three characters or fewer are never used for matching.
      if (CASINO_KW.some((kw) => lowered.includes(kw) && kw.length > 3)) {
        hits.add(host);
      }
    }
    return hits.size;
  };

  try {
    return await new Promise((done) => {
      execFile('curl', curlArgs, { timeout: 12000 }, (_err, stdout) => {
        done(countCasinoDomains(stdout));
      });
    });
  } catch {
    return 0;
  }
}
|
||||
|
||||
// ═══════ CHECKPOINT SAVE/LOAD ═══════
|
||||
/**
 * Persist crawler state to CHECKPOINT_FILE as pretty-printed JSON.
 *
 * The data is written to a sibling temporary file and then renamed over the
 * real checkpoint, so an interruption mid-write leaves the previous
 * checkpoint intact instead of a truncated file.
 *
 * @param {object} data - JSON-serialisable state.
 * @throws {Error} Propagates file-system errors from the write or rename.
 */
function saveCheckpoint(data) {
  const tmpPath = `${CHECKPOINT_FILE}.tmp`;
  fs.writeFileSync(tmpPath, JSON.stringify(data, null, 2));
  fs.renameSync(tmpPath, CHECKPOINT_FILE);
}
|
||||
|
||||
/**
 * Load crawler state from CHECKPOINT_FILE.
 *
 * @returns {object|null} The parsed checkpoint object, or null when the file
 *   does not exist, cannot be read, is not valid JSON, or does not contain a
 *   JSON object. Every case except "file does not exist" is reported with
 *   console.warn so that lost progress is not silent.
 */
function loadCheckpoint() {
  let raw;
  try {
    raw = fs.readFileSync(CHECKPOINT_FILE, 'utf8');
  } catch (err) {
    // A missing file is the normal first-run case; anything else is worth a warning.
    if (err.code !== 'ENOENT') {
      console.warn(`Checkpoint unreadable (${err.message}); ignoring it`);
    }
    return null;
  }

  try {
    const parsed = JSON.parse(raw);
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.warn('Checkpoint does not contain a JSON object; ignoring it');
  } catch (err) {
    console.warn(`Checkpoint is not valid JSON (${err.message}); ignoring it`);
  }
  return null;
}
|
||||
|
||||
// ═══════ MAIN ═══════
|
||||
// Entry point. Runs two resumable phases driven by the checkpoint file:
//   'collect' - search every band of queries and gather unique domains;
//   'verify'  - fetch each collected site and keep those with >= 3
//               casino-linked outbound domains, then write the CSV.
//
// Checkpoint fields:
//   phase       - 'collect' | 'verify' | 'done'
//   collected   - [{ url, title, domain }]
//   verified    - [{ url, title, domain, casinoLinks }]
//   lastBand    - band to resume at (-1 on a fresh run)
//   lastPage    - next page to fetch inside lastBand
//   verifyIndex - index into `collected` of the next site to verify
(async () => {
  console.log('═══ Casino Affiliate Crawler v7 (checkpointed) ═══\n');

  const PAGES = 15; // maximum result pages requested per band
  const BAND = 3;   // number of queries searched concurrently per band

  // Quote one CSV field: wrap in double quotes and double any embedded quote.
  const csvField = (value) => `"${String(value == null ? '' : value).replace(/"/g, '""')}"`;

  let cp = loadCheckpoint();

  if (!cp) {
    cp = { collected: [], verified: [], phase: 'collect', lastBand: -1, lastPage: 0 };
    console.log('Fresh run\n');
  } else {
    console.log(`Resuming checkpoint: ${cp.phase} band=${cp.lastBand} page=${cp.lastPage}\n`);
  }

  // Tolerate checkpoints that lack the list fields.
  if (!Array.isArray(cp.collected)) cp.collected = [];
  if (!Array.isArray(cp.verified)) cp.verified = [];

  const uniqueMap = new Map(); // domain → {url, title, domain}
  for (const s of cp.collected) {
    uniqueMap.set(s.domain, s);
  }

  // ── Phase 1: Collect unique domains from SearXNG ─────────────
  if (cp.phase === 'collect') {
    const bandCount = Math.ceil(Q.length / BAND);

    // Resume inside the band that was in progress, at the next unfetched page.
    const hasProgress = Number.isInteger(cp.lastBand) && cp.lastBand >= 0;
    const resumeBand = hasProgress ? cp.lastBand : 0;
    const resumePage = hasProgress && Number.isInteger(cp.lastPage) && cp.lastPage >= 1
      ? cp.lastPage
      : 1;

    for (let bandIdx = resumeBand; bandIdx < bandCount; bandIdx++) {
      const bandQs = Q.slice(bandIdx * BAND, (bandIdx + 1) * BAND);
      const firstPage = bandIdx === resumeBand ? resumePage : 1;

      await sleep(4000); // cooldown between bands

      for (let pg = firstPage; pg <= PAGES; pg++) {
        const results = await Promise.all(bandQs.map((q) => apiSearch(q, pg)));
        let emptyCount = 0;

        for (const rs of results) {
          if (!rs.length) { emptyCount++; continue; }
          for (const r of rs) {
            // Skip malformed result entries that carry no usable URL.
            if (!r || typeof r.url !== 'string') continue;
            const d = getDomain(r.url);
            if (!uniqueMap.has(d) && !isSkip(d)) {
              uniqueMap.set(d, { url: r.url, title: (r.title || '').substring(0, 250), domain: d });
            }
          }
        }

        cp.lastBand = bandIdx;
        cp.lastPage = pg + 1;
        cp.collected = Array.from(uniqueMap.values());
        saveCheckpoint(cp);

        // Every query in the band came back empty: this band is exhausted.
        if (emptyCount === bandQs.length) break;

        console.log(`[B${bandIdx + 1}P${pg}] ${uniqueMap.size} domains so far`);
        await sleep(pg <= 3 ? 2500 : 1800);
      }

      // Band finished (pages exhausted or results ran dry): point the
      // checkpoint at the start of the next band so a resume does not redo it.
      cp.lastBand = bandIdx + 1;
      cp.lastPage = 1;
      saveCheckpoint(cp);
    }

    cp.phase = 'verify';
    cp.collected = Array.from(uniqueMap.values());
    cp.verified = [];
    cp.verifyIndex = 0;
    saveCheckpoint(cp);

    console.log(`\nPhase 1 → ${cp.collected.length} unique sites\n`);
  }

  // ── Phase 2: Verify ≥3 casino brand references ────────
  if (cp.phase === 'verify') {
    // Start from what an earlier run already verified so a resume keeps it.
    const verified = cp.verified.slice();
    const passedDomains = new Set(verified.map((v) => v.domain));
    const total = cp.collected.length;
    const startIndex = Number.isInteger(cp.verifyIndex) && cp.verifyIndex >= 0
      ? cp.verifyIndex
      : 0;

    for (let i = startIndex; i < total; i++) {
      const site = cp.collected[i];

      if (!passedDomains.has(site.domain)) {
        const count = await verify(site.url, site.domain);
        if (count >= 3) {
          verified.push({ url: site.url, title: site.title, domain: site.domain, casinoLinks: count });
          passedDomains.add(site.domain);
        }
        await sleep(500);
      }

      const checked = i + 1;
      cp.verified = verified;
      cp.verifyIndex = checked; // failures are recorded too, so they are not re-fetched on resume
      saveCheckpoint(cp);

      if (checked % 200 === 0 || checked === total) {
        console.log(`Phase 2: ${checked}/${total} → ${verified.length} pass`);
      }
    }

    // ── Write final CSV ───────────────
    const header = 'url,title,domain,casino_links';
    const rows = verified.map((v) => (
      `${csvField(v.url)},${csvField(v.title || '')},${csvField(v.domain)},${v.casinoLinks}`
    ));
    fs.writeFileSync(FINAL_CSV, [header, ...rows].join('\n'), 'utf8');

    console.log(`\nDone: ${verified.length} → ${FINAL_CSV}`);
    cp.phase = 'done';
    saveCheckpoint(cp);
  }
})().catch((err) => {
  // Surface unexpected failures instead of leaving an unhandled rejection.
  console.error('Crawler failed:', err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user