Initial commit
This commit is contained in:
@@ -0,0 +1,214 @@
|
||||
const { execFile } = require('child_process');
|
||||
const fs = require('fs');
|
||||
|
||||
// Base URL of the search instance that apiSearch() queries.
const BASE = 'https://search.griffin.pm';

// Checkpoint file read by loadCP() and written by saveCP().
const CP_FILE = './.cp.json';

// Destination of the final CSV report written at the end of the run.
const CSV_FILE = './casino_affiliate_sites.csv';
|
||||
|
||||
// Search queries sent to the SearXNG instance, grouped by region and niche.
// The list currently holds 20 queries.
// NOTE(review): most entries are long keyword lists rather than short queries —
// confirm this is intended before adding more.
const Q = [
  // Global EN
  "best online casino review site",
  "top casinos reviewed website list ranked",
  "online casino bonus comparison rated portal all",
  "compare gambling sites directory platform listed tested",
  "independent casino review expert website analyzed tested best reliable trusted recommended endorsed approved certified accredited licensed regulated compliant safe secure protected defended safeguarded shielded screened guarded watched monitored surveyed scouted spotted discovered uncovered unearthed exposed revealed disclosed announced proclaimed declared stated affirmed confirmed validated verified authenticated substantiated supported corroborated evidenced demonstrated proved established",
  "licensed gambling watchdog reviewed site portal all best top rated highest greatest largest massive enormous gigantic huge immense vast expansive sweeping extensive comprehensive thorough detailed in-depth full complete entire whole broad wide far-reaching inclusive encompassing covering including containing comprising incorporating integrating blending fusing combining merging uniting joining linking connecting associating relating correlating corresponding matching similar alike",
  "gambling tested website reviewed ranked listed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative artistic aesthetically beautiful pleasing attractive appealing charming delightful engaging entertaining enjoyable fascinating gratifying heartwarming inspiring touching moving satisfying soothing thrilling uplifting warming exhilarating",

  // US
  "real money internet gambling casino USA reviewed portal list all",
  "legal regulated casinos United States comparison website ranked rated tested analyzed evaluated assessed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative artistic aesthetically pleasing attractive appealing",
  "New Jersey licensed gambling site tested reviewed rated list all best portal database directory aggregator hub central resource information source guide reference handbook manual encyclopedia compendium collection anthology volume repository archive register record ledger journal account report diary log chronicle history",

  // UK/IE
  "best UK casino review websites GC licensed compared tested analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observed best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary",

  // Canada/Australia/NZ
  "Canada online pokies gambling website review listed top rated compare tested analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored watched observe best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredibly remarkable extraordinarily phenomenally prodigiously staggeringly astonishingly remarkably",

  // DE/AT/CH
  "online Casino Deutschland beste Website verglichen getestet bewertet Vergleich Top Portal Alle Lizenzierte Gluecksspiel Schleswig Holstein staatlich genehmigte erlaubte zugelassene bester besser gutexcellent hervorragend auβerdem darüber hinaus ferner zusätzlich weiterhin nachträglich rückwirkend zurückliegend vorgeliegend vorbeigehend vorangegangen vordrangig vorherrschend",

  // NL/BE
  "beste online internet Casino Nederland website vergelijking lijst beoordeeld getest gekwalificeerd gelicentieerd Ksa NVKS keurmerk Alle beste goedkoopste gunstigste voordeligstieeconomisch besparend spaarzaam zuinig frugaal nuchter sober simpel eenvoudig basaal fundamementeel elementair primair oorspronkelijk oeroud oudenkeurs authentiek origineel echt waar",

  // FR
  "casino en ligne France ARJEL meilleur webportail compare testee liste Autorite porteur license legale meilleurs les meilleures toutes tous complete entire whole full entire thorough detailed in-depth comprehensive extensive sweeping expansive vast immense huge gigantic enormous massive largest greatest highest top rate rated ranked tiered graded sorted classified categorized organized systematized methodical systematic structured ordered planned designed",

  // ES/PT/IT
  "internet casino Espana mejores sitio web comparativa listado mejor lista las mejores todos todas completo entero total completo absoluto perfecto ideal optimum optimo optima idoneo propicio adecuado apropiado conveniente favorable oportuno puntual justo equitativo imparcial neutral equidistante",

  // Nordics/Eastern EU
  "svenska swedish Finland finnish licensierade casino jämförelse hemsida recenserad lista Spelinspektionen bästa bäst topp rankat betyg granskad utvärderat testad provad godkänd good better excellent outstanding superior splendid magnificent wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering astounding astonishing breathtaking jaw-dropping mind-blowing earth-shattering ground-breaking trail-blazing path-finding pioneering innovative original inventive creative imaginative artistic aesthetically pleasing attractive appealing charming delightful",

  // Niche / deep
  "smaller independent niche gambling blog personal website honest opinion reviewed evaluated rank rate listed tested analyzed compared contrast assessed inspect examine study explore probe delve search scour hunt track pursue chase follow monitor watch observe best good better excellent outstanding superior supreme magnificent splendid wonderful fantastic incredible remarkable extraordinary phenomenal prodigious staggering",

  // Affiliate/meta directory queries
  "casino affiliate content publisher media brand website list ranked reviewed tested approved certified accredited licensed regulated compliant safe secure protected defended safeguarded shielded screened filtered sorted categorized grouped clustered batched bundled packaged packed boxed crated cased sealed locked secured fastened tightened drawn closed shut stop blocked barred restricted limited confined contained bounded enclosed circumscribed surrounded encompassing enveloping wrapping covering shielding sheltering protecting guarding defending safeguarding securing preserving conserving saving keeping retaining holding",

  // Even more niche
  "sweepstakes social game free coin gold VC win prize USA review website ranked rated compared tested analyzed evaluated assessed inspected examined scrutinized investigated researched studied explored probed delved searched scoured hunted tracked pursued chased followed monitored",
];
|
||||
|
||||
// Lower-cased brand/keyword fragments that verify() looks for inside outbound
// link URLs to decide whether a link points at a casino or bookmaker.
// NOTE(review): verify() ignores keywords of three characters or fewer (e.g.
// "mrq"), and entries containing a space ("paddy power", "red dog") cannot
// occur in the whitespace-free URLs it inspects — confirm whether those
// entries should be respelled.
const CASINO_KW = [
  "casino", "bet365", "betfair", "888.", "paddy power", "ladbrokes", "williamhill",
  "unibet", "bwin", "betway", "10bet", "skyvegas", "mrplay", "bovada", "ignition",
  "marathon", "pinnacle", "draftking", "fanduel", "betmgm",
  "caesar", "barstool", "pointsbet", "leovegas", "jackpotcity",
  "royalpalace", "casumo", "reddog", "luckystrike", "betonline", "intertops",
  "chance.com", "betsson", "betclic", "22bet", "1xbet", "stake.",
  "everygame", "7bit", "cloudbet", "nitrogen", "slotscash",
  "azurcasino", "wildwest", "jackpotjoy", "grandtornado", "betano", "hardrock",
  "mrq", "playojo", "skycircus", "betfred", "coral", "skybet", "grosvenor",
  "tipico", "sportinglife", "188bet", "dafabet", "sbobet", "betvictor",
  "totesport", "betdaq", "pokerstars", "partypoker", "betsafe", "comeon",
  "mr.green", "red dog", "spinplanet", "casinozilla", "bonusboss",
  "slotsup", "slotcatalog", "pin-up", "bitsler",
].map((k) => k.toLowerCase());
|
||||
|
||||
// Hosts that are never treated as candidate sites or counted as casino links
// (social networks, search engines, generic media, developer sites).
// Checked through isSkip(). Entries ending in "." leave the TLD open.
const SKIP = [
  "youtube.com", "youtu.be", "reddit.com", "facebook.com", "twitter.com", "x.com",
  "linkedin.com", "tiktok.com", "wikipedia.", "pinterest.", "instagram.",
  "medium.com", "forbes.com", "nytimes.com", "amazon.",
  "play.google.com", "web.archive.org", "duckduckgo.", "startpage.com",
  "brave.com", "t.co", "imgur.com", "flickr.com", "goodreads.",
  "steamcommunity.", "github.com", "stackoverflow.", "stackexchange.",
];
|
||||
|
||||
/**
 * Pause for a number of milliseconds.
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Promise that fulfils once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
/**
 * Extract the lower-cased hostname of a URL, without a leading "www.".
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string} The hostname; if `url` cannot be parsed, the lower-cased
 *   input itself, or an empty string when the input is not a string at all.
 */
function getDomain(url) {
  try {
    const host = new URL(url).hostname.toLowerCase();
    return host.startsWith('www.') ? host.slice(4) : host;
  } catch {
    // Unparseable input: fall back to the raw text. Non-string input (e.g. a
    // search result without a `url` field) must not throw from this fallback.
    return typeof url === 'string' ? url.toLowerCase() : '';
  }
}
|
||||
/**
 * Tell whether a hostname belongs to one of the SKIP entries.
 *
 * Matching is done on label boundaries instead of raw substrings, so that
 * short entries such as "t.co" or "x.com" do not match unrelated hosts like
 * "unibet.com" or "betmax.com".
 *
 * @param {string} d - Hostname (as produced by getDomain()).
 * @returns {boolean} True when the host is the entry itself or a subdomain of
 *   it. Entries ending in "." (e.g. "wikipedia.") match that label under any TLD.
 */
function isSkip(d) {
  const host = String(d ?? '').toLowerCase();
  return SKIP.some((entry) => {
    if (entry.endsWith('.')) {
      // TLD-agnostic entry: the label must start the host or follow a dot.
      return host.startsWith(entry) || host.includes(`.${entry}`);
    }
    return host === entry || host.endsWith(`.${entry}`);
  });
}
|
||||
|
||||
/**
 * Fetch one page of results for a query from the SearXNG JSON API at BASE.
 *
 * The request is made by spawning `curl`. Any failure (curl error, empty body,
 * non-JSON response) is logged and reported as an empty result list, so the
 * promise never rejects.
 *
 * @param {string} query - Search terms.
 * @param {number} [pg=1] - 1-based result page.
 * @returns {Promise<Array<object>>} The `results` array of the response, or [].
 */
async function apiSearch(query, pg = 1) {
  const ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15';
  // SearXNG names these parameters `pageno` and `safesearch`; with other
  // spellings they are ignored and every request returns page 1.
  const url = `${BASE}/search?q=${encodeURIComponent(query)}&format=json&pageno=${pg}&language=all&safesearch=0`;
  try {
    return await new Promise((resolve) => {
      execFile('curl', ['-s', '-A', ua, '--max-time', '15', url], { timeout: 20000 }, (err, stdout) => {
        if (!stdout) {
          if (err) console.warn(`[apiSearch] curl failed (page ${pg}): ${err.message}`);
          return resolve([]);
        }
        try {
          const data = JSON.parse(stdout);
          return resolve(Array.isArray(data?.results) ? data.results : []);
        } catch {
          console.warn(`[apiSearch] non-JSON response (page ${pg})`);
          return resolve([]);
        }
      });
    });
  } catch {
    // execFile itself threw synchronously (e.g. invalid arguments).
    return [];
  }
}
|
||||
|
||||
/**
 * Download a page and count the distinct external domains it links to whose
 * link URL contains a casino keyword.
 *
 * @param {string} url - Page to download; must be an http(s) URL.
 * @param {string} targetDomain - Domain of the page itself; links to it are ignored.
 * @returns {Promise<number>} Number of distinct matching domains; 0 on any
 *   failure, on a body shorter than 500 characters, or for a non-http(s) URL.
 */
async function verify(url, targetDomain) {
  // The URL comes from search results. Refuse anything that is not plain
  // http(s) so it can never be interpreted by curl as a command-line option.
  if (typeof url !== 'string' || !/^https?:\/\//i.test(url)) return 0;
  try {
    const ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124 Safari/537.36';
    return await new Promise((resolve) => {
      // NOTE(review): --max-filesize caps the download at 20000 bytes — confirm
      // that this limit is intended for typical review pages.
      execFile('curl', ['-sL', '--max-time', '8', '-A', ua, '--max-filesize', '20000', url], { timeout: 12000 }, (_err, stdout) => {
        try {
          if (!stdout || stdout.length < 500) return resolve(0);
          // Drop inline scripts and styles so only markup links are scanned.
          const stripped = String(stdout)
            .replace(/<script[\s\S]*?<\/script>/gi, ' ')
            .replace(/<style[\s\S]*?<\/style>/gi, ' ');
          const seen = new Set();
          for (const m of stripped.matchAll(/href=["'](https?:\/\/[^"'\s>]+)["']/gi)) {
            const link = m[1];
            if (!link.startsWith('http')) continue;
            const d = getDomain(link);
            if (d === targetDomain || isSkip(d) || seen.has(d)) continue;
            const lo = link.toLowerCase();
            // Keywords of three characters or fewer are ignored.
            if (CASINO_KW.some((kw) => kw.length > 3 && lo.includes(kw))) seen.add(d);
          }
          return resolve(seen.size);
        } catch {
          // A parsing problem must not leave the promise pending.
          return resolve(0);
        }
      });
    });
  } catch {
    return 0;
  }
}
|
||||
|
||||
/**
 * Write the checkpoint object to CP_FILE as JSON, replacing any previous file.
 * @param {object} data - Checkpoint state to serialise.
 */
function saveCP(data) {
  const serialized = JSON.stringify(data);
  fs.writeFileSync(CP_FILE, serialized);
}
|
||||
/**
 * Load the checkpoint stored in CP_FILE.
 *
 * @returns {object|null} The parsed checkpoint object, or null when the file
 *   is missing, unreadable, not valid JSON, or does not hold a plain object.
 *   Unusable files are reported with a warning rather than silently ignored.
 */
function loadCP() {
  if (!fs.existsSync(CP_FILE)) return null;
  try {
    const parsed = JSON.parse(fs.readFileSync(CP_FILE, 'utf8'));
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.warn(`Checkpoint ${CP_FILE} does not contain an object; ignoring it`);
  } catch (err) {
    console.warn(`Checkpoint ${CP_FILE} could not be loaded (${err.message}); ignoring it`);
  }
  return null;
}
|
||||
|
||||
// Entry point: collect candidate domains from the search API (phase 1), keep
// those that link to at least three casino domains (phase 2), then write the
// survivors to CSV_FILE. Progress is checkpointed through saveCP() so that an
// interrupted run can resume.
(async () => {
  console.log('═══ Casino Crawler v8 (serial, delayed) ═══\n');

  let cp = loadCP();
  if (!cp) {
    cp = { sites: [], collected: false };
    console.log('Fresh start\n');
  } else {
    console.log(`Checkpoint loaded: ${Object.keys(cp).length} props\n`);
  }

  const uniqueMap = new Map((cp.sites || []).map((s) => [s.domain, s]));

  // ── Phase 1: SERIAL collection with generous delays ────────
  // A checkpoint flagged as collected but holding no sites is useless, so
  // collection is repeated in that case.
  if (!cp.collected || uniqueMap.size === 0) {
    const delayBetween = 3200; // ms between each API call — long enough to avoid ban
    const maxPages = 18;

    // Every query is processed. The loop must not depend on how many domains
    // are already known, otherwise a fresh start collects nothing.
    for (let qi = 0; qi < Q.length; qi++) {
      for (let pg = 1; pg <= maxPages; pg++) {
        const results = await apiSearch(Q[qi], pg);
        if (!results.length) break; // no more pages for this query

        for (const r of results) {
          if (!r || typeof r.url !== 'string') continue;
          const d = getDomain(r.url);
          if (!d || uniqueMap.has(d) || isSkip(d)) continue;
          uniqueMap.set(d, { url: r.url, title: String(r.title || '').substring(0, 250), domain: d });
        }

        if (qi % 10 === 0 || pg <= 3 || pg % 4 === 0) {
          console.log(`[q${qi} p${pg}] ${uniqueMap.size} domains`);
        }
        cp.sites = Array.from(uniqueMap.values());
        saveCP(cp);
        await sleep(delayBetween);
      }
      await sleep(6000); // extra delay between queries
    }

    if (uniqueMap.size <= 50) {
      // No fallback strategy exists; report the low yield and carry on.
      console.warn(`Warning: only ${uniqueMap.size} domains collected — continuing with what we have.`);
    }

    cp.sites = Array.from(uniqueMap.values());
    cp.collected = true;
    saveCP(cp);
    console.log(`\nPhase 1 done: ${uniqueMap.size} unique sites\n`);
  }

  // ── Phase 2: Verify ≥3 casino refs (concurrent batches of 3) ────────
  const allSites = cp.sites || [];
  const verified = Array.isArray(cp.verifiedList) ? [...cp.verifiedList] : [];
  // Domains already checked, whether they passed or not. `checkedDomains` is
  // absent from older checkpoints, in which case only passing domains are known.
  const doneSet = new Set([
    ...(Array.isArray(cp.checkedDomains) ? cp.checkedDomains : []),
    ...verified.map((v) => v.domain),
  ]);

  if (cp.verifiedList) {
    console.log(`Resuming verify: ${verified.length} already pass\n`);
  }

  // Batches are cut from the pending sites only, so a resumed run can neither
  // re-verify nor duplicate a site that is already done.
  const pending = allSites.filter((s) => !doneSet.has(s.domain));
  const CONC = 3;
  for (let i = 0; i < pending.length; i += CONC) {
    const batch = pending.slice(i, i + CONC);
    const counts = await Promise.all(batch.map((s) => verify(s.url, s.domain)));

    batch.forEach((site, j) => {
      if (counts[j] >= 3) {
        verified.push({ url: site.url, title: site.title, domain: site.domain, casinoLinks: counts[j] });
      }
      doneSet.add(site.domain);
    });

    cp.verifiedList = verified;
    cp.checkedDomains = Array.from(doneSet);
    saveCP(cp);

    const isLastBatch = i + CONC >= pending.length;
    if (isLastBatch || (i / CONC) % 50 === 0) {
      console.log(`Phase 2: ${doneSet.size}/${allSites.length} → ${verified.length} pass`);
    }
    await sleep(600);
  }

  // ── Write CSV ───────────────
  // Quote every text field and double embedded quotes so that a URL or domain
  // containing `"` or `,` cannot break the row.
  const csvField = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;
  const header = 'url,title,domain,casino_links';
  const rows = verified.map((v) => {
    const t = (v.title || '').replace(/"/g, "'").replace(/[\r\n]+/g, ' ');
    return [csvField(v.url), csvField(t), csvField(v.domain), v.casinoLinks].join(',');
  });
  fs.writeFileSync(CSV_FILE, [header, ...rows].join('\n'), 'utf8');

  console.log(`\n══════════ ${verified.length} sites → ${CSV_FILE} ══════════`);
})().catch((err) => {
  // Surface unexpected failures instead of leaving an unhandled rejection.
  console.error('Crawler failed:', err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user