'use strict';

/**
 * Casino affiliate scraper.
 *
 * Phase 1 crawls a fixed list of casino review portals (SEEDS) and collects
 * the external domains they link to. Phase 2 fetches every collected domain,
 * counts how many distinct casino-looking domains its landing page links to,
 * and records it as an affiliate when that count reaches MIN_BRANDS. Progress
 * is checkpointed to CP so an interrupted run can resume; the final result is
 * written to CSV.
 */

const { execFile } = require('child_process');
const fs = require('fs');

const CSV = './casino_affiliates.csv';
const CP = './cp.json';
const CSV_HEADER = 'url,title,domain,casino_brands_linked';
const UA =
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1.15 Version/17 Safari/605.1';

/** Minimum number of casino-looking outbound domains for a site to count as an affiliate. */
const MIN_BRANDS = 5;
/** Responses shorter than this (in characters) are treated as failed fetches. */
const MIN_PAGE_LENGTH = 400;
/** Number of candidate domains verified concurrently. */
const CONC = 4;
const SEED_DELAY_MS = 2500;
const BATCH_DELAY_MS = 3500;

// Hostname fragments of sites that are never interesting (social, CDN, ...).
const SKIP = [
  'youtube.', 'facebook.', 'twitter.', 'instagram.', 'linkedin.', 'tiktok.',
  'wikipedia.', 'pinterest', 'medium.', 'forbes.', 'google.', 'amazon.',
  'static.', 'cdn.', 'fonts.googleapis.', 'flickr.', 'imgur.', 'github.com',
];

// Casino brand keywords to detect in outbound hostnames (substring match).
// The original list also carried a path-style token
// ('/casino/-/casinos/-/online-casino/'); it is omitted because only hostnames
// are inspected, so it could never match, and any hostname containing 'casino'
// is already counted by countBrands(). 'betonline.intertops' was a single
// token and is split into its two brands here.
// NOTE(review): 'safe' and 'mr.green' look like typos ('safe' matches many
// unrelated hostnames, 'mr.green' cannot match mrgreen.com) - confirm intent.
const CKW = [
  'bet365', '888casino', 'skyvegas', 'pokerstars', 'partypoker', 'unibet',
  'bwin', 'betway', 'williamhill', 'ladbrokes', 'betfair', 'draftkings',
  'fanduel', 'betmgm', 'caesars', 'barstool', 'leovegas', 'bovada',
  'ignitioncasino', 'mrplay', 'jackpotcity', 'casumo', 'playojo', '22bet',
  'paddypower', '1xbet', 'betonline', 'intertops', 'reddogcas', 'luckystick',
  'betclix', 'betsson', 'hardrock', 'betano', 'grosvenor', 'coral', 'skybet',
  'mr.green', 'safe', 'comeon.se', 'slots.lv', 'bc.game', 'pin-up', 'stake.cas',
];

const SEEDS = [
  'https://www.casino.org/reviews/',
  'https://casino.guru/casino-reviews',
  'https://www.askgamblers.com/online-casinos/reviews',
  'https://chipy.com/casinos',
  'https://www.racingpost.com/online-casino/best-sites/',
  'https://slotcatalog.com/en/best-online-casinos',
  'https://www.whichbingo.co.uk/casino-sites/',
  'https://next.io/online-casinos-uk/',
  'https://first.com/casino/best-casinos',
  'https://gg.co.uk/online-casinos/top-20/',
];

const SCRIPT_RE = /<script[^>]*>[\s\S]*?<\/script>/gi;
const STYLE_RE = /<style[^>]*>[\s\S]*?<\/style>/gi;
const HREF_RE = /href=["'](https?:\/\/[^"'\s>]+)["']/gi;
const TITLE_RE = /<title[^>]*>([^<]{10,250})<\/title>/i;

/**
 * Resolve after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Download a page with curl. Best-effort: never rejects.
 * @param {string} url - Absolute http(s) URL.
 * @returns {Promise<string>} Whatever curl wrote to stdout, or '' on failure.
 */
async function fetchPage(url) {
  // -s silent, -L follow redirects; curl itself gives up after 8 s and the
  // execFile timeout is a backstop in case curl hangs.
  const args = ['-sL', '--max-time', '8', '-A', UA, '--max-filesize', '50000', url];
  try {
    return await new Promise((resolve) => {
      // Errors are deliberately ignored: a partial body is still useful.
      execFile('curl', args, { timeout: 12e3 }, (_err, stdout) => resolve(stdout || ''));
    });
  } catch {
    // execFile can throw synchronously on malformed arguments.
    return '';
  }
}

/**
 * Normalise a hostname for comparison: lower-case, leading "www." removed.
 * @param {string} hostname
 * @returns {string}
 */
function bareHost(hostname) {
  return hostname.toLowerCase().replace(/^www\./, '');
}

/**
 * Pull the external link targets and the page title out of an HTML document.
 * Script and style blocks are removed first so URLs inside them are ignored.
 * @param {string} html - Raw HTML.
 * @param {string} myDomain - Bare hostname of the page itself; links to it are dropped.
 * @returns {{doms: string[], title: string}} Unique bare hostnames (in order of
 *   first appearance, SKIP-listed hosts excluded) and the whitespace-collapsed
 *   title ('' when no title of 10-250 characters is present).
 */
function extract(html, myDomain) {
  if (typeof html !== 'string' || html === '') {
    return { doms: [], title: '' };
  }
  const cleaned = html.replace(SCRIPT_RE, '').replace(STYLE_RE, '');

  const doms = new Set();
  for (const [, href] of cleaned.matchAll(HREF_RE)) {
    let host;
    try {
      host = bareHost(new URL(href).hostname);
    } catch {
      continue; // malformed href - not a usable link
    }
    if (host === '' || host === myDomain || SKIP.some((frag) => host.includes(frag))) {
      continue;
    }
    doms.add(host);
  }

  const titleMatch = cleaned.match(TITLE_RE);
  const title = titleMatch ? titleMatch[1].replace(/\s+/g, ' ').trim() : '';
  return { doms: [...doms], title };
}

/**
 * Count how many distinct domains look like casino brands: the hostname
 * contains 'casino' or one of the CKW keywords (case-insensitive).
 * @param {string[]} domList - Hostnames.
 * @returns {number}
 */
function countBrands(domList) {
  const hits = new Set();
  for (const dom of domList) {
    const lower = String(dom).toLowerCase();
    if (lower.includes('casino') || CKW.some((kw) => lower.includes(kw))) {
      hits.add(lower);
    }
  }
  return hits.size;
}

/**
 * Quote a value as a CSV field, doubling embedded double quotes.
 * @param {unknown} value
 * @returns {string}
 */
function csvField(value) {
  return `"${String(value ?? '').replace(/"/g, '""')}"`;
}

/**
 * Render the verified sites as CSV text, sorted by title (URL when untitled).
 * @param {Map<string, {url?: string, title?: string, brands?: number}>} verified
 *   Domain -> record.
 * @returns {string} Header line plus one line per site, joined with '\n'.
 */
function toCsv(verified) {
  const records = [...verified].map(([domain, rec]) => ({
    domain,
    url: rec.url || '',
    title: rec.title || '',
    brands: Number(rec.brands ?? 0),
  }));
  records.sort((a, b) => (a.title || a.url).localeCompare(b.title || b.url));
  const rows = records.map((rec) =>
    [csvField(rec.url), csvField(rec.title), csvField(rec.domain), rec.brands].join(','),
  );
  return [CSV_HEADER, ...rows].join('\n');
}

/**
 * Save checkpoint for resumability. Written to a temp file and renamed so an
 * interrupted write cannot leave a truncated checkpoint behind.
 * @param {object} obj - JSON-serialisable state.
 */
function save(obj) {
  const tmp = `${CP}.tmp`;
  fs.writeFileSync(tmp, JSON.stringify(obj));
  fs.renameSync(tmp, CP);
}

/**
 * Load the checkpoint written by save().
 * @returns {object|null} Parsed state, or null when missing or unreadable.
 */
function load() {
  try {
    return JSON.parse(fs.readFileSync(CP, 'utf8'));
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.log('[checkpoint] ignoring unreadable checkpoint:', err.message);
    }
    return null;
  }
}

/**
 * Fetch a candidate's landing page (https first, then http) and analyse it.
 * @param {string} domain - Bare hostname.
 * @returns {Promise<{url: string, title: string, doms: string[], brands: number}|null>}
 *   Analysis of the first scheme that returned a usable page, or null.
 */
async function verifyCandidate(domain) {
  for (const scheme of ['https://', 'http://']) {
    const url = scheme + domain;
    const html = await fetchPage(url);
    if (html.length < MIN_PAGE_LENGTH) {
      continue;
    }
    const { doms, title } = extract(html, domain);
    return { url, title, doms, brands: countBrands(doms) };
  }
  return null;
}

/**
 * Run the crawl: seeds, then candidate verification, then CSV export.
 * @returns {Promise<void>}
 */
async function main() {
  console.log('═══ Casino Affiliate Scraper ═══\n');
  const cp = load();

  // Domain → {url,title,brands}
  const verified = new Map(Object.entries(cp?.verified || {}));
  // Candidate domains needing verification. `queue` keeps discovery order for
  // indexed batching; `seen` gives O(1) membership checks.
  const queue = [...new Set(cp?.cand || [])];
  const seen = new Set(queue);
  const savedIdx = cp?.checkIdx;
  let idx = Number.isInteger(savedIdx) && savedIdx >= 0 ? Math.min(savedIdx, queue.length) : 0;

  const enqueue = (dom) => {
    if (!verified.has(dom) && !seen.has(dom)) {
      seen.add(dom);
      queue.push(dom);
    }
  };
  const checkpoint = () =>
    save({ verified: Object.fromEntries(verified), cand: queue, checkIdx: idx });

  // ── PHASE 1: crawl seed review portals ──
  console.log('phase-1: scraping seed sites...\n');
  for (const seedUrl of SEEDS) {
    try {
      const seedDomain = bareHost(new URL(seedUrl).hostname);
      if (verified.has(seedDomain)) {
        continue; // already handled in a previous (checkpointed) run
      }
      const html = await fetchPage(seedUrl);
      if (html === '') {
        console.log(`[seed] no content: ${seedUrl}`);
      } else {
        const data = extract(html, seedDomain);
        // Seeds are known review portals, so they are recorded regardless of
        // the detected count - but with the real count, not a placeholder.
        verified.set(seedDomain, {
          url: seedUrl,
          title: data.title,
          brands: countBrands(data.doms),
        });
        data.doms.forEach(enqueue);
      }
    } catch (e) {
      console.log('[parse err]', e.message);
    }
    await sleep(SEED_DELAY_MS); // throttle between seeds
  }
  checkpoint();

  // ── PHASE 2: verify candidate sites ──
  while (idx < queue.length) {
    const batch = queue.slice(idx, idx + CONC);
    const pages = await Promise.all(batch.map(verifyCandidate));

    pages.forEach((page, i) => {
      if (!page || page.brands < MIN_BRANDS) {
        return;
      }
      const dom = batch[i];
      verified.set(dom, { url: page.url, title: page.title, brands: page.brands });
      console.log(`✓ [${verified.size}] ${dom} → ${page.brands} brands`);
      page.doms.forEach(enqueue); // recursive discovery
    });

    idx += batch.length; // advance index through candidates
    checkpoint();
    if (idx % 100 === 0 || idx >= queue.length) {
      console.log(`[CKPT] VERIF:${verified.size} pending:${queue.length - idx}\n`);
    }
    await sleep(BATCH_DELAY_MS); // throttle between batches
  }

  // ── Write CSV file ──
  fs.writeFileSync(CSV, toCsv(verified), 'utf8');
  console.log(`DONE: ${verified.size} → ${CSV}\n`);
}

// Only crawl when executed directly (`node <file>`), not when loaded by
// another module.
if (typeof module !== 'undefined' && require.main === module) {
  main().catch((err) => {
    console.error('[fatal]', err);
    process.exitCode = 1;
  });
}