68 lines
3.0 KiB
JavaScript
68 lines
3.0 KiB
JavaScript
const {execFile}=require('child_process'), fs=require('fs');
|
|
// User-Agent header sent with every curl request.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1';

// Substrings that mark an outbound hostname as a casino/betting brand.
const CKW = [
  'bet365', '888casino', 'skyvegas', 'pokerstars', 'partypoker',
  'unibet', 'bwin', 'betway', 'williamhill', 'ladbrokes', 'betfair',
  'draftkings', 'fanduel', 'betmgm', 'caesars', 'leovegas', 'bovada',
  'jackpotcity', '22bet', '1xbet', 'coral', 'skybet', 'casino',
];

// Hostname fragments (social networks, CDNs, ...) that never count as outbound brand links.
const IGNORED_HOST_PARTS = [
  'youtube', 'facebook', 'twitter', 'github', 'linkedin', 'tiktok',
  'wikipedia', 'google.', 'static.', 'cdn.',
];

// Input CSV; the first row is skipped as a header, remaining rows are url,title,domain.
const CSV_PATH = './casino_affiliate_sites.csv';

// Responses of this length or shorter are treated as "no usable page".
const MIN_HTML_LENGTH = 400;

// Pause between consecutive requests, in milliseconds.
const THROTTLE_MS = 1800;

/** Resolves after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Parses one CSV data row into its first three fields.
 *
 * Rows with at least three double-quoted fields are read from the quotes;
 * anything else falls back to a plain comma split with quotes stripped.
 *
 * @param {string} line - A single CSV row (no trailing newline).
 * @returns {{url: string, title: string, domain: string} | null}
 *   The parsed entry, or `null` when the row has no domain.
 */
function parseCsvLine(line) {
  const quoted = line.match(/"[^"]*"/g) ?? [];
  const fields = quoted.length >= 3
    ? quoted.map((field) => field.slice(1, -1))
    : line.split(',').map((field) => field.replace(/"/g, '').trim());
  const [url = '', title = '', domain = ''] = fields;
  return domain ? { url, title, domain } : null;
}

/**
 * Loads the existing domain list from the CSV file.
 *
 * Best-effort: an unreadable file yields an empty map (with a warning)
 * so the script can still run.
 *
 * @param {string} csvPath - Path of the CSV file to read.
 * @returns {Map<string, {url: string, title: string, domain: string}>}
 *   Entries keyed by domain; later rows overwrite earlier duplicates.
 */
function loadExistingDomains(csvPath) {
  const domains = new Map();
  let csv;
  try {
    csv = fs.readFileSync(csvPath, 'utf8');
  } catch (err) {
    console.warn(`Could not read ${csvPath} (${err.message}); starting with an empty list.`);
    return domains;
  }
  // Split once (not per iteration) and drop the header row.
  const rows = csv.split(/\r?\n/).slice(1);
  for (const row of rows) {
    if (!row) continue;
    const entry = parseCsvLine(row);
    if (entry) domains.set(entry.domain, entry);
  }
  return domains;
}

/**
 * Collects the hostnames of absolute http(s) links found in an HTML page.
 *
 * `<script>` and `<style>` blocks are removed first so URLs embedded in code
 * are not counted. Links to the page's own domain and to hosts matching
 * IGNORED_HOST_PARTS are excluded. A leading `www.` is stripped.
 *
 * @param {string} html - Raw HTML text.
 * @param {string} ownDomain - Domain of the page being inspected.
 * @returns {Set<string>} Distinct outbound hostnames.
 */
function extractOutboundDomains(html, ownDomain) {
  // Both patterns are lazy so each block is removed individually.
  const stripped = html
    .replace(/<script[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?<\/style>/gi, '');
  const self = ownDomain.toLowerCase().replace(/^www\./, '');
  const hosts = new Set();
  for (const [, href] of stripped.matchAll(/href=["'](https?:\/\/[^"'\s>]+)["']/gi)) {
    let host;
    try {
      host = new URL(href).hostname.replace(/^www\./, '');
    } catch {
      continue; // malformed URL in the page; ignore this link
    }
    const isIgnored = IGNORED_HOST_PARTS.some((part) => host.includes(part));
    if (host !== self && !isIgnored) hosts.add(host);
  }
  return hosts;
}

/**
 * Filters hostnames down to those containing any CKW brand keyword.
 *
 * @param {Iterable<string>} hosts - Hostnames to inspect.
 * @returns {Set<string>} Hostnames that match at least one keyword.
 */
function findBrandDomains(hosts) {
  const hits = new Set();
  for (const host of hosts) {
    const lowered = host.toLowerCase();
    if (CKW.some((keyword) => lowered.includes(keyword))) hits.add(host);
  }
  return hits;
}

/**
 * Fetches a URL with curl and resolves with whatever it wrote to stdout.
 *
 * Never rejects: curl errors and timeouts resolve to the (possibly empty)
 * output captured so far, so one bad host cannot abort the run.
 *
 * @param {string} url - URL to fetch.
 * @returns {Promise<string>} Response body, or '' when nothing was received.
 */
function fetchHtml(url) {
  const args = ['-sL', '--max-time', '6', '-A', UA, '--max-filesize', '25000', url];
  return new Promise((resolve) => {
    execFile('curl', args, { timeout: 10e3 }, (_err, stdout) => resolve(stdout || ''));
  });
}

// Entry point: load the CSV, fetch each domain over HTTPS and record how many
// distinct casino-brand hosts its page links to.
(async () => {
  const existing = loadExistingDomains(CSV_PATH);

  console.log(`Verifying ${existing.size} domains...\n`);
  const VERIFIED = new Map();

  for (const [domain, entry] of existing) {
    // Seed every domain so it stays in the result even if the fetch fails.
    // NOTE(review): 99 is a placeholder brand count, not a measurement — confirm
    // that unreachable domains should really be kept as "verified".
    VERIFIED.set(domain, { url: entry.url || `https://${domain}`, title: '', brands: 99 });

    try {
      // NOTE(review): only HTTPS is attempted; there is no HTTP fallback.
      const urlToTry = `https://${domain}`;
      const html = await fetchHtml(urlToTry);
      if (html.length > MIN_HTML_LENGTH) {
        const brandHit = findBrandDomains(extractOutboundDomains(html, domain));
        // NOTE(review): the count is recorded as-is; no minimum number of
        // brand links is enforced here.
        VERIFIED.set(domain, { url: urlToTry, title: 'Verified affiliate', brands: brandHit.size });
      }
    } catch (err) {
      // Best-effort: report and move on to the next domain.
      console.warn(`Skipping ${domain}: ${err.message}`);
    }

    await sleep(THROTTLE_MS); // throttle between requests
  }

  // NOTE(review): nothing is persisted to disk; this only reports the count.
  console.log(`Written ${VERIFIED.size} verified affiliates\n`);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});