Initial commit
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
const {execFile}=require('child_process'), fs=require('fs');
|
||||
// Browser-like User-Agent header sent with every curl request.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4) AppleWebKit/605.1';

// Substrings that mark an outbound hostname as a casino/betting brand.
const CKW = [
  'bet365', '888casino', 'skyvegas', 'pokerstars', 'partypoker',
  'unibet', 'bwin', 'betway', 'williamhill', 'ladbrokes', 'betfair',
  'draftkings', 'fanduel', 'betmgm', 'caesars', 'leovegas', 'bovada',
  'jackpotcity', '22bet', '1xbet', 'coral', 'skybet', 'casino',
];

// Hostname substrings that are never counted as outbound links (social sites, CDNs, ...).
const IGNORED_HOST_PARTS = [
  'youtube', 'facebook', 'twitter', 'github', 'linkedin', 'tiktok',
  'wikipedia', 'google.', 'static.', 'cdn.',
];

// Responses of this many characters or fewer are not analysed.
const MIN_HTML_LENGTH = 400;

// Brand count stored for a domain until (and unless) its page is fetched and analysed.
// NOTE(review): 99 looks like a "not checked" sentinel — confirm with whoever consumes the result.
const UNVERIFIED_BRANDS = 99;

// Pause between two consecutive requests, in milliseconds.
const THROTTLE_MS = 1800;

/**
 * Parses one data row of the affiliate CSV (columns: url, title, domain).
 *
 * Rows with at least three double-quoted fields are read from those fields;
 * any other row falls back to a plain comma split.
 *
 * @param {string} line - A single CSV line without its line terminator.
 * @returns {{url: string, title: string, domain: string} | null} The parsed
 *   row, or null when no domain could be extracted.
 */
function parseCsvRow(line) {
  const unquote = (field) => (field ?? '').replace(/"/g, '').trim();
  const quoted = line.match(/"[^"]*"/g) || [];
  const fields = quoted.length >= 3 ? quoted : line.split(',');
  const domain = unquote(fields[2]);
  if (!domain) return null;
  return { url: unquote(fields[0]), title: unquote(fields[1]), domain };
}

/**
 * Builds the domain -> row map from the full CSV text.
 * The first line is treated as a header and skipped; blank lines are ignored.
 * When a domain occurs more than once, the last row wins.
 *
 * @param {string} text - Raw CSV file content.
 * @returns {Map<string, {url: string, title: string, domain: string}>}
 */
function parseDomainCsv(text) {
  const rows = new Map();
  // Split once up front instead of re-splitting the whole file on every iteration.
  for (const rawLine of text.split('\n').slice(1)) {
    const line = rawLine.trim(); // also drops the '\r' of CRLF files
    if (!line) continue;
    const row = parseCsvRow(line);
    if (row) rows.set(row.domain, row);
  }
  return rows;
}

/**
 * Collects the hostnames of absolute http(s) links found in an HTML document,
 * excluding the page's own domain and hosts matching IGNORED_HOST_PARTS.
 *
 * @param {string} html - Raw HTML of the fetched page.
 * @param {string} ownDomain - Domain the page was fetched from.
 * @returns {Set<string>} Distinct outbound hostnames without a leading "www.".
 */
function extractOutboundHosts(html, ownDomain) {
  // Both patterns are lazy so that markup between two separate <script>/<style>
  // elements is kept.
  const clean = html
    .replace(/<script[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?<\/style>/gi, '');
  const self = ownDomain.toLowerCase().replace(/^www\./, '');

  const hosts = new Set();
  for (const match of clean.matchAll(/href=["'](https?:\/\/[^"'\s>]+)["']/gi)) {
    let host;
    try {
      // Anchored so only a leading "www." is stripped.
      host = new URL(match[1]).hostname.replace(/^www\./, '');
    } catch {
      continue; // malformed URL in the page: ignore this link
    }
    if (host === self) continue;
    if (IGNORED_HOST_PARTS.some((part) => host.includes(part))) continue;
    hosts.add(host);
  }
  return hosts;
}

/**
 * Selects the hostnames that contain at least one CKW keyword.
 *
 * @param {Iterable<string>} hosts - Hostnames to classify.
 * @returns {Set<string>} The subset of hosts that look like casino brands.
 */
function findBrandHosts(hosts) {
  const brands = new Set();
  for (const host of hosts) {
    const lower = host.toLowerCase();
    if (CKW.some((keyword) => lower.includes(keyword))) brands.add(host);
  }
  return brands;
}

/**
 * Downloads a page with curl (-s silent, -L follow redirects).
 * Never rejects: on any curl error the promise resolves with whatever stdout
 * was captured, or an empty string.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string>} The captured response body.
 */
function fetchPage(url) {
  const args = ['-sL', '--max-time', '6', '-A', UA, '--max-filesize', '25000', url];
  return new Promise((resolve) => {
    execFile('curl', args, { timeout: 10e3 }, (_err, stdout) => resolve(stdout || ''));
  });
}

/**
 * Script entry point: loads the merged CSV, fetches every listed domain over
 * HTTPS and records how many distinct casino-brand hosts its page links to.
 * Results are only kept in the in-memory VERIFIED map and summarised on stdout.
 */
(async () => {
  // Read existing merged CSV (316 domains)
  let existing = new Map();
  try {
    existing = parseDomainCsv(fs.readFileSync('./casino_affiliate_sites.csv', 'utf8'));
  } catch (err) {
    // Start fresh if the CSV is unreadable; a missing file is expected and stays quiet.
    if (err.code !== 'ENOENT') console.warn('Could not read CSV: ' + err.message);
  }

  // Verify each domain by fetching + counting casino brand outbound links
  console.log('Verifying '+existing.size+' domains...\n');
  const VERIFIED = new Map();

  for (const [domain, entry] of existing) {
    // Seed every domain so it stays in the result even when its page cannot be analysed.
    VERIFIED.set(domain, { url: entry.url || 'https://'+domain, title: '', brands: UNVERIFIED_BRANDS });

    try {
      // Only HTTPS is attempted; there is no plain-HTTP fallback.
      const urlToTry = `https://${domain}`;
      const result = await fetchPage(urlToTry);

      if (result.length > MIN_HTML_LENGTH) {
        const brandHit = findBrandHosts(extractOutboundHosts(result, domain));
        // The brand count is recorded unconditionally.
        // NOTE(review): an earlier comment mentioned accepting only >=5 brand links,
        // but no such threshold was ever enforced — confirm whether one is wanted.
        VERIFIED.set(domain, { url: urlToTry, title: 'Verified affiliate', brands: brandHit.size });
      }
    } catch (err) {
      // Skip this domain and move on to the next one, but leave a trace.
      console.warn('Skipping ' + domain + ': ' + err.message);
    }

    // Throttle between requests.
    await new Promise((resolve) => setTimeout(resolve, THROTTLE_MS));
  }

  console.log('Written '+VERIFIED.size+' verified affiliates\n');
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user