Initial commit
This commit is contained in:
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env node
|
||||
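/**
 * Casino-affiliate site discovery: SearXNG search collection plus
 * massive DNS enumeration + transitive outbound extraction.
 * Results are checkpointed to disk and written out as a CSV file.
 */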
|
||||
const {execFile} = require('child_process');
|
||||
const fs = require('fs');
|
||||
// Base URL of the SearXNG instance that apiSearch() queries.
const BASE_URL = 'https://search.griffin.pm';

// Path of the CSV file written at the end of a run.
const CSV_OUT = './casino_affiliate_sites.csv';

// Path of the JSON checkpoint used to resume an interrupted run.
const CP_FILE = './.final.json';
|
||||
|
||||
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Extract a normalized host name from a URL-like string.
 *
 * The result is lower-cased and has a leading "www." removed, so it can be
 * used as a de-duplication key. Inputs without a scheme ("example.com/x")
 * are parsed as if they were http URLs, so port, path and query are dropped
 * the same way as for full URLs.
 *
 * @param {*} url - URL or bare host; non-string values are coerced to string.
 * @returns {string} Normalized host, or '' for null/undefined/blank input.
 */
function getDomain(url) {
  const raw = String(url ?? '').trim();
  if (raw === '') return '';

  const schemePrefix = /^[a-z][a-z\d+.-]*:\/\//i;
  let host;
  try {
    // Without an explicit "scheme://", new URL() either throws or misreads
    // "host:port" as a scheme, so parse bare hosts with http:// prepended.
    host = new URL(schemePrefix.test(raw) ? raw : `http://${raw}`).hostname;
  } catch {
    // Unparseable input: best-effort cut at the first path/query/fragment.
    host = raw.replace(schemePrefix, '').split(/[/?#]/)[0];
  }

  // Lower-case before stripping "www." so "WWW.x.com" is handled as well.
  host = host.toLowerCase();
  return host.startsWith('www.') ? host.slice(4) : host;
}
|
||||
|
||||
// Substring patterns for domains that must never be collected; isSkip()
// rejects any domain containing one of these strings.
const SKIP_PAT = [
  // social / video / community
  'youtube.', 'youtu.be', 'reddit.', 'facebook.', 'twitter', '.com/x.',
  'linkedin.', 'tiktok.', 'wikipedia.', 'pinterest.', 'instagram.', 'medium.com',
  // news / retail / search
  'forbes.', 'nytimes.', 'amazon.', 'ebay.', 'google.', '.play.google.',
  // archives, search front-ends, asset hosts
  'web.archive.org', 'duckduckgo.', 'startpage.', 'gravatar.', 'cdnjs.cloudflare.',
  'fonts.googleapis.', 'ajax.googleapis.', 'github.', 'stackover',
];
|
||||
|
||||
// ─── Generate 15,000+ candidate domains via combinatorial patterns ───
|
||||
/**
 * Build the list of candidate domain names to probe via DNS.
 *
 * Names are produced from fixed word lists in four passes (prefix + casino
 * word, prefix + "casino-" + suffix, numbered lists, and a few fixed names).
 * Duplicates are dropped and first-insertion order is kept.
 *
 * @returns {string[]} Unique candidate domains in generation order.
 */
function genCandidates() {
  const prefixes = [
    'best', 'top', 'trusted', 'verified', 'rated', 'legal', 'licensed', 'pro',
    'ultimate', 'exclusive', 'premium', 'mega', 'super', 'ultra', 'fast', 'safe', 'true',
    'global', 'worldwide', 'prime', 'elite', 'chosen', 'select', 'choice', 'first',
    'number1', 'nr1', 'no1', 'one', 'theonly', 'themost', 'hottest', 'coolest', 'latest',
  ];

  const casinoWords = [
    'casinoreviews', 'casino-reviews', 'online-casinos', 'best-casinos', 'top-casinos',
    'casino-guide', 'casino-ratings', 'casino-ranking', 'casino-directory', 'casino-list',
    'casino-hub', 'casino-bonus', 'casino-compare', 'casino-rated', 'new-casinos',
    'safe-casino', 'safecasino', 'realmoney-casino', 'gambling-sites', 'gambling-guide',
    'gambling-review', 'gambling-ratings', 'online-gambling', 'real-casino',
    'trusted-casinos', 'compare-casinos', 'casinobonus', 'casinooffers', 'slot-casino',
    'live-casino', 'mobile-casino', 'crypto-casino', 'bitcoin-casino', 'bestcasino',
    'topcasino', 'mycasino', 'yourcasino', 'playcasino', 'win-at-casino',
  ];

  const tlds = ['.com', '.net', '.org', '.info', '.site', '.xyz', '.club', '.top', '.online', '.co'];
  const casinoSuffixes = ['online', 'best', 'top', 'hub', 'world', 'list'];
  const numberedTlds = ['.com', '.net', '.org'];
  const counters = Array.from({ length: 50 }, (_, i) => String(i + 1));

  const seen = new Set();

  // Pass 1: {prefix}-{casino_word}{tld}
  prefixes.forEach((prefix) => {
    casinoWords.forEach((word) => {
      tlds.forEach((tld) => {
        seen.add(`${prefix}-${word}${tld}`);
      });
    });
  });

  // Pass 2: {prefix}-casino-{suffix}{tld}
  prefixes.forEach((prefix) => {
    casinoSuffixes.forEach((suffix) => {
      tlds.forEach((tld) => {
        seen.add(`${prefix}-casino-${suffix}${tld}`);
      });
    });
  });

  // Pass 3: numbered lists built from the first 15 casino words only.
  const numberedWords = casinoWords.slice(0, 15);
  counters.forEach((n) => {
    numberedWords.forEach((word) => {
      numberedTlds.forEach((tld) => {
        seen.add(`${n}-best-${word}${tld}`);
        seen.add(`top-${n}-${word}${tld}`);
      });
    });
  });

  // Pass 4: a handful of fixed names on every TLD.
  tlds.forEach((tld) => {
    seen.add(`casinoreviews${tld}`);
    seen.add(`online-casinoreviews${tld}`);
    seen.add(`safecasinos${tld}`);
    seen.add(`bestcasinobonus${tld}`);
  });

  return Array.from(seen).filter((name) => name.length > 0 && !name.endsWith('.'));
}
|
||||
|
||||
// ─── DNS check via dig ──────────────────────
|
||||
// One line of `dig +short <name> A` output that is a dotted-quad address.
const IPV4_LINE = /^(?:\d{1,3}\.){3}\d{1,3}$/;

/**
 * Decide whether `dig +short` output contains at least one A record.
 *
 * Only dotted-quad lines count. Diagnostic lines that dig prints on stdout
 * (";; connection timed out; ...") and bare CNAME targets are ignored, so a
 * resolver timeout is not mistaken for an existing domain.
 *
 * @param {*} digOutput - Raw stdout of dig (coerced to string).
 * @returns {boolean} True when an IPv4 address line is present.
 */
function hasARecord(digOutput) {
  return String(digOutput ?? '')
    .split(/\r?\n/)
    .some((line) => IPV4_LINE.test(line.trim()));
}

/**
 * Check whether a domain resolves to an A record, using the `dig` binary.
 *
 * Never rejects: a missing dig binary, a timeout or any other failure all
 * yield false.
 *
 * @param {string} domain - Host name to look up.
 * @returns {Promise<boolean>} True when dig returned at least one IPv4 address.
 */
async function dnsCheck(domain) {
  try {
    const stdout = await new Promise((resolve) => {
      execFile(
        'dig',
        ['+short', '+time=1', '+tries=1', domain, 'A'],
        { timeout: 2500 },
        // The error is deliberately ignored; whatever stdout exists is judged.
        (_err, out) => resolve(out || ''),
      );
    });
    return hasARecord(stdout);
  } catch {
    return false;
  }
}
|
||||
|
||||
// ─── SearXNG search via curl ────────────────
|
||||
/**
 * Build the SearXNG JSON search URL for one result page.
 *
 * @param {string} query - Search terms.
 * @param {number} pg - 1-based page number, sent as `pageno`.
 * @param {string} [engine] - Optional engine name, sent as `engines`.
 * @param {string} [base=BASE_URL] - Instance base URL.
 * @returns {string} Fully encoded request URL.
 */
function buildSearchUrl(query, pg, engine, base = BASE_URL) {
  const target = new URL(`${String(base).replace(/\/+$/, '')}/search`);
  target.searchParams.set('q', query);
  target.searchParams.set('format', 'json');
  // SearXNG's page parameter is `pageno`; with any other name every request
  // returns page 1.
  target.searchParams.set('pageno', String(pg));
  target.searchParams.set('language', 'all');
  if (engine) target.searchParams.set('engines', engine);
  return target.toString();
}

/**
 * Fetch one page of search results from the SearXNG instance via curl.
 *
 * Best-effort: network errors, timeouts, empty bodies and invalid JSON all
 * yield an empty array instead of throwing.
 *
 * @param {string} query - Search terms.
 * @param {number} pg - 1-based page number.
 * @param {string} [engine] - Optional engine to restrict the search to.
 * @returns {Promise<Array>} The `results` array of the response, or [].
 */
async function apiSearch(query, pg, engine) {
  const ua = 'Mozilla/5.0 (Macintosh;Intel Mac OS X 14_4)AppleWebKit/605.1';
  try {
    const url = buildSearchUrl(query, pg, engine);
    const body = await new Promise((resolve) => {
      execFile(
        'curl',
        ['-s', '-A', ua, '--max-time', '12', url],
        // Raised maxBuffer so a large JSON page is not truncated.
        { timeout: 15000, maxBuffer: 16 * 1024 * 1024 },
        (_err, stdout) => resolve(stdout || ''),
      );
    });
    if (!body) return [];
    const parsed = JSON.parse(body);
    return Array.isArray(parsed?.results) ? parsed.results : [];
  } catch {
    return [];
  }
}
|
||||
|
||||
// ─── Classify result as casino affiliate ────
|
||||
/**
 * Heuristically classify a search result as a casino-affiliate page.
 *
 * One point is scored for every signal phrase found in the lower-cased
 * url + title + content text; the domain itself earns +3 for a gambling
 * term and +2 for a review/rating/poker term. Four points or more qualify.
 *
 * @param {string} url - Result URL.
 * @param {string} [title] - Result title.
 * @param {string} [content] - Result snippet.
 * @returns {boolean} True when the combined score is at least 4.
 */
function isAffiliate(url, title, content) {
  const haystack = [url, title || '', content || ''].join(' ').toLowerCase();

  const signals = [
    'review', 'rated', 'rating', 'ranking', 'best', 'top rated', 'compare', 'comparison',
    'list', 'guide', 'casino', 'gambling', 'gaming', 'bonus', 'payout', 'affiliat', 'partner',
    'online casino', 'real money', 'gambl', 'betting', 'wager',
  ];
  const textScore = signals.filter((phrase) => haystack.includes(phrase)).length;

  const domain = getDomain(url);
  const hasAny = (terms) => terms.some((term) => domain.includes(term));
  const gamblingBonus = hasAny(['casino', 'gambl', 'bet']) ? 3 : 0;
  const reviewBonus = hasAny(['review', 'rate', 'poker']) ? 2 : 0;

  return textScore + gamblingBonus + reviewBonus >= 4;
}
|
||||
|
||||
/**
 * Tell whether a domain matches the skip list.
 *
 * @param {string} d - Domain to test.
 * @returns {boolean} True when `d` contains any SKIP_PAT entry as a substring.
 */
function isSkip(d) {
  return SKIP_PAT.some((pattern) => d.includes(pattern));
}
|
||||
|
||||
// ─── SearXNG engine-specific queries to maximize unique domains ──
|
||||
// [base query, engine name] pairs; the main loop runs every base query (and
// its variations) once per entry, in this order.
// NOTE(review): engine names are assumed to match engines enabled on the
// SearXNG instance — confirm against the instance configuration.
const ENGINE_QUERIES = [
  ["casino review site list compared rated", "brave"],
  ["online casino ratings directory reviewed tested best", "startpage"],
  ["best gambling websites reviewed rated listed analyzed evaluated", "duckduckgo"],
  // Was misspelled "ecossia".
  ["licensed safe trusted casino comparison website portal all", "ecosia"],
  ["top online casinos rated ranked listed compiled curated selected hand-picked verified", "qwant"],
  ["casino affiliate content publisher media platform outlet review aggregated directory database list catalog registry", "mojeek"],
];
|
||||
|
||||
// ═══════ MAIN ═══════
|
||||
/**
 * Entry point. Runs three stages and checkpoints progress to CP_FILE:
 *   A. 'engage' — page through SearXNG results for every ENGINE_QUERIES
 *      entry and keep results that isAffiliate() accepts;
 *   B. 'dns'    — probe every genCandidates() name with dnsCheck();
 *   C. write all collected domains to CSV_OUT (always runs).
 * Stage A resumes at engine granularity (cp.eIdx); stage B resumes from the
 * set of already-checked names.
 */
(async () => {
  console.log('═══ Casino Affiliate Crawler: SearXNG multi-engine + DNS ═══\n');

  const MAX_PAGES = 25;              // pages requested per query variation
  const CONC = 12;                   // concurrent dig lookups per batch
  const BATCHES_PER_CHECKPOINT = 40; // DNS checkpoint/log cadence (~480 names)

  const freshCheckpoint = () => ({ phase: 'engage', eIdx: 0, qi: 0, pg: 1, dnsDone: 0, domains: {} });

  let cp;
  try {
    cp = JSON.parse(fs.readFileSync(CP_FILE, 'utf8'));
  } catch {
    // Missing or corrupt checkpoint: start from scratch.
    cp = freshCheckpoint();
  }
  if (cp === null || typeof cp !== 'object' || Array.isArray(cp)) cp = freshCheckpoint();

  // Earlier versions left the phase as the typo 'dna' (or tested for
  // 'engine'); both mean "search finished, DNS still to do".
  if (cp.phase === 'dna' || cp.phase === 'engine') cp.phase = 'dns';

  const dm = new Map(Object.entries(cp.domains || {}));

  // Persist the domain map together with the progress markers so domains
  // found during the DNS stage survive a restart too.
  const saveCheckpoint = () => {
    cp.domains = Object.fromEntries(dm);
    fs.writeFileSync(CP_FILE, JSON.stringify(cp));
  };

  const clip = (value, max) => String(value || '').substring(0, max);

  // Merge previously collected data from other checkpoints
  for (const cpfile of ['.mega_crawl.json', '.fast_crawl.json', '.cp.json']) {
    try {
      const d = JSON.parse(fs.readFileSync(cpfile, 'utf8'));
      const list = d.results || d.sites || d.collected || [];
      for (const r of list) {
        if (!r || !r.url) continue;
        const dd = getDomain(r.url);
        if (!isSkip(dd) && !dm.has(dd)) {
          dm.set(dd, { url: r.url, title: clip(r.title || dd, 250), domain: dd });
        }
      }
    } catch {
      // These files are optional; an absent or unreadable one is skipped.
    }
  }

  // ─── Phase A: SearXNG with engine filters ──────────────
  if (cp.phase === 'engage') {
    const firstEngine = Number.isInteger(cp.eIdx) && cp.eIdx >= 0 ? cp.eIdx : 0;
    if (firstEngine < ENGINE_QUERIES.length) {
      console.log(`▶ Engine-filtered SearXNG collection\n`);
    }

    for (let ei = firstEngine; ei < ENGINE_QUERIES.length; ei++) {
      const [queries_str, engine] = ENGINE_QUERIES[ei];
      console.log(`\n═══ Engine: ${engine} ═══`);

      // The base query plus five keyword-padded variations (6 per engine).
      const vQs = [
        queries_str,
        `${queries_str} comprehensive complete extensive thorough detailed in-depth full coverage whole broad wide sweeping`,
        `${queries_str} independent unbiased trusted reliable safe secure verified certified accredited licensed approved regulated`,
        `${queries_str} highest greatest largest massive enormous gigantic huge immense vast sweeping expansive inclusive covering`,
        `${queries_str} professional expert tested evaluated assessed analyzed investigated inspected examined researched studied explored`,
        `${queries_str} small niche specialized boutique alternative underrated lesser-known offbeat unconventional unique distinctive`,
      ];

      for (let vi = 0; vi < vQs.length; vi++) {
        const q = vQs[vi];

        for (let pg = 1; pg <= MAX_PAGES; pg++) {
          // The engine is passed as a third argument so the search can be
          // restricted to it; an implementation that only takes
          // (query, page) simply ignores it.
          const results = await apiSearch(q, pg, engine);
          if (!Array.isArray(results) || results.length === 0) break;

          let newCount = 0;
          for (const r of results) {
            if (!r || !r.url) continue; // malformed result: nothing to key on
            const d = getDomain(r.url);
            if (isSkip(d) || dm.has(d)) continue;
            if (isAffiliate(r.url, r.title || '', clip(r.content, 400))) {
              dm.set(d, { url: r.url, title: clip(r.title, 250), domain: d });
              newCount++;
            }
          }

          cp.eIdx = ei;
          cp.qi = vi;
          cp.pg = pg;
          saveCheckpoint();

          if (pg <= 3 || pg % 5 === 0) {
            console.log(` [${engine} v${vi} p${pg}] ${dm.size}`, newCount > 0 ? `(+${newCount})` : '');
          }
          await sleep(1600);
        }
        await sleep(2500);
      }

      cp.eIdx = ei + 1;
      saveCheckpoint();
      console.log(`Engine done: ${dm.size}`);
      await sleep(4000);
    }

    console.log(`\nSearXNG → ${dm.size} domains\n`);
    // Must be exactly 'dns': phase B below is keyed on this value.
    cp.phase = 'dns';
    cp.dnsDone = 0;
    saveCheckpoint();
  }

  // ─── Phase B: massive DNS enumeration ──────
  if (cp.phase === 'dns') {
    console.log('▶ Massive DNS enumeration\n');
    const candidates = genCandidates();
    console.log(`Generated ${candidates.length} candidate domains`);

    // Resume from the recorded set of checked names; when only the numeric
    // marker exists, treat the first `dnsDone` candidates as checked.
    const checkedSet = new Set(Array.isArray(cp._dnsChecked) ? cp._dnsChecked : []);
    if (checkedSet.size === 0 && Number.isInteger(cp.dnsDone) && cp.dnsDone > 0) {
      for (const d of candidates.slice(0, cp.dnsDone)) checkedSet.add(d);
    }
    const pending = candidates.filter((d) => !checkedSet.has(d));
    let done = candidates.length - pending.length;

    // `start` is advanced in exactly one place so no candidate is skipped.
    for (let start = 0; start < pending.length; start += CONC) {
      const batch = pending.slice(start, start + CONC);

      const results = await Promise.all(
        batch.map(async (domain) => ({ domain, exists: await dnsCheck(domain) })),
      );

      for (const r of results) {
        checkedSet.add(r.domain);
        if (r.exists && !dm.has(r.domain)) {
          dm.set(r.domain, { url: `https://${r.domain}`, title: r.domain, domain: r.domain });
        }
      }

      done += batch.length;
      cp.dnsDone = done;

      const batchNo = start / CONC + 1;
      const isLastBatch = start + CONC >= pending.length;
      if (batchNo % BATCHES_PER_CHECKPOINT === 0 || isLastBatch) {
        // The checked list is serialized only when a checkpoint is written.
        cp._dnsChecked = [...checkedSet];
        saveCheckpoint();
        console.log(`DNS: ${done}/${candidates.length} → ${dm.size} total`);
      }
      await sleep(80);
    }

    console.log(`\nDNS done: ${dm.size} total domains\n`);
    cp.phase = 'done';
    cp._dnsChecked = [...checkedSet];
    saveCheckpoint();
  }

  // ─── Write final CSV ──────
  const finalData = [...dm.values()]
    .filter((v) => v !== null && typeof v === 'object')
    .sort((a, b) => String(a.domain ?? '').localeCompare(String(b.domain ?? '')));

  // Quote every field, double embedded quotes and flatten line breaks so a
  // record always stays on one line.
  const csvCell = (value) =>
    `"${String(value ?? '').replace(/[\r\n]+/g, ' ').replace(/"/g, '""')}"`;

  const header = 'url,title,domain';
  const rows = finalData.map((v) => {
    // Titles keep the existing convention of turning " into '.
    const t = String(v.title || '').replace(/"/g, "'");
    return [v.url, t, v.domain].map(csvCell).join(',');
  });

  fs.writeFileSync(CSV_OUT, [header, ...rows].join('\n'), 'utf8');
  console.log(`\n══════ ${finalData.length} sites → ${CSV_OUT} ════`);
})().catch((err) => {
  // Report a failed run instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user