225 lines
10 KiB
JavaScript
225 lines
10 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Massive DNS enumeration + transitive outbound extraction
 */
|
|
const {execFile} = require('child_process');
|
|
const fs = require('fs');
|
|
// Base URL of the SearXNG instance that the search requests are sent to.
const BASE_URL = 'https://search.griffin.pm';
// Path of the CSV report written at the end of a run.
const CSV_OUT = './casino_affiliate_sites.csv';
// Path of the JSON checkpoint used to persist progress between runs.
const CP_FILE = './.final.json';
|
|
|
|
/**
 * Pause for a given duration.
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>} promise that resolves once the timer fires
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Extract a normalized hostname from a URL-like value.
 *
 * The value is parsed with the WHATWG URL parser when possible. Inputs the
 * parser rejects (typically scheme-less strings such as "www.site.com/x")
 * fall back to a textual split: an optional http(s):// prefix is removed and
 * everything from the first "/", "?" or "#" onwards is dropped.
 *
 * Both paths lower-case the result and strip one leading "www.", so the same
 * site always maps to the same key. Nullish or non-string input never throws;
 * it is coerced to a string first (null/undefined become "").
 *
 * @param {*} url - URL or bare host string
 * @returns {string} lower-cased hostname without a leading "www." (may be "")
 */
function getDomain(url) {
  const raw = String(url ?? '').trim();
  let host;
  try {
    host = new URL(raw).hostname;
  } catch {
    // Not an absolute URL: best-effort textual extraction.
    host = raw.replace(/^https?:\/\//i, '').split(/[/?#]/)[0];
  }
  host = host.toLowerCase();
  return host.startsWith('www.') ? host.slice(4) : host;
}
|
|
|
|
// Hostname fragments of sites that are never recorded (social networks,
// search engines, CDNs, large publishers, ...). isSkip() tests each fragment
// as a plain substring of the hostname.
// NOTE(review): '.com/x.' contains a "/" and therefore can never match a bare
// hostname — it looks like an attempt to skip x.com; confirm the intent
// before changing it (a plain 'x.com' substring would also hit e.g. box.com).
const SKIP_PAT = [
  'youtube.', 'youtu.be', 'reddit.', 'facebook.', 'twitter', '.com/x.',
  'linkedin.', 'tiktok.', 'wikipedia.', 'pinterest.', 'instagram.', 'medium.com',
  'forbes.', 'nytimes.', 'amazon.', 'ebay.', 'google.', '.play.google.',
  'web.archive.org', 'duckduckgo.', 'startpage.', 'gravatar.', 'cdnjs.cloudflare.',
  'fonts.googleapis.', 'ajax.googleapis.', 'github.', 'stackover',
];
|
|
|
|
// ─── Generate 15,000+ candidate domains via combinatorial patterns ───
|
|
/**
 * Build the list of candidate hostnames probed during DNS enumeration.
 *
 * Four families are generated, in this order:
 *   1. `{prefix}-{casinoWord}{tld}`
 *   2. `{prefix}-casino-{suffix}{tld}`
 *   3. `{n}-best-{casinoWord}{tld}` and `top-{n}-{casinoWord}{tld}` for
 *      n = 1..50, using only the first 15 casino words and .com/.net/.org
 *   4. a handful of fixed names under every TLD
 *
 * A Set removes the overlap between families (e.g. "casino-hub" is both a
 * casino word and a family-2 result) and keeps insertion order, so the
 * returned array is stable from run to run.
 *
 * @returns {string[]} unique candidate domain names
 */
function genCandidates() {
  const prefixes = [
    'best', 'top', 'trusted', 'verified', 'rated', 'legal', 'licensed', 'pro',
    'ultimate', 'exclusive', 'premium', 'mega', 'super', 'ultra', 'fast', 'safe', 'true',
    'global', 'worldwide', 'prime', 'elite', 'chosen', 'select', 'choice', 'first',
    'number1', 'nr1', 'no1', 'one', 'theonly', 'themost', 'hottest', 'coolest', 'latest',
  ];

  const casinoWords = [
    'casinoreviews', 'casino-reviews', 'online-casinos', 'best-casinos', 'top-casinos',
    'casino-guide', 'casino-ratings', 'casino-ranking', 'casino-directory', 'casino-list',
    'casino-hub', 'casino-bonus', 'casino-compare', 'casino-rated', 'new-casinos',
    'safe-casino', 'safecasino', 'realmoney-casino', 'gambling-sites', 'gambling-guide',
    'gambling-review', 'gambling-ratings', 'online-gambling', 'real-casino',
    'trusted-casinos', 'compare-casinos', 'casinobonus', 'casinooffers', 'slot-casino',
    'live-casino', 'mobile-casino', 'crypto-casino', 'bitcoin-casino', 'bestcasino',
    'topcasino', 'mycasino', 'yourcasino', 'playcasino', 'win-at-casino',
  ];

  const tlds = ['.com', '.net', '.org', '.info', '.site', '.xyz', '.club', '.top', '.online', '.co'];
  const casinoSuffixes = ['online', 'best', 'top', 'hub', 'world', 'list'];
  const numberedTlds = ['.com', '.net', '.org'];
  const fixedNames = ['casinoreviews', 'online-casinoreviews', 'safecasinos', 'bestcasinobonus'];

  const candidates = new Set();

  // 1. {prefix}-{casinoWord}{tld}
  for (const prefix of prefixes) {
    for (const word of casinoWords) {
      for (const tld of tlds) {
        candidates.add(`${prefix}-${word}${tld}`);
      }
    }
  }

  // 2. {prefix}-casino-{suffix}{tld}
  for (const prefix of prefixes) {
    for (const suffix of casinoSuffixes) {
      for (const tld of tlds) {
        candidates.add(`${prefix}-casino-${suffix}${tld}`);
      }
    }
  }

  // 3. numbered lists: 7-best-casinoreviews.com, top-7-casinoreviews.com, ...
  const numberedWords = casinoWords.slice(0, 15);
  for (let n = 1; n <= 50; n++) {
    for (const word of numberedWords) {
      for (const tld of numberedTlds) {
        candidates.add(`${n}-best-${word}${tld}`);
        candidates.add(`top-${n}-${word}${tld}`);
      }
    }
  }

  // 4. fixed names under every TLD
  for (const tld of tlds) {
    for (const name of fixedNames) {
      candidates.add(`${name}${tld}`);
    }
  }

  return [...candidates];
}
|
|
|
|
// ─── DNS check via dig ──────────────────────
|
|
/**
 * Check whether a hostname resolves to at least one IPv4 address.
 *
 * Runs `dig +short +time=1 +tries=1 <domain> A` and inspects its output.
 * Only lines that are dotted-quad addresses count as a hit: `dig +short` also
 * prints CNAME targets and, when the resolver cannot be reached, ";; ..."
 * diagnostics on stdout, so "any non-empty output" is not a reliable signal.
 * A failed or timed-out dig invocation is reported as "does not resolve".
 *
 * @param {string} domain - hostname to look up
 * @returns {Promise<boolean>} true when at least one A record was returned
 */
async function dnsCheck(domain) {
  const ipv4Line = /^(?:\d{1,3}\.){3}\d{1,3}$/;
  try {
    const output = await new Promise((resolve) => {
      execFile(
        'dig',
        ['+short', '+time=1', '+tries=1', domain, 'A'],
        { timeout: 2500 },
        // Errors (dig missing, non-zero exit, timeout kill) yield no output.
        (err, stdout) => resolve(err ? '' : String(stdout || '')),
      );
    });
    return output.split('\n').some((line) => ipv4Line.test(line.trim()));
  } catch {
    // execFile can throw synchronously on invalid arguments.
    return false;
  }
}
|
|
|
|
// ─── SearXNG search via curl ────────────────
|
|
/**
 * Fetch one page of JSON results from the SearXNG instance at BASE_URL.
 *
 * The request is made with curl (12 s transfer limit, 15 s process limit).
 * Any failure — curl error, empty body, invalid JSON, missing `results`
 * array — yields an empty array, which callers treat as "no more results".
 *
 * @param {string} query - search terms
 * @param {number} pg - 1-based page number (sent as SearXNG's `pageno`)
 * @param {string} [engine] - optional engine name(s), comma separated; sent
 *   as SearXNG's `engines` parameter. Omitted when not provided.
 * @returns {Promise<object[]>} the `results` array of the response, or []
 */
async function apiSearch(query, pg, engine) {
  try {
    const ua = 'Mozilla/5.0 (Macintosh;Intel Mac OS X 14_4)AppleWebKit/605.1';

    const target = new URL(`${BASE_URL}/search`);
    target.searchParams.set('q', query);
    target.searchParams.set('format', 'json');
    // SearXNG's paging parameter is `pageno`; an unknown name is ignored and
    // every request would return the first page.
    target.searchParams.set('pageno', String(pg));
    target.searchParams.set('language', 'all');
    if (engine) target.searchParams.set('engines', engine);

    return await new Promise((resolve) => {
      execFile(
        'curl',
        ['-s', '-A', ua, '--max-time', '12', target.toString()],
        { timeout: 15000 },
        (_err, body) => {
          if (!body) return resolve([]);
          try {
            const parsed = JSON.parse(body);
            return resolve(Array.isArray(parsed.results) ? parsed.results : []);
          } catch {
            // Non-JSON body (HTML error page, rate-limit notice, ...).
            return resolve([]);
          }
        },
      );
    });
  } catch {
    // Invalid BASE_URL or a synchronous execFile failure.
    return [];
  }
}
|
|
|
|
// ─── Classify result as casino affiliate ────
|
|
/**
 * Heuristically decide whether a search result looks like a casino
 * affiliate / review site.
 *
 * Scoring:
 *   +1 for every signal term found in the lower-cased "url title content" text
 *   +3 when the hostname contains "casino", "gambl" or "bet"
 *   +2 when the hostname contains "review", "rate" or "poker"
 * A result qualifies with a total score of 4 or more.
 *
 * @param {string} url - result URL (nullish is treated as "")
 * @param {string} [title] - result title
 * @param {string} [content] - result snippet
 * @returns {boolean} true when the score reaches the threshold
 */
function isAffiliate(url, title, content) {
  const link = String(url ?? '');
  const haystack = [link, title || '', content || ''].join(' ').toLowerCase();

  const signals = [
    'review', 'rated', 'rating', 'ranking', 'best', 'top rated', 'compare', 'comparison',
    'list', 'guide', 'casino', 'gambling', 'gaming', 'bonus', 'payout', 'affiliat', 'partner',
    'online casino', 'real money', 'gambl', 'betting', 'wager',
  ];
  let score = signals.filter((term) => haystack.includes(term)).length;

  const host = getDomain(link);
  if (['casino', 'gambl', 'bet'].some((k) => host.includes(k))) score += 3;
  if (['review', 'rate', 'poker'].some((k) => host.includes(k))) score += 2;

  return score >= 4;
}
|
|
|
|
/**
 * Tell whether a hostname belongs to a site that should be ignored.
 * @param {string} d - hostname to test
 * @returns {boolean} true when `d` contains any SKIP_PAT fragment
 */
function isSkip(d) {
  return SKIP_PAT.some((fragment) => d.includes(fragment));
}
|
|
|
|
// ─── SearXNG engine-specific queries to maximize unique domains ──
|
|
// Search plan: each entry is [base query string, engine label].
// NOTE(review): 'ecossia' looks like a misspelling of an engine name — confirm
// which engines the target SearXNG instance actually exposes before relying
// on it.
const ENGINE_QUERIES = [
  ["casino review site list compared rated", "brave"],
  ["online casino ratings directory reviewed tested best", "startpage"],
  ["best gambling websites reviewed rated listed analyzed evaluated", "duckduckgo"],
  ["licensed safe trusted casino comparison website portal all", "ecossia"],
  ["top online casinos rated ranked listed compiled curated selected hand-picked verified", "qwant"],
  ["casino affiliate content publisher media platform outlet review aggregated directory database list catalog registry", "mojeek"],
];
|
|
|
|
// ═══════ MAIN ═══════
|
|
/**
 * Entry point. Runs the whole pipeline and writes the CSV report:
 *   1. load the checkpoint (CP_FILE) and merge results found in checkpoints
 *      left behind by earlier crawler scripts,
 *   2. phase "engage": page through SearXNG for every engine / query variation
 *      and keep results classified as affiliate sites,
 *   3. phase "dns": probe the generated candidate domains with dig,
 *   4. write every collected domain to CSV_OUT.
 *
 * Progress (phase, position inside the phase, collected domains) is written
 * to CP_FILE so an interrupted run resumes where it stopped.
 */
(async () => {
  const MAX_PAGES = 25; // pages requested per query variation
  const DNS_CONCURRENCY = 12; // concurrent dig lookups per batch
  const DNS_SAVE_EVERY = 500; // checkpoint after roughly this many lookups

  console.log('═══ Casino Affiliate Crawler: SearXNG multi-engine + DNS ═══\n');

  // A missing or corrupt checkpoint simply means "start from scratch".
  let saved = null;
  try {
    saved = JSON.parse(fs.readFileSync(CP_FILE, 'utf8'));
  } catch {
    saved = null;
  }
  // Defaults first, so checkpoints that lack a field still work.
  const cp = {
    phase: 'engage',
    eIdx: 0,
    qi: 0,
    pg: 1,
    dnsDone: 0,
    domains: {},
    ...(saved && typeof saved === 'object' ? saved : {}),
  };
  const dm = new Map(Object.entries(cp.domains || {}));

  // Persist the current state, always including every collected domain.
  const saveCheckpoint = () => {
    cp.domains = Object.fromEntries(dm);
    fs.writeFileSync(CP_FILE, JSON.stringify(cp));
  };

  // Merge previously collected data from other checkpoints. Best effort:
  // these files are optional, so unreadable ones are skipped silently.
  for (const cpfile of ['.mega_crawl.json', '.fast_crawl.json', '.cp.json']) {
    try {
      const d = JSON.parse(fs.readFileSync(cpfile, 'utf8'));
      const list = d.results || d.sites || d.collected || [];
      for (const r of list) {
        if (!r || !r.url) continue;
        const dd = getDomain(r.url);
        if (!isSkip(dd) && !dm.has(dd)) {
          dm.set(dd, { url: r.url, title: String(r.title || dd).substring(0, 250), domain: dd });
        }
      }
    } catch {
      // File absent or malformed — nothing to merge.
    }
  }

  // ─── Phase A: SearXNG with engine filters ──────────────
  if (cp.phase === 'engage') {
    console.log(`▶ Engine-filtered SearXNG collection\n`);

    // Resume position. cp.pg holds the NEXT page to request; checkpoints that
    // stored the last processed page merely re-fetch that one page.
    const asIndex = (v, fallback) => (Number.isInteger(v) && v >= 0 ? v : fallback);
    const resumeAt = {
      engine: asIndex(cp.eIdx, 0),
      variation: asIndex(cp.qi, 0),
      page: Math.max(1, asIndex(cp.pg, 1)),
    };

    for (let ei = resumeAt.engine; ei < ENGINE_QUERIES.length; ei++) {
      const [baseQuery, engine] = ENGINE_QUERIES[ei];
      console.log(`\n═══ Engine: ${engine} ═══`);

      // The base query plus five longer variations of it.
      const variations = [
        baseQuery,
        `${baseQuery} comprehensive complete extensive thorough detailed in-depth full coverage whole broad wide sweeping`,
        `${baseQuery} independent unbiased trusted reliable safe secure verified certified accredited licensed approved regulated`,
        `${baseQuery} highest greatest largest massive enormous gigantic huge immense vast sweeping expansive inclusive covering`,
        `${baseQuery} professional expert tested evaluated assessed analyzed investigated inspected examined researched studied explored`,
        `${baseQuery} small niche specialized boutique alternative underrated lesser-known offbeat unconventional unique distinctive`,
      ];

      // Only the engine the checkpoint was taken in resumes mid-way.
      const resuming = ei === resumeAt.engine;
      const firstVariation = resuming ? resumeAt.variation : 0;

      for (let vi = firstVariation; vi < variations.length; vi++) {
        const q = variations[vi];
        const firstPage = resuming && vi === firstVariation ? resumeAt.page : 1;

        for (let pg = firstPage; pg <= MAX_PAGES; pg++) {
          // The engine label is passed through as the search engine filter.
          const results = await apiSearch(q, pg, engine);
          if (!results.length) break; // no (more) results for this variation

          let newCount = 0;
          for (const r of results) {
            if (!r || typeof r.url !== 'string') continue; // result without a usable link
            const d = getDomain(r.url);
            if (!d || isSkip(d) || dm.has(d)) continue;
            const snippet = String(r.content || '').substring(0, 400);
            if (isAffiliate(r.url, r.title || '', snippet)) {
              dm.set(d, { url: r.url, title: String(r.title || '').substring(0, 250), domain: d });
              newCount++;
            }
          }

          cp.eIdx = ei;
          cp.qi = vi;
          cp.pg = pg + 1;
          saveCheckpoint();

          if (pg <= 3 || pg % 5 === 0) {
            console.log(` [${engine} v${vi} p${pg}] ${dm.size}`, newCount > 0 ? `(+${newCount})` : '');
          }
          await sleep(1600);
        }

        // Variation finished: the next run starts with the following one.
        cp.eIdx = ei;
        cp.qi = vi + 1;
        cp.pg = 1;
        saveCheckpoint();
        await sleep(2500);
      }

      cp.eIdx = ei + 1;
      cp.qi = 0;
      cp.pg = 1;
      saveCheckpoint();
      console.log(`Engine done: ${dm.size}`);
      await sleep(4000);
    }

    console.log(`\nSearXNG → ${dm.size} domains\n`);
    cp.phase = 'dns';
    cp.dnsDone = 0;
    saveCheckpoint();
  }

  // ─── Phase B: massive DNS enumeration ──────
  // 'engine' is accepted as a phase name for compatibility with checkpoints
  // that use it.
  if (cp.phase === 'dns' || cp.phase === 'engine') {
    console.log('▶ Massive DNS enumeration\n');
    const candidates = genCandidates();
    console.log(`Generated ${candidates.length} candidate domains`);

    // The set of already-probed names is the source of truth for resuming.
    const checked = new Set(Array.isArray(cp._dnsChecked) ? cp._dnsChecked : []);
    const pending = candidates.filter((d) => !checked.has(d));
    const alreadyDone = candidates.length - pending.length;
    let sinceSave = 0;

    for (let i = 0; i < pending.length; i += DNS_CONCURRENCY) {
      const batch = pending.slice(i, i + DNS_CONCURRENCY);
      const results = await Promise.all(
        batch.map(async (domain) => ({ domain, exists: await dnsCheck(domain) })),
      );

      for (const { domain, exists } of results) {
        checked.add(domain);
        if (exists && !dm.has(domain)) {
          dm.set(domain, { url: `https://${domain}`, title: domain, domain });
        }
      }

      const done = alreadyDone + Math.min(i + DNS_CONCURRENCY, pending.length);
      cp.dnsDone = done;
      sinceSave += batch.length;

      if (sinceSave >= DNS_SAVE_EVERY || done === candidates.length) {
        cp._dnsChecked = [...checked];
        saveCheckpoint();
        console.log(`DNS: ${done}/${candidates.length} → ${dm.size} total`);
        sinceSave = 0;
      }
      await sleep(80);
    }

    console.log(`\nDNS done: ${dm.size} total domains\n`);
    cp.phase = 'done';
    cp._dnsChecked = [...checked];
    saveCheckpoint();
  }

  // ─── Write final CSV ──────
  // Quote every field, double embedded quotes and flatten line breaks so each
  // record stays on one line.
  const csvField = (value) =>
    `"${String(value ?? '').replace(/"/g, '""').replace(/[\r\n]+/g, ' ')}"`;

  const finalData = [...dm.values()].sort((a, b) =>
    String(a.domain ?? '').localeCompare(String(b.domain ?? '')),
  );

  const header = 'url,title,domain';
  const rows = finalData.map((v) => [v.url, v.title, v.domain].map(csvField).join(','));

  fs.writeFileSync(CSV_OUT, [header, ...rows].join('\n'), 'utf8');
  console.log(`\n══════ ${finalData.length} sites → ${CSV_OUT} ════`);
})().catch((err) => {
  // Surface unexpected failures and signal them through the exit code.
  console.error(err);
  process.exitCode = 1;
});
|