// 75 lines | 2.7 KiB | JavaScript
const path = require('path');
const fs = require('fs');

// Directory holding one JSON config per site (resolved against the current
// working directory, so run this script from the project root).
const sitesDir = './src/sites';

const jsonFiles = fs.readdirSync(sitesDir).filter((f) => f.endsWith('.json'));

console.log('Loaded', jsonFiles.length, 'configs\n');

// Config files flagged as junk/parked/dead by the validation run.
// These are removed by file name, without looking at their content.
const badPatterns = new Set([
  'best-7-online-casinos.com.json', // Timeout
  'best-9-online-casinos.com.json', // DNS fail
  'best-casino-list.com.json', // Timeout
  'best-casino.xyz.json', // Afternic parked
  'best-casinos.com.json', // Afternic parked
  'best-online-casinos.info.json', // Timeout
  'best.unitestatesonlinecasino.net.json', // Not casino content
  'casadelalmendro.com.json', // Non-casino, timeout
  'cabinjohn.org.json', // Not a casino site
  'delawarepark.betrivers.com.json', // Cloudflare block
  'exclusive-casino.com.json', // No data extracted
]);

// URL fragments of Afternic/GoDaddy "for sale" landing pages.
const parkedMarkers = ['forsale.godaddy', 'afternic.com/forsale'];

// Hostname suffixes treated as junk; compared against the hostname only.
const junkTlds = ['.xyz', '.site'];

// Generic casino directory sites that have no real content.
const genericSites = [
  'best-casino.net',
  'safe-casino.xyz',
  'ultimate-casinos.com',
  'super-online-casinos.net',
];

/**
 * Extract the hostname from a URL string.
 *
 * Tries the value as-is first, then with an `http://` prefix so that
 * scheme-less values such as `example.xyz/path` are still understood.
 *
 * @param {string} url - Lower-cased URL taken from a site config.
 * @returns {string} The hostname without a trailing dot, or '' if none could be parsed.
 */
function hostnameOf(url) {
  for (const candidate of [url, `http://${url}`]) {
    try {
      const host = new URL(candidate).hostname;
      if (host) {
        return host.replace(/\.$/, '');
      }
    } catch (err) {
      // Not parseable in this form; fall through to the next candidate.
    }
  }
  return '';
}

/**
 * Read a config file and return its `url` field, lower-cased.
 *
 * @param {string} fp - Path of the JSON config file.
 * @returns {string} The lower-cased URL, or '' when the field is missing or not a string.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function readConfigUrl(fp) {
  const data = JSON.parse(fs.readFileSync(fp, 'utf8'));
  // JSON may legally be `null`, a number, etc.; only objects can carry a url.
  const raw = data !== null && typeof data === 'object' ? data.url : undefined;
  return typeof raw === 'string' ? raw.toLowerCase() : '';
}

/**
 * Decide whether a config URL points at a junk site.
 *
 * @param {string} url - Lower-cased URL from the config.
 * @returns {string|null} Log label ('PARKED', 'JUNK TLD', 'GENERIC') or null to keep the config.
 */
function junkReason(url) {
  if (parkedMarkers.some((marker) => url.includes(marker))) {
    return 'PARKED';
  }
  // Match the TLD on the hostname only: a plain substring test would also
  // hit paths and sub-domains such as "sub.sitemap.example.com".
  const host = hostnameOf(url);
  if (junkTlds.some((tld) => host.endsWith(tld))) {
    return 'JUNK TLD';
  }
  if (genericSites.some((site) => url.includes(site))) {
    return 'GENERIC';
  }
  return null;
}

/**
 * Delete a config file and log the outcome; a failed delete is reported
 * as a warning and does not stop the run.
 *
 * @param {string} fp - Path of the file to delete.
 * @param {string} name - File name used in log output.
 * @param {string} label - Reason label printed after the 🚫 marker.
 */
function removeConfig(fp, name, label) {
  try {
    fs.unlinkSync(fp);
    console.log(`🚫 ${label}:`, name);
  } catch (err) {
    console.warn('Failed to delete', name, `(${err.message})`);
  }
}

for (const name of jsonFiles) {
  const fp = path.join(sitesDir, name);

  // Explicitly listed files are removed before parsing, so a corrupt file
  // on the list is still cleaned up.
  if (badPatterns.has(name)) {
    removeConfig(fp, name, 'DELETE');
    continue;
  }

  let url;
  try {
    url = readConfigUrl(fp);
  } catch (err) {
    // An unreadable config is left in place; it is reported, not deleted.
    console.warn('Skipping unreadable config', name, `(${err.message})`);
    continue;
  }

  const reason = junkReason(url);
  if (reason !== null) {
    removeConfig(fp, name, reason);
  }
}

// Count remaining by re-reading the directory, so failed deletes are not
// reported as deleted.
const finalFiles = fs.readdirSync(sitesDir).filter((f) => f.endsWith('.json'));
console.log('\n========== SUMMARY ==========');
console.log('✅ Kept:', finalFiles.length, 'valid site configs in src/sites/');
console.log('🚫 Deleted:', jsonFiles.length - finalFiles.length, 'junk/parked configs');