Initial commit
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
|
||||
// Directory that holds one JSON config per site. The path is relative, so it
// resolves against the directory the script is launched from.
const sitesDir = './src/sites';

// Snapshot of every *.json config present before any cleanup takes place.
const jsonFiles = fs
  .readdirSync(sitesDir)
  .filter((entry) => entry.endsWith('.json'));

console.log('Loaded', jsonFiles.length, 'configs\n');

// Config files to remove unconditionally. Despite the name, each entry is an
// exact file name (not a pattern); the trailing comment records why the site
// was rejected during the validation run.
const badPatterns = [
  'best-7-online-casinos.com.json', // Timeout
  'best-9-online-casinos.com.json', // DNS fail
  'best-casino-list.com.json', // Timeout
  'best-casino.xyz.json', // Afternic parked
  'best-casinos.com.json', // Afternic parked
  'best-online-casinos.info.json', // Timeout
  'best.unitestatesonlinecasino.net.json', // Not casino content
  'casadelalmendro.com.json', // Non-casino, timeout
  'cabinjohn.org.json', // Not a casino site
  'delawarepark.betrivers.com.json', // Cloudflare block
  'exclusive-casino.com.json', // No data extracted
];
|
||||
|
||||
// Walk every config found at startup and delete the ones that are either
// explicitly deny-listed or whose URL identifies a parked / junk / generic site.

// Domains of generic casino directory sites that have no real content.
// Built once here instead of once per loop iteration.
const genericSites = [
  'best-casino.net',
  'safe-casino.xyz',
  'ultimate-casinos.com',
  'super-online-casinos.net',
];

/**
 * Extracts the host name from a config URL.
 *
 * @param {string} url - Lower-cased URL (or bare domain) taken from a config.
 * @returns {string} Host name without port, credentials or trailing dot;
 *   an empty string when none can be determined.
 */
function hostnameOf(url) {
  let host = '';
  try {
    host = new URL(url).hostname;
  } catch (err) {
    // Not an absolute URL (e.g. "example.com/path"); parsed by hand below.
  }
  if (host === '') {
    // Covers bare domains and "host:port" values, which the URL parser either
    // rejects or misreads as "scheme:path".
    host = url
      .replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\//, '')
      .split(/[/?#]/)[0]
      .split('@')
      .pop()
      .split(':')[0];
  }
  return host.toLowerCase().replace(/\.$/, '');
}

/**
 * Decides whether a config URL identifies a site that should be removed.
 *
 * @param {string} url - Lower-cased URL from the config ('' when absent).
 * @returns {string|null} The log label for the matching rule, or null when
 *   the config should be kept.
 */
function junkLabel(url) {
  // Domain marketplaces / parking pages (GoDaddy, Afternic).
  if (url.includes('forsale.godaddy') || url.includes('afternic.com/forsale')) {
    return '🚫 PARKED:';
  }

  // Compare against the host's TLD only. A plain substring test on the whole
  // URL would also hit hosts such as "www.sitejabber.com" or paths that merely
  // contain ".site" / ".xyz", deleting legitimate configs.
  const host = hostnameOf(url);
  if (host.endsWith('.xyz') || host.endsWith('.site')) {
    return '🚫 JUNK TLD:';
  }

  if (genericSites.some((domain) => url.includes(domain))) {
    return '🚫 GENERIC:';
  }

  return null;
}

/**
 * Deletes one config file and logs the outcome.
 *
 * @param {string} filePath - Path of the file to delete.
 * @param {string} label - Log prefix naming the rule that matched.
 * @param {string} fileName - File name shown in the log output.
 * @returns {boolean} True when the file was deleted, false when deletion failed.
 */
function removeConfig(filePath, label, fileName) {
  try {
    fs.unlinkSync(filePath);
    console.log(label, fileName);
    return true;
  } catch (err) {
    // Report the failure instead of hiding it; the run continues with the
    // remaining files.
    console.warn('Failed to delete', fileName, err.message);
    return false;
  }
}

for (const fileName of jsonFiles) {
  const fp = path.join(sitesDir, fileName);

  // Deny-listed files are removed by name alone, before reading them, so a
  // corrupt file on the list is still cleaned up.
  if (badPatterns.includes(fileName)) {
    removeConfig(fp, '🚫 DELETE:', fileName);
    continue;
  }

  let data;
  try {
    data = JSON.parse(fs.readFileSync(fp, 'utf8'));
  } catch (err) {
    // One unreadable or malformed config must not abort the whole cleanup.
    console.warn('Skipping unreadable config', fileName, err.message);
    continue;
  }

  // Tolerate configs whose top level is not an object or whose url is missing
  // or not a string; such configs match no URL rule and are kept.
  const hasUrl =
    data !== null && typeof data === 'object' && typeof data.url === 'string';
  const url = hasUrl ? data.url.toLowerCase() : '';

  const label = junkLabel(url);
  if (label !== null) {
    removeConfig(fp, label, fileName);
  }
}
|
||||
|
||||
// Re-scan the directory so the totals reflect what is actually left on disk,
// then report how many configs survived and how many were removed.
const finalFiles = fs
  .readdirSync(sitesDir)
  .filter((entry) => entry.endsWith('.json'));
const keptCount = finalFiles.length;
const deletedCount = jsonFiles.length - keptCount;

console.log('\n========== SUMMARY ==========');
console.log('✅ Kept:', keptCount, 'valid site configs in src/sites/');
console.log('🚫 Deleted:', deletedCount, 'junk/parked configs');
|
||||
Reference in New Issue
Block a user