Files
crawler/db-cleanup-4.js
T
2026-06-26 14:30:45 +02:00

85 lines
4.3 KiB
JavaScript

const { pool } = require('./src/db');
/**
 * One-off maintenance script: aggressive final cleanup pass over the
 * `casinos` table.
 *
 * Steps:
 *   1. Delete rows whose trimmed, lower-cased `casino_name` matches one of the
 *      known junk strings below (one DELETE per string, best-effort).
 *   2. Delete rows whose `casino_name` contains an http(s) URL.
 *   3. Print the remaining row count and a heuristic good/junk breakdown of up
 *      to 60 distinct surviving names (3-40 characters after trimming).
 *
 * The pool is always closed, even when a query fails; an unexpected failure is
 * reported on stderr and turns into a non-zero exit code.
 */
(async () => {
  // Exact names (already lower-case) that earlier cleanup passes left behind.
  const stillJunkPatterns = [
    'about us', 'achievements', 'ai.com', 'american football',
    'antislapp laws australia', 'arsenal', 'author avatar', 'author', 'avatar photo', 'background',
    'bc.math.msu.su', 'best online casino', 'bookie betting sites', 'bonus icons',
    'casino bonus codes not working (tried everything)', 'cloud data engineering & ai solutions',
    'contact us', 'deposit methods', 'dmca protected', 'dmca.com protection status',
    'dmca.com protection', 'european patentpending modular indoor', 'estates.com',
    'fast and easy transfers', 'fb.com', 'frictionless affordability checks',
    'full t&c apply, 18+', 'free bonuses free spins free giveaways or anything related to gambling',
    'games reviews sites in the uk for real money', 'get bonus →', 'healthinsurance.com',
    'how we rank online casinos', 'how we help you choose the right gambling site',
    'icon-chevron-down-white', 'immediate payment', 'infingame has published operational insights on player engagement across sweepstakes platforms',
    'it can pay big to find slots that have win multipliers',
    'join one of the best online casinos in the world', 'kryto bonus bis 3.000 plus 133 freispiele',
    'licensed-casino.com', 'licenz und spielerschutz bei exclusive casino',
    'lucky rebel', 'malta gaming authority', 'menu-img-games.png', 'mt.svg',
    'online gambling for me', 'our guide on online slot rtps read review play now',
    'paysafecard casinos', 'paytm deposit', 'play responsibily', 'porn.com',
    'quality casinos', 'quick and easy deposits', 'rakebit', 'read more about how we rate gambling operators',
    'safe and secure transactions', 'secure & trusted gambling sites for real money in the usa',
    'see more details', 'sex.com', 'sidepot', 'sign up today to play at any of our recommended gambling sites!',
    'sitemap', 'sportsbetting', 'stripe.press.poor charlies almanack book',
    'the biggest name in slot machines: pragmatic play',
    'the conversational ai orchestration leader in latin america',
    'the esports integrity commission (esic) and moonton games have introduced a mandatory',
    'this website is for sale', 'time to play guide', 't&c apply, 18+',
    'top cryptocurrency casinos in canada for crypto play in 2026',
    'trusted & safe gambling sites for real money in 2026',
    'up to £4500 plus up to 1000 free spins', 'us online gambling guide overview',
    'verified by dmca.com', 'video slots com casino review videoslots.com free spins bonus no deposit required uk players',
    'view top casinos on the right', 'voice.com z.com rocket.com nfts.com porn.com',
  ];

  // PostgreSQL regex for "an http(s) URL starting at a word boundary".
  // PostgreSQL spells the word boundary `\y`; `\b` means a backspace character
  // there, so a `\b`-based pattern would match (almost) nothing. The regex is
  // sent as a bind parameter so no SQL string-literal escaping is involved.
  const urlNameRegex = '\\yhttps?://';

  // Words that indicate a scraped navigation/footer label rather than a brand.
  const navWordRegex = /(about|home|menu|nav|terms|privacy|contact|help)/i;

  // Heuristic: true when a surviving name still looks like scraped junk.
  const looksLikeJunk = (name) => {
    if (name.length < 3 || name.length > 40) return true;
    const letterCount = (name.match(/[a-zA-Z]/g) || []).length;
    if (letterCount / name.length < 0.5) return true; // mostly digits/symbols
    if (navWordRegex.test(name)) return true;
    return name.length < 5 && /^[a-z]+$/i.test(name); // very short all-letter token
  };

  try {
    console.log('=== Aggressive final cleanup pass ===\n');

    for (const pat of stillJunkPatterns) {
      try {
        // ILIKE without wildcards acts as a case-insensitive equality test.
        // Note that `%`, `_` and `\` in an entry would be LIKE metacharacters.
        const deleted = await pool.query(
          'DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) ILIKE $1',
          [pat.toLowerCase()],
        );
        if (deleted.rowCount > 0) {
          console.log('Deleted', deleted.rowCount, 'matching:', JSON.stringify(pat.substring(0, 50)));
        }
      } catch (err) {
        // Best-effort: one failing DELETE must not abort the remaining ones,
        // but the failure is reported instead of being silently dropped.
        console.error('Delete failed for', JSON.stringify(pat.substring(0, 50)) + ':', err.message);
      }
    }

    const urlDeleted = await pool.query('DELETE FROM casinos WHERE casino_name ~* $1', [urlNameRegex]);
    if (urlDeleted.rowCount > 0) console.log('Deleted', urlDeleted.rowCount, 'URL-like names');

    console.log('\nAfter aggressive cleanup, entries remaining in DB:');
    const total = await pool.query('SELECT COUNT(*) FROM casinos;');
    console.log('Total:', total.rows[0].count);

    const samples = await pool.query(
      'SELECT DISTINCT casino_name FROM casinos WHERE LENGTH(TRIM(casino_name)) BETWEEN 3 AND 40 ORDER BY casino_name LIMIT 60',
    );

    console.log('\n=== Sample of surviving entries ===\n');
    let goodN = 0;
    let junkN = 0;
    for (const row of samples.rows) {
      const name = row.casino_name || '';
      // Names that do not start with an ASCII letter (including empty ones)
      // are skipped entirely: neither printed nor counted.
      if (!/^[a-zA-Z]/.test(name)) continue;

      if (looksLikeJunk(name)) {
        console.log('❌', JSON.stringify(name));
        junkN++;
      } else {
        // Display form keeps only letters, digits, whitespace and '&'.
        console.log('✅', name.replace(/[^a-zA-Z0-9\s&]/g, '').trim());
        goodN++;
      }
    }
    console.log(`\nGood: ${goodN}, Still bad: ${junkN}`);
  } finally {
    // Release the connections even if a query above threw.
    await pool.end();
  }
})().catch((err) => {
  console.error('Cleanup failed:', err);
  process.exitCode = 1;
});