Initial commit
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
const { pool } = require('./src/db');
|
||||
|
||||
// Substrings that mark a surviving name as navigation/boilerplate text.
const NAV_WORD_RE = /(about|home|menu|nav|terms|privacy|contact|help)/i;

/**
 * Classify one sampled casino name for the post-cleanup report.
 *
 * @param {string} name - Raw `casino_name` value (may be an empty string).
 * @returns {'skip'|'junk'|'good'} `skip` when the name does not start with an
 *   ASCII letter (such rows are not counted at all), `junk` when it still
 *   looks like scraped noise, `good` otherwise.
 */
function classifySample(name) {
  if (!/^[a-zA-Z]/.test(name)) return 'skip';

  // Length is checked on the untrimmed value, so it can differ from the
  // TRIM()-based length filter applied in the SQL sample query.
  if (name.length < 3 || name.length > 40) return 'junk';

  // Fewer than half of the characters being ASCII letters counts as junk.
  const letterCount = (name.match(/[a-zA-Z]/g) || []).length;
  if (letterCount / name.length < 0.5) return 'junk';

  if (NAV_WORD_RE.test(name)) return 'junk';

  // Purely alphabetic names shorter than five characters are rejected too.
  if (name.length < 5 && /^[a-z]+$/i.test(name)) return 'junk';

  return 'good';
}

/**
 * One-off cleanup script: deletes rows of `casinos` whose name matches a
 * known-junk phrase or contains an http(s) URL, then prints the remaining
 * row count and a heuristic good/junk report over a sample of survivors.
 * The pool is always closed, and any fatal error sets a non-zero exit code.
 */
(async () => {
  try {
    console.log('=== Aggressive final cleanup pass ===\n');

    const stillJunkPatterns = [
      'about us', 'achievements', 'ai.com', 'american football',
      'antislapp laws australia', 'arsenal', 'author avatar', 'author', 'avatar photo', 'background',
      'bc.math.msu.su', 'best online casino', 'bookie betting sites', 'bonus icons',
      'casino bonus codes not working (tried everything)', 'cloud data engineering & ai solutions',
      'contact us', 'deposit methods', 'dmca protected', 'dmca.com protection status',
      'dmca.com protection', 'european patentpending modular indoor', 'estates.com',
      'fast and easy transfers', 'fb.com', 'frictionless affordability checks',
      'full t&c apply, 18+', 'free bonuses free spins free giveaways or anything related to gambling',
      'games reviews sites in the uk for real money', 'get bonus →', 'healthinsurance.com',
      'how we rank online casinos', 'how we help you choose the right gambling site',
      'icon-chevron-down-white', 'immediate payment', 'infingame has published operational insights on player engagement across sweepstakes platforms',
      'it can pay big to find slots that have win multipliers',
      'join one of the best online casinos in the world', 'kryto bonus bis 3.000 plus 133 freispiele',
      'licensed-casino.com', 'licenz und spielerschutz bei exclusive casino',
      'lucky rebel', 'malta gaming authority', 'menu-img-games.png', 'mt.svg',
      'online gambling for me', 'our guide on online slot rtps read review play now',
      'paysafecard casinos', 'paytm deposit', 'play responsibily', 'porn.com',
      'quality casinos', 'quick and easy deposits', 'rakebit', 'read more about how we rate gambling operators',
      'safe and secure transactions', 'secure & trusted gambling sites for real money in the usa',
      'see more details', 'sex.com', 'sidepot', 'sign up today to play at any of our recommended gambling sites!',
      'sitemap', 'sportsbetting', 'stripe.press.poor charlies almanack book',
      'the biggest name in slot machines: pragmatic play',
      'the conversational ai orchestration leader in latin america',
      'the esports integrity commission (esic) and moonton games have introduced a mandatory',
      'this website is for sale', 'time to play guide', 't&c apply, 18+',
      'top cryptocurrency casinos in canada for crypto play in 2026',
      'trusted & safe gambling sites for real money in 2026',
      'up to £4500 plus up to 1000 free spins', 'us online gambling guide overview',
      'verified by dmca.com', 'video slots com casino review videoslots.com free spins bonus no deposit required uk players',
      'view top casinos on the right', 'voice.com z.com rocket.com nfts.com porn.com',
    ];

    for (const pat of stillJunkPatterns) {
      try {
        const deleted = await pool.query(
          "DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) ILIKE $1",
          [pat.toLowerCase()]
        );
        if (deleted.rowCount > 0) {
          console.log('Deleted', deleted.rowCount, 'matching:', JSON.stringify(pat.substring(0, 50)));
        }
      } catch (err) {
        // Best effort: one failing pattern must not abort the pass, but the
        // failure is reported instead of being silently discarded.
        console.error('Delete failed for', JSON.stringify(pat.substring(0, 50)), '-', err.message);
      }
    }

    // PostgreSQL regexes use \y for a word boundary; \b means a backspace
    // character there, so the previous '\bhttps?://' never matched real URLs.
    const urlDeleted = await pool.query(
      "DELETE FROM casinos WHERE casino_name ~* '\\yhttps?://'"
    );
    if (urlDeleted.rowCount > 0) {
      console.log('Deleted', urlDeleted.rowCount, 'entries containing a URL');
    }

    console.log('\nAfter aggressive cleanup, entries remaining in DB:');
    const total = await pool.query('SELECT COUNT(*) FROM casinos;');
    console.log('Total:', total.rows[0].count);

    const samples = await pool.query(
      "SELECT DISTINCT casino_name FROM casinos WHERE LENGTH(TRIM(casino_name)) BETWEEN 3 AND 40 ORDER BY casino_name LIMIT 60"
    );

    console.log('\n=== Sample of surviving entries ===\n');

    let goodN = 0;
    let junkN = 0;

    for (const row of samples.rows) {
      const name = row.casino_name || '';
      const verdict = classifySample(name);

      if (verdict === 'skip') continue;

      if (verdict === 'junk') {
        console.log('❌', JSON.stringify(name));
        junkN++;
      } else {
        // Strip punctuation for display only; the stored value is untouched.
        console.log('✅', name.replace(/[^a-zA-Z0-9\s&]/g, '').trim());
        goodN++;
      }
    }

    console.log(`\nGood: ${goodN}, Still bad: ${junkN}`);
  } finally {
    // Release the pool even when a query above throws.
    await pool.end();
  }
})().catch((err) => {
  console.error('Cleanup failed:', err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user