93 lines
3.1 KiB
JavaScript
93 lines
3.1 KiB
JavaScript
const { pool } = require('./src/db');
|
|
|
|
(async () => {
|
|
console.log('Cleaning junk entries from casinos table...\n');
|
|
|
|
// Step 1: Delete rows whose name matches exact known junk patterns
|
|
var toDeletePatterns = [
|
|
'', 'menu', 'nav', 'home', 'login', 'signup', 'register', 'account',
|
|
'support', 'help', 'contact us', 'about us', 'terms', 'privacy policy',
|
|
'top online casinos review list',
|
|
'gambleaware', 'casino bonus codes not working',
|
|
'free spins no deposit', 'welcome bonus',
|
|
'crypto casino', 'bitcoin', 'best online casinos'
|
|
];
|
|
|
|
for (const pattern of toDeletePatterns) {
|
|
try {
|
|
const r = await pool.query(
|
|
"DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) = $1",
|
|
[pattern.toLowerCase()]
|
|
);
|
|
if (r.rowCount > 0) {
|
|
console.log(' Deleted', r.rowCount, 'matching:', JSON.stringify(pattern));
|
|
}
|
|
} catch(e) {}
|
|
}
|
|
|
|
// Step 2: Delete rows where name matches LIKE patterns for junk text
|
|
var likePatterns = [
|
|
'%casino bonus codes not working% ',
|
|
'%online casinos & casino sites',
|
|
'dmca%',
|
|
'crypto bonus%',
|
|
'%willkommenspaket%',
|
|
'lizenz und spielerschutz bei exclusive casino',
|
|
'krypto bonus bis%',
|
|
'%deposit match up to%',
|
|
'%bonus und freispiele%',
|
|
'%exclusive bonuses%',
|
|
'%casino reps%',
|
|
'%casino bonus codes%',
|
|
'get bonus%',
|
|
'visit%casino%'
|
|
];
|
|
|
|
for (const pat of likePatterns) {
|
|
try {
|
|
const r = await pool.query(
|
|
"DELETE FROM casinos WHERE LOWER(casino_name) LIKE $1",
|
|
[pat.toLowerCase()]
|
|
);
|
|
if (r.rowCount > 0) {
|
|
console.log(' Deleted', r.rowCount, 'matching:', JSON.stringify(pat));
|
|
}
|
|
} catch(e) {}
|
|
}
|
|
|
|
// Step 3: Delete rows where name looks like phone numbers or prices or is too long
|
|
await pool.query(
|
|
"DELETE FROM casinos WHERE casino_name ~* '^\\d+\\s?\\d+' OR LOWER(casino_name) LIKE '%trustpilot%' OR LENGTH(casino_name) > 100"
|
|
);
|
|
|
|
// Step 4: Delete rows starting with symbols/emoji/special chars (not real brand names)
|
|
await pool.query(
|
|
'DELETE FROM casinos WHERE casino_name ~* \'^[\\+\\-\\>\\.]+\' OR casino_name ~ \'^\\\\s+$\''
|
|
);
|
|
|
|
// Count what remains per crawl
|
|
const countResult = await pool.query(
|
|
"SELECT c.id, c.site_name, COUNT(*) as cnt FROM crawls c LEFT JOIN casinos ca ON ca.crawl_id = c.id GROUP BY c.id, c.site_name ORDER BY c.id DESC LIMIT 30"
|
|
);
|
|
|
|
console.log('\n=== Remaining entries per recent crawl ===');
|
|
for (const row of countResult.rows) {
|
|
const name = row.site_name || '[failed]';
|
|
console.log('Crawl#', String(row.id).padEnd(4), '|', name.padEnd(25), '|', row.cnt, 'entries saved');
|
|
}
|
|
|
|
// Show sample of kept names after cleanup
|
|
const keptNames = await pool.query(
|
|
"SELECT DISTINCT casino_name FROM casinos WHERE LENGTH(TRIM(casino_name)) > 3 ORDER BY casino_name LIMIT 100"
|
|
);
|
|
|
|
console.log('\n=== Sample of valid names remaining in DB ===');
|
|
for (const row of keptNames.rows) {
|
|
const n = row.casino_name;
|
|
if (!n || !/[a-zA-Z]/.test(n[0])) continue; // skip remaining junk
|
|
console.log(' ', n);
|
|
}
|
|
|
|
await pool.end();
|
|
})();
|