const { pool } = require('./src/db'); (async () => { console.log('Cleaning junk entries from casinos table...\n'); // Step 1: Delete rows whose name matches exact known junk patterns var toDeletePatterns = [ '', 'menu', 'nav', 'home', 'login', 'signup', 'register', 'account', 'support', 'help', 'contact us', 'about us', 'terms', 'privacy policy', 'top online casinos review list', 'gambleaware', 'casino bonus codes not working', 'free spins no deposit', 'welcome bonus', 'crypto casino', 'bitcoin', 'best online casinos' ]; for (const pattern of toDeletePatterns) { try { const r = await pool.query( "DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) = $1", [pattern.toLowerCase()] ); if (r.rowCount > 0) { console.log(' Deleted', r.rowCount, 'matching:', JSON.stringify(pattern)); } } catch(e) {} } // Step 2: Delete rows where name matches LIKE patterns for junk text var likePatterns = [ '%casino bonus codes not working% ', '%online casinos & casino sites', 'dmca%', 'crypto bonus%', '%willkommenspaket%', 'lizenz und spielerschutz bei exclusive casino', 'krypto bonus bis%', '%deposit match up to%', '%bonus und freispiele%', '%exclusive bonuses%', '%casino reps%', '%casino bonus codes%', 'get bonus%', 'visit%casino%' ]; for (const pat of likePatterns) { try { const r = await pool.query( "DELETE FROM casinos WHERE LOWER(casino_name) LIKE $1", [pat.toLowerCase()] ); if (r.rowCount > 0) { console.log(' Deleted', r.rowCount, 'matching:', JSON.stringify(pat)); } } catch(e) {} } // Step 3: Delete rows where name looks like phone numbers or prices or is too long await pool.query( "DELETE FROM casinos WHERE casino_name ~* '^\\d+\\s?\\d+' OR LOWER(casino_name) LIKE '%trustpilot%' OR LENGTH(casino_name) > 100" ); // Step 4: Delete rows starting with symbols/emoji/special chars (not real brand names) await pool.query( 'DELETE FROM casinos WHERE casino_name ~* \'^[\\+\\-\\>\\.]+\' OR casino_name ~ \'^\\\\s+$\'' ); // Count what remains per crawl const countResult = await pool.query( "SELECT c.id, c.site_name, COUNT(*) as cnt FROM crawls c LEFT JOIN casinos ca ON ca.crawl_id = c.id GROUP BY c.id, c.site_name ORDER BY c.id DESC LIMIT 30" ); console.log('\n=== Remaining entries per recent crawl ==='); for (const row of countResult.rows) { const name = row.site_name || '[failed]'; console.log('Crawl#', String(row.id).padEnd(4), '|', name.padEnd(25), '|', row.cnt, 'entries saved'); } // Show sample of kept names after cleanup const keptNames = await pool.query( "SELECT DISTINCT casino_name FROM casinos WHERE LENGTH(TRIM(casino_name)) > 3 ORDER BY casino_name LIMIT 100" ); console.log('\n=== Sample of valid names remaining in DB ==='); for (const row of keptNames.rows) { const n = row.casino_name; if (!n || !/[a-zA-Z]/.test(n[0])) continue; // skip remaining junk console.log(' ', n); } await pool.end(); })();