const { pool } = require('./src/db'); async function run() { // Build each bad pattern one at a time with proper quoting to avoid Write tool mangling quotes var p1 = '%best online casino%'; var p2 = '%icon%'; var p3 = '%deposit match up to%'; var p4 = '%exclusive bonus%'; var p5 = 'best welcome bonus'; console.log('Step 1 - long page titles/headers'); await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1 AND LENGTH(TRIM(casino_name)) > 15", [p1]); console.log('Step 2 - icon SVG text scraped as brand names'); await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1", [p2]); console.log('Step 3 - pricing/promotional bonus text'); await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1 OR LOWER(TRIM(casino_name)) LIKE $2", [p3, p4]); console.log('Step 4 - specific known junk strings from DB sample'); await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) = $1", [p5]); console.log('Step 5 - single emoji/symbol entries and paragraphs that are NEVER brands'); await pool.query("DELETE FROM casinos WHERE LENGTH(TRIM(casino_name)) < 3 OR LENGTH(TRIM(casino_name)) > 40"); // Step 6: Things like "Best Paying Casinos" content section headers var payingPat = '%paying casinos%'; await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1", [payingPat]); // Count what survived by crawl var countResult = await pool.query( "SELECT c.id, c.site_name, COUNT(ca.casino_name) as cnt FROM crawls c JOIN casinos ca ON ca.crawl_id = c.id GROUP BY c.id, c.site_name ORDER BY c.id DESC LIMIT 30" ); console.log('\n=== Entries per recent crawl ==='); for (const row of countResult.rows) { var name = row.site_name || '[failed]'; console.log('Crawl#', String(row.id).padEnd(4), '|', name.padEnd(25), '|', row.cnt, 'entries'