Files
crawler/run-final-cleanup.js
2026-06-26 14:30:45 +02:00

53 lines
1.6 KiB
JavaScript

const fs = require('fs');
const { pool } = require('./src/db');
/**
 * One-off maintenance script.
 *
 * 1. Reads /tmp/cleanup-final.sql, splits it into statements on ';' and runs
 *    each statement against the shared pool. A failing statement is logged
 *    and skipped so the remaining statements still run (best-effort).
 * 2. Prints the number of rows left in the `casinos` table.
 * 3. Prints up to 60 distinct casino names, each marked as good or bad by a
 *    simple plausibility heuristic, followed by the good/bad totals.
 *
 * The pool is always closed, even when a step fails, and any fatal error is
 * reported with a non-zero exit code instead of an unhandled rejection.
 */
(async () => {
  // Names that do not start with an ASCII letter are left out of the sample.
  const STARTS_WITH_LETTER = /^[a-zA-Z]/;
  // Words that suggest scraped page chrome rather than a casino name.
  const PAGE_CHROME_WORDS = /(about|home|menu|nav|terms|privacy|contact|help)/i;
  // Characters stripped from a good name before it is printed.
  const DISPLAY_NOISE = /[^a-zA-Z0-9\s&]/g;

  /**
   * Heuristic used for the printed sample only.
   * @param {string} name - non-empty casino name that starts with a letter
   * @returns {boolean} true when the name is 4-40 chars long, more than half
   *   of its characters are ASCII letters, and it contains no page-chrome word
   */
  const looksLikeCasinoName = (name) => {
    const letterCount = (name.match(/[a-zA-Z]/g) || []).length;
    return (
      name.length >= 4 &&
      name.length <= 40 &&
      letterCount / name.length > 0.5 &&
      !PAGE_CHROME_WORDS.test(name)
    );
  };

  try {
    // NOTE: splitting on ';' is naive - a semicolon inside a string literal
    // or comment in the SQL file would cut a statement in half.
    const statements = fs
      .readFileSync('/tmp/cleanup-final.sql', 'utf8')
      .split(';')
      .map((statement) => statement.trim())
      .filter(Boolean);

    for (const statement of statements) {
      try {
        const result = await pool.query(statement);
        // The message presumes the statements are DELETEs; rowCount is
        // reported for whatever statement ran.
        if (result.rowCount > 0) {
          console.log('Deleted', result.rowCount, 'matching:', statement.substring(0, 50));
        }
      } catch (err) {
        // Deliberately best-effort: report and continue with the next one.
        console.warn('Failed query:', err.message.substring(0, 60));
      }
    }

    const remaining = await pool.query('SELECT COUNT(*) FROM casinos');
    console.log('\nTotal remaining:', remaining.rows[0].count);

    // Show a clean sample to verify quality after cleanup
    const sample = await pool.query(
      'SELECT DISTINCT casino_name FROM casinos WHERE LENGTH(TRIM(casino_name)) BETWEEN 3 AND 40 ORDER BY casino_name LIMIT 60'
    );
    console.log('\n=== Sample after final cleanup ===\n');

    let goodN = 0;
    let badN = 0;
    for (const row of sample.rows) {
      const name = row.casino_name || '';
      if (!STARTS_WITH_LETTER.test(name)) continue;

      if (looksLikeCasinoName(name)) {
        console.log('✅', name.replace(DISPLAY_NOISE, '').trim());
        goodN++;
      } else {
        console.log('❌', JSON.stringify(name));
        badN++;
      }
    }
    console.log(`\nGood: ${goodN}, Bad: ${badN}`);
  } finally {
    // Release the pool on every path so a failure does not leave it open.
    await pool.end();
  }
})().catch((err) => {
  // Surface fatal errors explicitly rather than as an unhandled rejection.
  console.error('Final cleanup failed:', err);
  process.exitCode = 1;
});