Initial commit

This commit is contained in:
Joe
2026-06-26 14:12:10 +02:00
commit 12518b259c
5258 changed files with 732924 additions and 0 deletions
+38
View File
@@ -0,0 +1,38 @@
const { pool } = require('./src/db');
async function run() {
// Define each junk pattern in its own variable and pass it as a bound query parameter, so the SQL strings contain no nested quotes for the Write tool to mangle
var p1 = '%best online casino%';
var p2 = '%icon%';
var p3 = '%deposit match up to%';
var p4 = '%exclusive bonus%';
var p5 = 'best welcome bonus';
console.log('Step 1 - long page titles/headers');
await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1 AND LENGTH(TRIM(casino_name)) > 15", [p1]);
console.log('Step 2 - icon SVG text scraped as brand names');
await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1", [p2]);
console.log('Step 3 - pricing/promotional bonus text');
await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1 OR LOWER(TRIM(casino_name)) LIKE $2", [p3, p4]);
console.log('Step 4 - specific known junk strings from DB sample');
await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) = $1", [p5]);
console.log('Step 5 - single emoji/symbol entries and paragraphs that are NEVER brands');
await pool.query("DELETE FROM casinos WHERE LENGTH(TRIM(casino_name)) < 3 OR LENGTH(TRIM(casino_name)) > 40");
// Step 6: content section headers such as "Best Paying Casinos" that were stored as casino names
var payingPat = '%paying casinos%';
await pool.query("DELETE FROM casinos WHERE LOWER(TRIM(casino_name)) LIKE $1", [payingPat]);
// Count the casino entries that survived the cleanup, grouped per crawl (30 most recent crawls by id)
var countResult = await pool.query(
"SELECT c.id, c.site_name, COUNT(ca.casino_name) as cnt FROM crawls c JOIN casinos ca ON ca.crawl_id = c.id GROUP BY c.id, c.site_name ORDER BY c.id DESC LIMIT 30"
);
console.log('\n=== Entries per recent crawl ===');
for (const row of countResult.rows) {
var name = row.site_name || '[failed]';
console.log('Crawl#', String(row.id).padEnd(4), '|', name.padEnd(25), '|', row.cnt, 'entries'