/**
 * Diagnostic script: loads a few casino listing pages in a stealth-enabled
 * Chrome, runs the project's crawler extractors against each page, prints the
 * first results with a "looks like a brand name" verdict, and finally prints
 * the casino names stored in the database for the same sites for comparison.
 */
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');

puppeteer.use(StealthPlugin());

/** Listing pages that are run through the extractors. */
const URLS_TO_TEST = [
  'https://www.askgamblers.com/online-casinos/reviews',
  'https://www.actionnetwork.com/casino',
];

/** Maximum time to wait for a navigation to reach `domcontentloaded`. */
const NAVIGATION_TIMEOUT_MS = 12000;

/** Extra pause after navigation before the extractors run. */
const SETTLE_DELAY_MS = 3000;

/** Maximum number of extracted entries printed per site. */
const MAX_ROWS_SHOWN = 15;

/**
 * Heuristic check for whether a string looks like a brand name: at least
 * three characters, starts with an ASCII letter, and more than 60% of its
 * characters are ASCII letters.
 *
 * @param {string} s - Candidate name.
 * @returns {boolean} True when the string passes all three checks; false for
 *   non-string input.
 */
function looksLikeBrand(s) {
  if (typeof s !== 'string') {
    return false;
  }
  const letterCount = (s.match(/[a-zA-Z]/g) || []).length;
  return s.length >= 3 && /[a-zA-Z]/.test(s[0]) && letterCount / s.length > 0.6;
}

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Returns the first line of an error's message, tolerating thrown values
 * that are not Error instances.
 *
 * @param {unknown} err - The caught value.
 * @returns {string}
 */
function firstLineOf(err) {
  const message = (err && err.message) || String(err);
  return String(message).split('\n')[0];
}

/**
 * Produces a short printable destination for an extracted link.
 *
 * @param {string|undefined|null} link - Link as returned by the extractor;
 *   may be relative.
 * @param {string} pageUrl - URL of the page the link was found on, used as
 *   the base for resolving relative links.
 * @returns {string} The link's origin, the first 60 characters of the raw
 *   link when it cannot be parsed, or '[no link]' when there is none.
 */
function describeLink(link, pageUrl) {
  if (!link) {
    return '[no link]';
  }
  try {
    return new URL(link, pageUrl).origin;
  } catch (parseError) {
    // Unparseable link: fall back to a truncated raw value.
    return String(link).substring(0, 60);
  }
}

/**
 * Opens one page, runs the crawler's extractors on it and prints the first
 * results. Failures are logged and do not propagate, so one bad site does
 * not stop the remaining sites from being tested.
 *
 * @param {object} browser - Browser returned by `puppeteer.launch`.
 * @param {object} crawler - Crawler instance; only `extractCasinoData(page)`
 *   is called. NOTE(review): assumed to resolve to an array of objects with
 *   optional `name` and `link` fields - confirm against ./src/services/crawler.
 * @param {string} url - Page to load.
 * @returns {Promise<void>}
 */
async function testSite(browser, crawler, url) {
  const siteHost = url.split('/')[2];
  console.log('\n=== Testing:', siteHost, '===');

  let page;
  try {
    page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    );
    await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: NAVIGATION_TIMEOUT_MS,
    });
    // The pause must be awaited, otherwise the extractors run immediately.
    await delay(SETTLE_DELAY_MS);

    console.log('Page title:', (await page.title()).substring(0, 120));

    // Run all extractors
    const extractedCasinos = await crawler.extractCasinoData(page);
    console.log('Total found by extractors:', extractedCasinos.length);

    const shown = Math.min(extractedCasinos.length, MAX_ROWS_SHOWN);
    for (let i = 0; i < shown; i++) {
      const casino = extractedCasinos[i];
      const brandName = String((casino && casino.name) || '[null]');
      const linkDest = describeLink(casino && casino.link, url);
      const brandOk = looksLikeBrand(brandName);
      console.log(
        ' #' + (i + 1).toString().padEnd(4),
        '|',
        brandOk ? '✅' : '⚠️',
        'Brand:',
        brandName.padEnd(35),
        '| Link:',
        linkDest
      );
    }
  } catch (err) {
    console.error(siteHost + ':', firstLineOf(err));
  } finally {
    // Best-effort cleanup: a failure to close the tab is deliberately ignored.
    if (page) await page.close().catch(() => {});
  }
}

/**
 * Prints the distinct casino names stored in the database for the tested
 * sites, keeping only names that pass `looksLikeBrand`. The pool is closed
 * afterwards, including when the query fails.
 *
 * @returns {Promise<void>}
 */
async function printSavedBrands() {
  // Show what the DB actually has for comparison
  const { pool } = require('./src/db');
  try {
    console.log('\n\n=== What\'s currently saved in DB ===');
    const result = await pool.query(
      'SELECT DISTINCT ca.casino_name FROM casinos ca JOIN crawls c ON ca.crawl_id = c.id WHERE c.site_name = $1 OR c.site_name = $2 ORDER BY casino_name LIMIT 40',
      ['askgamblers.com', 'actionnetwork.com']
    );
    console.log('Unique "casino names" from the DB for these two recent crawls:');
    for (const row of result.rows) {
      const name = row.casino_name || '[null]';
      // Only show ones that look like real casino brands
      if (!looksLikeBrand(name)) continue;
      console.log(' ', '✅', '|', name);
    }
  } finally {
    await pool.end();
  }
}

/**
 * Entry point: launches the browser, tests every configured site, closes the
 * browser, then prints the database comparison.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const CasinoCrawlerClass = require('./src/services/crawler');
    const crawler = new CasinoCrawlerClass();
    // Sites are processed one at a time so their output is not interleaved.
    for (const url of URLS_TO_TEST) {
      await testSite(browser, crawler, url);
    }
  } finally {
    // Always release the Chrome process, even when a step above throws.
    await browser.close().catch(() => {});
  }

  await printSavedBrands();
}

// The explicit exit is kept from the original script; a non-zero code is now
// reported when the run fails instead of leaving an unhandled rejection.
main()
  .then(() => process.exit(0))
  .catch((err) => {
    console.error('Fatal:', err);
    process.exit(1);
  });