// Smoke-test the crawler's extractor against real casino review sites and
// report, per extracted entry, whether the name looks like a proper brand
// or like page junk. Extraction runs directly on a Puppeteer page; this
// script itself performs no database saves.

// Known-good affiliate review sites from previous crawls.
const TEST_URLS = [
  'https://chipy.com/casinos', // Was: ✅ working fine before
  'https://gamezinger.com/online-casinos/', // Was: ✅ good extraction
  'https://vegasinsider.com/casinos/', // Has table + card layouts on review pages
  'https://casino.guru/casino-reviews', // Large multi-country casino listing site
  'https://bettergambling.ie/casino-sites/', // Irish affiliate with proper structure
];

// Local Chrome used when PUPPETEER_EXECUTABLE_PATH is not set.
const DEFAULT_CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';

// Quick timeout: we are only testing extraction, not doing a full crawl.
const NAVIGATION_TIMEOUT_MS = 8000;
// Grace period after DOMContentLoaded before reading the page.
const SETTLE_DELAY_MS = 2000;
// Only the first few entries per site are printed.
const MAX_ROWS_PER_SITE = 15;

const MIN_NAME_LENGTH = 3;
const MAX_NAME_LENGTH = 50;
// At least half of the characters must be letters ("Bet365" still passes).
const MIN_LETTER_RATIO = 0.5;
const LETTER = /\p{L}/u;

/**
 * Heuristic check that an extracted name is a plausible brand name.
 *
 * A name passes when, after trimming, it is 3-50 characters long, starts
 * with a letter, and at least half of its characters are letters.
 *
 * @param {unknown} name - Candidate name; non-strings are rejected.
 * @returns {boolean} True when the name looks like a brand.
 */
function looksLikeBrand(name) {
  if (typeof name !== 'string') return false;

  // Spread iterates by code point so non-BMP characters count once.
  const chars = [...name.trim()];
  if (chars.length < MIN_NAME_LENGTH || chars.length > MAX_NAME_LENGTH) return false;
  if (!LETTER.test(chars[0])) return false;

  const letterCount = chars.filter((ch) => LETTER.test(ch)).length;
  return letterCount / chars.length >= MIN_LETTER_RATIO;
}

/**
 * Builds one report line for an extracted entry.
 *
 * @param {number} index - Zero-based position of the entry in the result list.
 * @param {{name?: unknown, link?: unknown}} entry - Extracted entry; missing
 *   or non-string fields are rendered as empty strings instead of throwing.
 * @returns {string} Human-readable row with a ✅/⚠️ brand-name verdict.
 */
function formatRow(index, entry) {
  const name = String(entry?.name ?? '');
  const link = typeof entry?.link === 'string' ? entry.link : '';
  const verdict = looksLikeBrand(name) ? '✅' : '⚠️';
  const rowNumber = String(index + 1).padStart(3);
  return ` #${rowNumber} | ${verdict} | Brand: ${name.padEnd(35)} | Link: ${link.substring(0, 60)}`;
}

/**
 * Loads one URL, runs the crawler's extractor on it and prints the results.
 * Failures are logged and do not abort the remaining sites.
 *
 * @param {object} browser - Launched Puppeteer browser.
 * @param {object} crawler - Crawler instance exposing `extractCasinoData(page)`.
 * @param {string} url - Absolute URL of the review site to test.
 * @returns {Promise<void>}
 */
async function testSite(browser, crawler, url) {
  const siteName = new URL(url).hostname.replace(/^www\./, '');
  let page = null;

  try {
    page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    );
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT_MS });
    await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));

    const title = await page.title();
    const result = await crawler.extractCasinoData(page);
    // NOTE(review): assumes the extractor resolves to an array of
    // { name, link } objects; anything else is reported as zero results.
    const extracted = Array.isArray(result) ? result : [];

    console.log(`\n=== ${siteName} === | Title: ${title} | Found: ${extracted.length}`);
    for (const [index, entry] of extracted.slice(0, MAX_ROWS_PER_SITE).entries()) {
      console.log(formatRow(index, entry));
    }
  } catch (err) {
    // Only the first line of the message; Puppeteer errors can be very long.
    console.error(`Test of ${siteName}:`, String(err?.message ?? err).split('\n')[0]);
  } finally {
    // Best-effort cleanup: a failed close must not mask the real result.
    if (page) await page.close().catch(() => {});
  }
}

/**
 * Launches the browser, tests every URL in TEST_URLS sequentially and
 * always closes the browser afterwards.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Loaded here rather than at file scope so the pure helpers above can be
  // evaluated without Puppeteer or the project crawler being available.
  const puppeteer = require('puppeteer-extra');
  const StealthPlugin = require('puppeteer-extra-plugin-stealth');
  const CasinoCrawlerClass = require('./src/services/crawler');

  puppeteer.use(StealthPlugin());

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || DEFAULT_CHROME_PATH,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const crawler = new CasinoCrawlerClass();
    for (const url of TEST_URLS) {
      await testSite(browser, crawler, url);
    }
  } finally {
    await browser.close();
  }
}

// Run only when executed directly (`node <this file>`).
if (typeof module !== 'undefined' && require.main === module) {
  main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
}