Initial commit

This commit is contained in:
Joe
2026-06-26 14:12:10 +02:00
commit 12518b259c
5258 changed files with 732924 additions and 0 deletions
+87
View File
@@ -0,0 +1,87 @@
const puppeteer = require('puppeteer-extra');
var StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
/**
 * Diagnostic script: loads a fixed list of casino-listing pages in a stealth
 * Puppeteer browser, runs the project's crawler extractors against each page,
 * and prints the first few extracted entries with a brand-name sanity flag.
 * Afterwards it prints the brand-looking casino names currently stored in the
 * database for the same two sites, for manual comparison.
 *
 * Exits with code 0 on success and 1 if an unexpected error escapes.
 */
(async () => {
  // Load the crawler before starting Chrome so that a failing import cannot
  // leave a browser process behind.
  const CasinoCrawlerClass = require('./src/services/crawler');
  const crawler = new CasinoCrawlerClass();

  const urlsToTest = [
    'https://www.askgamblers.com/online-casinos/reviews',
    'https://www.actionnetwork.com/casino'
  ];
  const maxRowsShown = 15;
  const settleDelayMs = 3000;

  const browser = await puppeteer.launch({
    headless: 'new',
    executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  try {
    for (const url of urlsToTest) {
      const siteHost = new URL(url).hostname;
      console.log('\n=== Testing:', siteHost, '===');
      let page;
      try {
        page = await browser.newPage();
        await page.setUserAgent(
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        );
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 12000 });
        // Give client-side rendering a moment to populate the listing. The
        // promise must be awaited, otherwise no delay actually happens.
        await new Promise((resolve) => setTimeout(resolve, settleDelayMs));
        console.log('Page title:', (await page.title()).substring(0, 120));

        // Run all extractors.
        // NOTE(review): assumes extractCasinoData resolves to an array of
        // objects carrying `name` and `link` - confirm against the crawler.
        const result = await crawler.extractCasinoData(page);
        const extracted = Array.isArray(result) ? result : [];
        console.log('Total found by extractors:', extracted.length);

        const shown = Math.min(extracted.length, maxRowsShown);
        for (let i = 0; i < shown; i++) {
          const casino = extracted[i] || {};
          const brandName = casino.name || '[null]';

          let linkDest = '[no link]';
          if (casino.link) {
            try {
              // Resolve relative links against the page URL, then keep only
              // the origin so the destination site is easy to eyeball.
              linkDest = new URL(casino.link, url).origin;
            } catch (linkErr) {
              // Unparseable link: fall back to a truncated raw value.
              linkDest = String(casino.link).substring(0, 60);
            }
          }

          // Same heuristic as the DB section below, so both listings agree.
          const brandOk = looksLikeBrand(brandName);
          console.log(
            ' #' + String(i + 1).padEnd(4),
            '|',
            brandOk ? '✅' : '⚠️',
            'Brand:',
            brandName.padEnd(35),
            '| Link:',
            linkDest
          );
        }
      } catch (err) {
        // One failing site must not abort the whole run; log the first line
        // of the message only (Puppeteer errors can be very long).
        const message = String((err && err.message) || err);
        console.error(siteHost + ':', message.split('\n')[0]);
      } finally {
        if (page) await page.close().catch(() => {});
      }
    }
  } finally {
    // Best-effort shutdown so Chrome is not left running.
    await browser.close().catch(() => {});
  }

  // Show what the DB actually has for comparison
  const { pool } = require('./src/db');
  try {
    console.log('\n\n=== What\'s currently saved in DB ===');
    const r = await pool.query(
      'SELECT DISTINCT ca.casino_name FROM casinos ca JOIN crawls c ON ca.crawl_id = c.id WHERE c.site_name = $1 OR c.site_name = $2 ORDER BY casino_name LIMIT 40',
      ['askgamblers.com', 'actionnetwork.com']
    );
    console.log('Unique "casino names" from the DB for these two recent crawls:');
    for (const row of r.rows) {
      const name = row.casino_name || '[null]';
      // Only show ones that look like real casino brands
      if (!looksLikeBrand(name)) continue;
      console.log(' ', '✅', '|', name);
    }
  } finally {
    await pool.end();
  }

  process.exit(0);
})().catch((err) => {
  // Anything that escapes the handlers above is fatal for this script.
  console.error('Fatal:', (err && err.message) || err);
  process.exit(1);
});
/**
 * Heuristic check for whether a string looks like a brand name.
 *
 * A value passes when it is at least three characters long, starts with an
 * ASCII letter, and more than 60% of its characters are ASCII letters.
 *
 * @param {string} s - Candidate name.
 * @returns {boolean} True when all three conditions hold.
 */
function looksLikeBrand(s) {
  if (!(s.length >= 3)) {
    return false;
  }

  const startsWithLetter = /[a-zA-Z]/.test(s[0]);
  if (!startsWithLetter) {
    return false;
  }

  // String.prototype.match with a global regex yields null when nothing matches.
  const letters = s.match(/[a-zA-Z]/g);
  const letterCount = letters === null ? 0 : letters.length;
  return letterCount / s.length > 0.6;
}