/**
 * Entry point: fetches the list of categories from the backend, pages through
 * the idealo.fr search-result API for each one, pushes the collected
 * url/price pairs to the backend and then marks the category as checked.
 *
 * Returns nothing; all outcomes are reported through Logger.log.
 */
function fetchProducts() {
  // First, make a request to idealo.fr and check for bot detection
  const idealoHomeUrl = 'https://www.idealo.fr/';
  // NOTE(review): the marker is an empty string, and every string includes '',
  // so this check can never report a bot block. The intended marker text is
  // not recoverable from this file — restore it to make the check effective.
  const homePageMarker = '';

  try {
    const homePageResponse = UrlFetchApp.fetch(idealoHomeUrl, {
      method: 'get',
      headers: getHeaders(),
      muteHttpExceptions: true,
    });
    const homePageHtml = homePageResponse.getContentText();

    if (!homePageHtml.includes(homePageMarker)) {
      Logger.log('Bot detected, exiting');
      return; // Exit the function early
    }
    Logger.log('Bot check passed, proceeding with product fetching');
  } catch (error) {
    Logger.log('Error fetching idealo.fr home page: ' + error);
    return; // Exit the function if there's an error
  }

  // Fetch categories to try
  const categories = fetchCategoriesToTry();
  if (!categories || categories.length === 0) {
    Logger.log('No categories to process. Exiting...');
    return;
  }

  const baseUrl = 'https://www.idealo.fr/csr/api/v2/modules/searchResult';

  for (const category of categories) {
    Logger.log(`Processing category: ${category.idealoId}`);

    const params = {
      idealoId: category.idealoId,
      locale: 'fr_FR',
      priceFrom: 0,
      priceTo: 1000000,
    };
    const allProductsInCategory = [];
    let pageIndex = 0;
    let totalItems = 0;

    Logger.log('Starting to fetch products...');

    // Page through the results until an empty page, a non-200 status or an
    // exception ends the loop.
    while (true) {
      const url =
        `${baseUrl}?categoryId=${encodeURIComponent(params.idealoId)}` +
        `&locale=${params.locale}&pageIndex=${pageIndex}` +
        `&priceFrom=${params.priceFrom}&priceTo=${params.priceTo}` +
        '&itemStates=EXCLUDE_USED';

      try {
        const response = UrlFetchApp.fetch(url, {
          method: 'get',
          headers: getHeaders(),
          muteHttpExceptions: true,
        });

        if (response.getResponseCode() !== 200) {
          Logger.log('Error: Non-200 response code. Stopping execution.');
          break;
        }

        const jsonResponse = JSON.parse(response.getContentText());
        // A payload without an `items` array is treated like an empty page
        // instead of raising a TypeError on `.length`.
        const items = Array.isArray(jsonResponse.items) ? jsonResponse.items : [];

        if (items.length === 0) {
          Logger.log('No more items. Stopping execution.');
          break;
        }

        allProductsInCategory.push(...processItems(items));
        totalItems += items.length;
        pageIndex++;
        Logger.log('Moving to next page. Total items processed so far: ' + totalItems);
      } catch (error) {
        Logger.log('Error fetching data: ' + error);
        break;
      }
    }

    Logger.log(
      `Finished fetching products for category ${category.idealoId}. Total items processed: ${totalItems}`
    );

    // NOTE(review): these run even when the loop above stopped on an error or
    // a non-200 status, so a partially fetched category is still marked as
    // checked — confirm that this is intended.
    updatePrices(allProductsInCategory);
    updateCategoryCheckedAt(category.idealoId);
  }
}

/**
 * Fetches the categories to process from the backend.
 *
 * @returns {*} The parsed JSON body on HTTP 200 (fetchProducts iterates it as
 *   an array of objects with an `idealoId` property), or null when the status
 *   is not 200 or the request/parse throws.
 */
function fetchCategoriesToTry() {
  try {
    const response = UrlFetchApp.fetch('http://164.132.203.174:3003/categories/to-try-check', {
      method: 'GET',
      muteHttpExceptions: true,
    });

    const statusCode = response.getResponseCode();
    if (statusCode !== 200) {
      // Interpolated rather than passed as a second argument: Logger.log treats
      // extra arguments as format values and drops them without a %s placeholder.
      Logger.log(`Failed to fetch categories. Status code: ${statusCode}`);
      return null;
    }

    return JSON.parse(response.getContentText());
  } catch (error) {
    Logger.log(`Error fetching categories: ${error}`);
    return null;
  }
}

/**
 * Tells the backend that a category has been processed.
 *
 * @param {string|number} idealoId - Category identifier, URL-encoded into the
 *   query string.
 */
function updateCategoryCheckedAt(idealoId) {
  try {
    // muteHttpExceptions is false, so failure status codes make fetch throw
    // and are reported by the catch block below.
    const response = UrlFetchApp.fetch(
      `http://164.132.203.174:3003/categories/checked?idealoId=${encodeURIComponent(idealoId)}`,
      {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        payload: JSON.stringify({}),
        muteHttpExceptions: false,
      }
    );

    const statusCode = response.getResponseCode();
    if (statusCode !== 200) {
      Logger.log(`Failed to update checkedAt for category ${idealoId}. Status code: ${statusCode}`);
    } else {
      Logger.log(`Successfully updated checkedAt for category ${idealoId}`);
    }
  } catch (error) {
    Logger.log(`Error updating checkedAt for category ${idealoId}: ${error}`);
  }
}

/**
 * Keeps the items that have at least three offers and converts each one to a
 * `{ url, price }` record.
 *
 * @param {Array<Object>} items - Search-result items; the code reads `id`,
 *   `title`, `offerInfo.offerCount` and `offerInfo.formattedPrice`.
 * @returns {Array<{url: string, price: (number|null)}>} One record per kept item.
 */
function processItems(items) {
  return items
    // Items without offerInfo are skipped instead of throwing on the property read.
    .filter((item) => Boolean(item.offerInfo) && item.offerInfo.offerCount >= 3)
    .map((item) => ({
      url: buildProductUrl(item.id, item.title),
      price: parsePrice(item.offerInfo.formattedPrice),
    }));
}

/**
 * Builds the idealo.fr product page URL from a product id and its title.
 *
 * @param {string|number} mainProductId - Product identifier.
 * @param {string} title - Product title; slugified for the last path segment.
 * @returns {string} The product URL.
 */
function buildProductUrl(mainProductId, title) {
  const slugifiedTitle = slugify(title);
  return `https://www.idealo.fr/prix/${mainProductId}/${slugifiedTitle}.html`;
}

/**
 * Converts text to a lowercase, dash-separated slug.
 *
 * Whitespace runs become a dash, every character outside [A-Za-z0-9_-] is
 * removed (accented letters included), and repeated dashes are collapsed.
 *
 * @param {*} text - Value to slugify; converted with toString().
 * @returns {string} The slug.
 */
function slugify(text) {
  return text
    .toString()
    .toLowerCase()
    .trim()
    .replace(/\s+/g, '-')
    .replace(/[^\w\-]+/g, '')
    .replace(/\-\-+/g, '-');
}

/**
 * Sends products to the backend in batches of 500.
 *
 * A failing batch is logged and the remaining batches are still sent.
 *
 * @param {Array<{url: string, price: (number|null)}>} products - Records to send.
 */
function updatePrices(products) {
  const BATCH_SIZE = 500;

  for (let i = 0; i < products.length; i += BATCH_SIZE) {
    const productBatch = products.slice(i, i + BATCH_SIZE);

    try {
      // muteHttpExceptions is false, so failure status codes make fetch throw
      // and are reported by the catch block below.
      const response = UrlFetchApp.fetch('http://164.132.203.174:3003/products/prices', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        payload: JSON.stringify({ products: productBatch }),
        muteHttpExceptions: false,
      });

      const statusCode = response.getResponseCode();
      if (statusCode !== 200) {
        Logger.log(`Failed to send product prices. Status code: ${statusCode}`);
      } else {
        Logger.log(`Successfully updated prices for ${productBatch.length} products`);
      }
    } catch (error) {
      Logger.log(`Error sending product prices: ${error}`);
    }
  }
}

/**
 * Parses a formatted euro price such as "1 234,56 €" into a number.
 *
 * @param {*} formattedPrice - Price text, or 'N/A'.
 * @returns {number|null} The numeric price, or null for 'N/A', for non-string
 *   input and for text that does not start with a number once cleaned.
 */
function parsePrice(formattedPrice) {
  if (formattedPrice === 'N/A' || typeof formattedPrice !== 'string') {
    return null;
  }

  // Remove all whitespace (\s also matches no-break spaces), remove the
  // currency symbol, and replace the decimal comma with a dot.
  const priceString = formattedPrice
    .replace(/\s/g, '')
    .replace('€', '')
    .replace(',', '.');
  const price = parseFloat(priceString);

  // JSON.stringify would turn NaN into null anyway; return null explicitly.
  return Number.isNaN(price) ? null : price;
}

/**
 * Returns the HTTP request headers sent with every idealo.fr request.
 *
 * @returns {Object<string, string>} A new headers object on each call.
 */
function getHeaders() {
  return {
    'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Cache-Control': 'no-cache',
    'DNT': '1',
    'From': 'googlebot(at)googlebot.com',
    'If-Modified-Since': 'Mon, 26 Dec 2022 12:00:00 GMT',
    'Referer': 'https://www.google.com/',
    'TE': 'Trailers',
    'Upgrade-Insecure-Requests': '1',
  };
}

// Runs as soon as the script is loaded. The guard only skips the call where
// the Apps Script UrlFetchApp service does not exist, so the helpers above can
// be loaded elsewhere without crashing.
// NOTE(review): a top-level call executes on every script load, including when
// another function is invoked — confirm this is intended.
if (typeof UrlFetchApp !== 'undefined') {
  fetchProducts();
}