'use strict';

const cheerio = require('cheerio');
const rp = require('request-promise');
const fs = require('fs');

const BASE_URL = "https://www.skroutz.gr";
const SHOP_URL = "/m.DasHome.10646.html";

// Page size used to turn a category's product count into a number of pages.
// NOTE(review): 48 is taken from the original code; confirm it still matches the site.
const PRODUCTS_PER_PAGE = 48;

// Delay before the first queued request is sent, and spacing between later ones.
const FIRST_REQUEST_DELAY_MS = 100;
const REQUEST_INTERVAL_MS = 1000;

// Pending requests as [resolve, reject, options] triples, served first-in first-out.
const queue = [];
// Handle of the pending queue tick, or null while the queue is idle.
let _timer = null;

/**
 * Sends the next queued request and schedules the following tick.
 *
 * Requests are started on a fixed interval; a tick does not wait for the
 * previous request to finish. When the queue is empty the timer is cleared so
 * that the next httpsGet() call restarts the loop.
 */
function exec() {
  if (queue.length === 0) {
    _timer = null;
    return;
  }

  const [resolve, reject, options] = queue.shift();
  console.log("Performing Query", options.uri);
  rp(options).then(resolve, reject);
  _timer = setTimeout(exec, REQUEST_INTERVAL_MS);
}

/**
 * Queues a GET request for `uri` and resolves with the response body loaded
 * into cheerio.
 *
 * @param {string} uri - Absolute URL to fetch.
 * @returns {Promise<Function>} Resolves with the cheerio root (`$`) for the
 *   page; rejects with whatever request-promise rejects with.
 */
function httpsGet(uri) {
  const options = {
    uri,
    headers: {
      'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    },
    transform: (body) => cheerio.load(body)
  };

  // new Promise is needed here: resolve/reject are handed to the queue so the
  // request itself can be sent later by exec().
  return new Promise((resolve, reject) => {
    queue.push([resolve, reject, options]);
    if (_timer === null) {
      _timer = setTimeout(exec, FIRST_REQUEST_DELAY_MS);
    }
  });
}

/**
 * Scrapes the shop page, its category listings and the per-product shop
 * offers, using the rate-limited httpsGet() helper.
 */
class Crawler {
  constructor() {
    this.baseURL = BASE_URL;
  }

  /**
   * Reads the category cards from the shop landing page.
   *
   * @returns {Promise<Array<{name: string, quantity: number, link: string}>>}
   *   One entry per category anchor. `quantity` is NaN when the count text
   *   cannot be parsed.
   * @throws Rejects (after logging) when the page cannot be fetched or parsed.
   *   The caller cannot continue without categories, so the error is not
   *   swallowed.
   */
  getCategories() {
    return httpsGet(this.baseURL + SHOP_URL)
      .then(($) => {
        const anchors = $(".super-categories .card:not(.manufacturer-card) h2 a").toArray();

        // Read every field from the same anchor so a card without a count
        // cannot shift the counts of the cards after it.
        return anchors.map((anchor) => {
          const $anchor = $(anchor);
          const countText = $anchor.find('span').first().text()
            .replace('(', '')
            .replace(')', ''); // remove parentheses

          return {
            name: $anchor.attr('title'),
            quantity: parseInt(countText, 10),
            link: this.baseURL + $anchor.attr('href')
          };
        });
      })
      .catch((e) => {
        console.log("ERROR:::::", e);
        throw e;
      });
  }

  /**
   * Reads the products listed on one page of a category.
   *
   * @param {string} url - Absolute URL of the listing page.
   * @returns {Promise<Array<{link: string, sku: *, price: string, title: string}>>}
   *   Products found on the page. Resolves with an empty array (after logging)
   *   when the page cannot be fetched, so one bad page does not abort the crawl.
   */
  getProducts(url) {
    return httpsGet(url)
      .then(($) => {
        const result = [];

        for (const item of $("ol#sku-list li").toArray()) {
          const $item = $(item);
          const $anchor = $item.find("div.price a.js-sku-link").first();

          // List items without a product link carry nothing to follow up on.
          if ($anchor.length === 0) {
            continue;
          }

          result.push({
            link: this.baseURL + $anchor.attr('href'),
            sku: $item.data('skuid'),
            price: $anchor.text(),
            title: $anchor.attr('title')
          });
        }

        return result;
      })
      .catch((er) => {
        console.log("ERROR PRODUCT GET : ", er);
        return [];
      });
  }

  /**
   * Reads every page of a category and returns all products as one flat list.
   *
   * @param {string} url - Absolute URL of the category listing (without `page`).
   * @param {number} total - Number of products in the category; used to derive
   *   the page count. A NaN or non-positive value yields no requests.
   * @returns {Promise<Array<Object>>} Concatenated getProducts() results.
   */
  getAllProducts(url, total) {
    console.log(url);

    const pages = Math.ceil(total / PRODUCTS_PER_PAGE);
    const requests = [];
    for (let page = 1; page <= pages; page++) {
      requests.push(this.getProducts(`${url}?page=${page}`));
    }

    return Promise.all(requests)
      .then((lists) => [].concat(...lists))
      .catch((e) => {
        console.log("ALL PRODUCTS ERROR:::::", url, e);
        return [];
      });
  }

  /**
   * Reads the shops offering a product and the price each one asks.
   *
   * NOTE(review): the tail of this method was missing from the source this
   * was rebuilt from; the `{ name, price }` record shape and the error
   * handling are reconstructed. Confirm against consumers of the "data" file.
   *
   * @param {string} url - Absolute URL of the product page.
   * @returns {Promise<Array<{name: string, price: number}>>} One entry per
   *   offer row. `price` is NaN when the price text cannot be parsed. Resolves
   *   with an empty array (after logging) when the page cannot be fetched.
   */
  getCompanies(url) {
    console.log(url);

    return httpsGet(url)
      .then(($) => {
        const res = [];

        for (const item of $("ol#prices li").toArray()) {
          const $item = $(item);
          const priceText = $item.find(".price a.product-link").first().text();

          res.push({
            name: $item.find(".shop .shop-name").first().text(),
            // NOTE(review): only the first ',' is turned into a decimal point;
            // a value with a '.' thousands separator would be misread. Verify
            // against real pages.
            price: parseFloat(priceText.replace('€', '').replace(',', '.'))
          });
        }

        return res;
      })
      .catch((e) => {
        console.log("ERROR COMPANIES GET : ", e);
        return [];
      });
  }

  /**
   * Runs the full crawl: categories, then every product of every category,
   * then the offers for every product.
   *
   * NOTE(review): the opening of this method was missing from the source this
   * was rebuilt from; the `result` initialisation is reconstructed.
   *
   * @returns {Promise<{categories: Array, skus: Array<Array>, companies: Array<Array<Array>>}>}
   *   `skus[i]` holds the products of `categories[i]`, and `companies[i][j]`
   *   holds the offers for `skus[i][j]`.
   */
  retrieve() {
    const result = {};

    return this.getCategories()
      .then((categories) => {
        result.categories = categories;
        return Promise.all(
          categories.map((cat) => this.getAllProducts(cat.link, cat.quantity))
        );
      })
      .then((skus) => {
        result.skus = skus;
        // skus is one product list per category, so the product links sit one
        // level down; mapping the outer array directly would pass undefined.
        return Promise.all(
          skus.map((products) =>
            Promise.all(products.map((sku) => this.getCompanies(sku.link)))
          )
        );
      })
      .then((companies) => {
        result.companies = companies;
        return result;
      });
  }
}

const crawler = new Crawler();

crawler.retrieve()
  .then((data) => {
    fs.writeFileSync("data", JSON.stringify(data));
    console.log("SUCCESS123123");
    console.log(data);
    console.log("SUCCESS123123");
  })
  .catch((e) => {
    // Leave any existing "data" file untouched when the crawl fails.
    console.log("CRAWL FAILED:::::", e);
    process.exitCode = 1;
  });

// Ad-hoc calls kept for manual debugging.
// NOTE(review): these pass site-relative paths, but the methods hand their
// argument to httpsGet() unchanged; prefix BASE_URL before enabling them.
//crawler.getCategories();
// crawler.getAllProducts('/c/1158/kourtines/m/10646/Das-Home.html', 49);
// crawler.getCompanies('/s/12890875/Das-Home-%CE%A3%CE%B5%CF%84-Ninjago-Lego-5008-160x260.html');