/**
 * Crawler for the Das Home shop pages on skroutz.gr.
 *
 * All HTTP traffic goes through a single throttled queue (see `httpsGet` /
 * `exec`), and every response body is parsed with cheerio before it reaches
 * the caller. The module exports one shared `Crawler` instance.
 */
const cheerio = require('cheerio');
const request = require('request');
const rp = require('request-promise');
const fs = require('fs');
const Actions = require('klapi/systems/ActionSystem');

// Proxy variant kept for reference:
// const rp = rpi.defaults({'proxy': "http://anonymous:anonymous@93.109.241.246:54325"})

const BASE_URL = "https://www.skroutz.gr";
const SHOP_URL = "/m.DasHome.10646.html";

// Page size used by the pagination arithmetic in getAllProducts.
const PAGE_SIZE = 48;
// Delay before the first queued request is sent.
const FIRST_REQUEST_DELAY_MS = 100;
// Gap between consecutive requests: fixed part plus a random jitter.
const MIN_REQUEST_GAP_MS = 4000;
const REQUEST_JITTER_MS = 2000;

// Pending requests, each stored as [resolve, reject, requestOptions].
const queue = [];
// Truthy while the drain loop is scheduled; reset to null once the queue is empty.
let _timer = null;

/**
 * Sends the next queued request and schedules the following drain step.
 * Stops (and clears `_timer`) when the queue is empty so that the next
 * `httpsGet` call restarts the loop.
 */
function exec() {
  if (!queue.length) {
    _timer = null;
    return;
  }
  const [resolve, reject, options] = queue.shift();
  console.log("Performing Query", options.uri);
  rp(options).then(resolve).catch(reject);
  _timer = setTimeout(exec, MIN_REQUEST_GAP_MS + (REQUEST_JITTER_MS * Math.random()));
}

/**
 * Queues a GET request and resolves with the cheerio-loaded response body.
 *
 * @param {string} uri - Absolute URL to fetch.
 * @returns {Promise<Function>} Resolves with the cheerio root (`$`); rejects
 *   with whatever request-promise rejects with.
 */
function httpsGet(uri) {
  const options = {
    uri,
    tunnel: false,
    // proxy: "http://93.109.241.246:54325", tunnel:false,
    headers: { 'user-agent': 'Chrome/74.0.3729.169' },
    transform: (body) => cheerio.load(body)
  };
  // A plain `return rp(options);` would bypass the throttle queue.
  return new Promise((res, rej) => {
    queue.push([res, rej, options]);
    if (!_timer) {
      _timer = setTimeout(exec, FIRST_REQUEST_DELAY_MS);
    }
  });
}

/**
 * Parses a price label such as "12,90 €" into a number.
 * Strips the euro sign and turns the decimal comma into a dot.
 *
 * @param {string} text - Raw price text taken from the page.
 * @returns {number} Parsed price, or NaN when the text is not numeric.
 */
function parsePrice(text) {
  return parseFloat(String(text).replace('€', '').replace(',', '.'));
}

class Crawler {
  constructor() {
    this.baseURL = BASE_URL;
  }

  /**
   * Reads the shop landing page and lists its product categories.
   * Emits `crawlCategory` with the resulting list.
   *
   * @param {number} [limit=9999] - Maximum number of categories to return.
   * @returns {Promise<Array<{name: string, quantity: number, link: string}>>}
   *   Resolves with the categories, or with an empty list when the request
   *   or the parsing fails (the error is logged).
   */
  getCategories(limit = 9999) {
    return httpsGet(this.baseURL + SHOP_URL)
      .then(($) => {
        const anchorSelector = ".super-categories .card:not(.manufacturer-card) h2 a";
        const anchors = $(anchorSelector).toArray();
        // The <span> inside each anchor holds the item count as "(N)".
        const quantities = $(anchorSelector + " span").toArray()
          .map((el) => $(el).text().replace('(', '').replace(')', ''));

        const objects = anchors.slice(0, limit).map((el, index) => ({
          name: $(el).attr('title'),
          quantity: parseInt(quantities[index], 10),
          link: this.baseURL + $(el).attr('href')
        }));
        Actions.emit('crawlCategory', objects);
        return objects;
      })
      .catch((e) => {
        console.log("ERROR:::::", e);
        // Resolve with an empty list so callers can keep iterating.
        return [];
      });
  }

  /**
   * Reads one listing page and extracts its products.
   * Emits `crawlSKU` with the resulting list.
   *
   * @param {string} url - Absolute URL of the listing page.
   * @param {string} category - Category name copied onto every product.
   * @returns {Promise<Array<{link: string, sku: *, price: number, title: string, category: string}>>}
   *   Resolves with the products, or with an empty list on failure (logged).
   */
  getProducts(url, category) {
    return httpsGet(url)
      .then(($) => {
        const cards = $("ol#sku-list li").toArray().map((item) => $(item).data('skuid'));
        const anchors = $("ol#sku-list li div.price a.js-sku-link").toArray();
        const links = anchors.map((item) => $(item).attr('href'));
        const prices = anchors.map((item) => $(item).text());
        const titles = anchors.map((item) => $(item).attr('title'));

        const result = [];
        for (let i = 0; i < cards.length; i++) {
          // Skip cards that have no matching product link.
          if (!links[i] || links[i] === "undefined") continue;
          result.push({
            link: this.baseURL + links[i],
            sku: cards[i],
            // Same comma-decimal handling as getCompanies, so cents are kept.
            price: parsePrice(prices[i]),
            title: titles[i],
            category
          });
        }
        Actions.emit('crawlSKU', result);
        return result;
      })
      .catch((er) => {
        console.log("ERROR PRODUCT GET : ", er);
        // An empty page keeps the flattened result free of `undefined` holes.
        return [];
      });
  }

  /**
   * Fetches every listing page of a category and concatenates the products.
   *
   * @param {string|undefined} url - Category URL; `undefined` yields `[]`.
   * @param {number} total - Number of products in the category, used to
   *   derive the page count.
   * @param {string} name - Category name passed through to getProducts.
   * @returns {Promise<Array<Object>>} Products of all pages in page order;
   *   an empty list on failure (logged).
   */
  getAllProducts(url, total, name) {
    if (url === undefined) return Promise.resolve([]);

    const pages = Math.ceil(total / PAGE_SIZE);
    const pending = [];
    for (let page = 1; page <= pages; page++) {
      pending.push(this.getProducts(url + "?page=" + page, name));
    }
    return Promise.all(pending)
      .then((values) => [].concat(...values))
      .catch((e) => {
        console.log("ALL PRODUCTS ERROR:::::", url, e);
        return [];
      });
  }

  /**
   * Reads a product page and lists the shops offering the product.
   *
   * NOTE(review): the tail of this method was lost from the source text
   * (everything after `for (let i=0;i`). The loop body below is a minimal
   * reconstruction from the values computed above it; confirm the record
   * field names, and whether an `Actions.emit(...)` call belonged here,
   * against version control.
   *
   * @param {string|undefined} url - Product page URL; `undefined` yields `[]`.
   * @param {*} sku - SKU identifier attached to every record.
   * @returns {Promise<Array<{name: string, price: number, sku: *}>>}
   */
  getCompanies(url, sku) {
    if (url === undefined) return Promise.resolve([]);

    return httpsGet(url)
      .then(($) => {
        const companies = $("ol#prices li").toArray();
        const names = $("ol#prices li .shop .shop-name").toArray().map((i) => $(i).text());
        const prices = $("ol#prices li .price a.product-link").toArray().map((i) => parsePrice($(i).text()));

        const res = [];
        for (let i = 0; i < companies.length; i++) {
          res.push({ name: names[i], price: prices[i], sku });
        }
        return res;
      });
  }

  /**
   * Runs the full crawl: categories, then every product of every category,
   * then the shop offers of every product.
   *
   * NOTE(review): the opening of this method (declaration of `result` and
   * the `getCategories()` call) was lost from the source text and is
   * reconstructed from the surviving promise chain; confirm against
   * version control.
   *
   * @returns {Promise<{categories: Array, skus: Array, companies: Array}>}
   */
  retrieve() {
    const result = {};
    return this.getCategories()
      .then((categories) => {
        result.categories = categories;
        const perCategory = categories.map((cat) =>
          this.getAllProducts(cat.link, cat.quantity, cat.name));
        return Promise.all(perCategory).then((values) => [].concat(...values));
      })
      .then((skus) => {
        result.skus = skus;
        const perSku = skus.map((sku) => this.getCompanies(sku.link, sku.sku));
        return Promise.all(perSku).then((values) => [].concat(...values));
      })
      .then((companies) => {
        result.companies = companies;
        return result;
      });
  }
}

// Manual usage examples:
// let crawler = new Crawler();
// crawler.retrieve().then((data) => {
//   fs.writeFileSync("data", JSON.stringify(data));
//   console.log("SUCCESS123123");
//   console.log(data);
//   console.log("SUCCESS123123");
// });
// crawler.getCategories();
// crawler.getAllProducts(BASE_URL + '/c/2729/anostromata/m/10646/Das-Home.html', 49);
// crawler.getCompanies('/s/12890875/Das-Home-%CE%A3%CE%B5%CF%84-Ninjago-Lego-5008-160x260.html');

module.exports = new Crawler();