const express = require('express');
const path = require('path');
const Module = require('klapi/base/Module');
const OneToOne = require('klapi/base/OneToOne');
const RepositorySystem = require('klapi/systems/RepositorySystem');
const RoutingSystem = require('klapi/systems/RoutingSystem');
const Actions = require('klapi/systems/ActionSystem');
const Crawler = require('./Crawler');
const util = require('util');

/**
 * Runs one "INSERT ... ON DUPLICATE KEY UPDATE ..." statement per row.
 *
 * For every row, the repository's insert and update builders are rendered to
 * SQL text; the leading "update <table> set " prefix is stripped from the
 * update statement so that only its assignment list remains, and the two
 * pieces are joined with the MySQL "ON DUPLICATE KEY UPDATE" clause.
 *
 * NOTE(review): the SQL is produced by calling toString() on the builders
 * returned by `repository.raw()` (presumably knex query builders). Value
 * escaping therefore depends entirely on that toString() implementation --
 * confirm it is safe for crawled (untrusted) strings, or switch to a
 * parameterised upsert if the underlying library offers one.
 *
 * @param {object} repository - Object exposing `raw()` and `knex()`, as
 *   returned by `RepositorySystem.create`.
 * @param {object[]} rows - Records to insert or update.
 * @returns {Promise<Array>} Resolves once every statement has completed.
 */
function upsertAll(repository, rows) {
  const statements = rows.map((row) => {
    const insertSql = repository.raw().insert(row).toString();
    const assignments = repository
      .raw()
      .update(row)
      .toString()
      .replace(/^update\s.*\sset\s/i, '');
    const sql = util.format('%s ON DUPLICATE KEY UPDATE %s', insertSql, assignments);
    return repository.knex().raw(sql);
  });

  return Promise.all(statements);
}

/**
 * Module that registers the crawler's HTTP routes and stores crawl results
 * announced through the action system ('crawlCategory', 'crawlSKU',
 * 'crawlProduct').
 */
class CrawlerModule extends Module {
  constructor() {
    super();
    this.Categories = RepositorySystem.create('Categories');
    this.SKU = RepositorySystem.create('SKU');
    this.Products = RepositorySystem.create('Products');
    // Set before init() so the flag exists by the time any route can fire.
    this.searching = false;
    this.init();
  }

  /**
   * Registers the GET '/init' and GET '/product/:id' routes (loaded with the
   * '/crawler' argument -- presumably the mount prefix) and subscribes the
   * persistence methods to the crawl actions.
   */
  init() {
    const routes = [
      {
        type: 'custom',
        method: 'get',
        endpoint: '/init',
        customFn: (req, res) => {
          this.search(req)
            .then((data) => {
              res.send(data);
            })
            .catch((e) => {
              // NOTE(review): this sends the error object itself with the
              // default status code -- confirm that is what clients expect.
              res.send(e);
            });
        },
      },
      {
        type: 'custom',
        method: 'get',
        endpoint: '/product/:id',
        customFn: (req, res) => {
          this.updateProduct(req)
            .then((data) => {
              res.send(data);
            })
            .catch((e) => {
              console.log(e);
              res.status(404).send(e);
            });
        },
      },
    ];

    this.router = RoutingSystem.loadRoutes(routes, undefined, '/crawler');

    Actions.on('crawlCategory', (data) => this.updateCategories(data));
    Actions.on('crawlSKU', (data) => this.updateSKUs(data));
    Actions.on('crawlProduct', (data) => this.updateProducts(data));
  }

  /**
   * Looks up the SKU whose id is `req.params.id` and asks the crawler for
   * its companies, using the stored `link` and `sku` values.
   *
   * @param {object} req - Request carrying `params.id`.
   * @returns {Promise<*>} Result of `Crawler.getCompanies(link, sku)`;
   *   rejects with Error('No such product') when no SKU row matches.
   */
  updateProduct(req) {
    const id = req.params.id;

    return this.SKU.get('id', id).then((items) => {
      if (!items.length) {
        throw new Error('No such product');
      }
      const { link, sku } = items[0];
      return Crawler.getCompanies(link, sku);
    });
  }

  /**
   * Inserts or updates every category record.
   *
   * @param {object[]} cats - Category records.
   * @returns {Promise<Array>} Resolves once every statement has completed.
   */
  updateCategories(cats) {
    return upsertAll(this.Categories, cats);
  }

  /**
   * Inserts or updates every SKU record.
   *
   * @param {object[]} skus - SKU records.
   * @returns {Promise<Array>} Resolves once every statement has completed.
   */
  updateSKUs(skus) {
    return upsertAll(this.SKU, skus);
  }

  /**
   * Inserts or updates every product record, then deletes the rows of the
   * same SKU whose name is not in the supplied list. The SKU is taken from
   * the first record.
   *
   * @param {object[]} prods - Product records; each is read for `sku`
   *   (first record only) and `name`.
   * @returns {Promise<*>} Result of the delete query, or 0 when `prods` is
   *   missing or empty (nothing is written or deleted in that case).
   */
  updateProducts(prods) {
    // Without this guard an empty crawl result crashed on prods[0].sku.
    if (!prods || prods.length === 0) {
      return Promise.resolve(0);
    }

    const skuID = prods[0].sku;
    const names = prods.map((prod) => prod.name);

    return upsertAll(this.Products, prods).then(() => {
      console.log("DELETING")
      console.log(skuID, names)
      console.log("END ``` DELETING")
      return this.Products.raw().where('SKU', skuID).whereNotIn('name', names).del();
    });
  }

  /**
   * Starts a crawl via `Crawler.retrieve(1)` unless one is already running.
   *
   * The `searching` flag is cleared when the crawl settles, whether it
   * succeeded or failed, so a failed crawl no longer blocks later ones.
   *
   * NOTE(review): nothing is persisted here; storage presumably happens
   * through the crawl* actions subscribed in init() -- confirm.
   *
   * @returns {Promise<undefined>} Resolves with no value when the crawl
   *   finishes; rejects with Error('Already searching') if a crawl is in
   *   progress, or with the crawler's own error.
   */
  search() {
    if (this.searching) {
      // Reject instead of throwing so callers' .catch() handlers see it.
      return Promise.reject(new Error("Already searching"));
    }

    this.searching = true;
    console.log("Searching...");

    const release = () => {
      this.searching = false;
    };

    // The executor form also turns a synchronous throw from
    // Crawler.retrieve into a rejection, so the flag is always released.
    return new Promise((resolve) => resolve(Crawler.retrieve(1))).then(
      () => {
        release();
      },
      (err) => {
        release();
        throw err;
      }
    );
  }
}

module.exports = CrawlerModule;