123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- const express = require('express');
- const path = require('path');
- const Module = require('klapi/base/Module');
- const OneToOne = require('klapi/base/OneToOne');
- const RepositorySystem = require('klapi/systems/RepositorySystem');
- const RoutingSystem = require('klapi/systems/RoutingSystem');
- const Actions = require('klapi/systems/ActionSystem');
- const Crawler = require('./Crawler');
- const util = require('util');
/**
 * Crawler module.
 *
 * Registers the crawler routes under the `/crawler` prefix, subscribes to the
 * `crawlCategory` / `crawlSKU` / `crawlProduct` actions and writes the rows
 * they deliver into the `Categories`, `SKU` and `Products` repositories.
 */
class CrawlerModule extends Module {
  constructor() {
    super();
    this.Categories = RepositorySystem.create('Categories');
    this.SKU = RepositorySystem.create('SKU');
    this.Products = RepositorySystem.create('Products');
    // Initialise the guard flag before init() so it is already defined by the
    // time the routes and action listeners exist.
    this.searching = false;
    this.init();
  }

  /**
   * Builds the route table, hands it to the routing system and subscribes the
   * persistence methods to the crawler actions.
   */
  init() {
    // Sending an Error object directly serialises to `{}`, so expose the
    // message explicitly instead.
    const errorBody = (err) => ({
      error: err && err.message ? err.message : String(err),
    });

    const routes = [
      {
        type: 'custom',
        method: 'get',
        endpoint: '/init',
        // The chain is started inside a promise because search() throws
        // synchronously when a crawl is already running; this way that error
        // reaches the .catch() below instead of escaping the handler.
        customFn: (req, res) =>
          Promise.resolve()
            .then(() => this.search(req))
            .then((data) => {
              res.send(data);
            })
            .catch((e) => {
              res.status(500).send(errorBody(e));
            }),
      },
      {
        type: 'custom',
        method: 'get',
        endpoint: '/product/:id',
        customFn: (req, res) =>
          Promise.resolve()
            .then(() => this.updateProduct(req))
            .then((data) => {
              res.send(data);
            })
            .catch((e) => {
              console.log(e);
              res.status(404).send(errorBody(e));
            }),
      },
    ];

    this.router = RoutingSystem.loadRoutes(routes, undefined, '/crawler');

    // NOTE(review): the promises returned by these listeners are handed back
    // to the action system; whether it observes rejections is not visible
    // from this file - confirm.
    Actions.on('crawlCategory', (data) => this.updateCategories(data));
    Actions.on('crawlSKU', (data) => this.updateSKUs(data));
    Actions.on('crawlProduct', (data) => this.updateProducts(data));
  }

  /**
   * Looks up the SKU row whose `id` equals `req.params.id` and asks the
   * crawler for its companies.
   *
   * @param {object} req - Request carrying `params.id`.
   * @returns {Promise<*>} Whatever `Crawler.getCompanies(link, sku)` yields.
   *   Rejects with `Error('No such product')` when the lookup returns no rows.
   */
  updateProduct(req) {
    const id = req.params.id;
    return this.SKU.get('id', id).then((items) => {
      if (!items.length) {
        throw new Error('No such product');
      }
      const { link, sku } = items[0];
      return Crawler.getCompanies(link, sku);
    });
  }

  /**
   * Issues one `INSERT ... ON DUPLICATE KEY UPDATE ...` statement per row.
   *
   * The statement is assembled from the query builder's string output: the
   * full INSERT, followed by the assignment list of the matching UPDATE.
   * NOTE(review): this relies on the builder's toString() escaping values
   * correctly; a parameterised upsert would be safer if the wrapper offers one.
   *
   * @param {object} repo - Repository exposing `raw()` (query builder) and
   *   `knex()` (connection with a `raw(sql)` method).
   * @param {object[]} rows - Rows to insert or update.
   * @returns {Promise<Array>} Resolves once every statement has completed.
   */
  _upsertAll(repo, rows) {
    const connection = repo.knex();
    const statements = rows.map((row) => {
      const insertSql = repo.raw().insert(row).toString();
      // Non-greedy on purpose: a greedy `.*` would run up to the LAST " set "
      // in the statement and eat part of any value containing that word.
      const assignments = repo
        .raw()
        .update(row)
        .toString()
        .replace(/^update\s.*?\sset\s/i, '');
      return connection.raw(`${insertSql} ON DUPLICATE KEY UPDATE ${assignments}`);
    });
    return Promise.all(statements);
  }

  /**
   * Upserts crawled categories.
   *
   * @param {object[]} cats - Category rows.
   * @returns {Promise<Array>} Resolves once every row has been written.
   */
  updateCategories(cats) {
    return this._upsertAll(this.Categories, cats);
  }

  /**
   * Upserts crawled SKUs.
   *
   * @param {object[]} skus - SKU rows.
   * @returns {Promise<Array>} Resolves once every row has been written.
   */
  updateSKUs(skus) {
    return this._upsertAll(this.SKU, skus);
  }

  /**
   * Upserts the crawled products of one SKU, then deletes the rows of that
   * SKU whose name is not in the crawled list.
   *
   * The SKU is taken from the first row, so all rows are assumed to belong to
   * the same SKU.
   *
   * @param {object[]} prods - Product rows, each with `sku` and `name`.
   * @returns {Promise<*>} Result of the delete query, or `0` when `prods` is
   *   empty (nothing is written or deleted in that case).
   */
  updateProducts(prods) {
    // Without at least one row there is no SKU to scope the clean-up to.
    if (!prods || !prods.length) {
      return Promise.resolve(0);
    }

    const skuID = prods[0].sku;
    const names = prods.map((prod) => prod.name);

    return this._upsertAll(this.Products, prods).then(() => {
      console.log("DELETING")
      console.log(skuID, names)
      console.log("END ``` DELETING")
      return this.Products.raw()
        .where('SKU', skuID)
        .whereNotIn('name', names)
        .del();
    });
  }

  /**
   * Starts a crawl via `Crawler.retrieve(1)`, allowing only one at a time.
   *
   * @returns {Promise<undefined>} Resolves when the crawl finishes; rejects
   *   with the crawler's error when it fails.
   * @throws {Error} Synchronously, with message "Already searching", when a
   *   crawl is still in progress.
   */
  search() {
    if (this.searching) {
      throw new Error("Already searching");
    }

    this.searching = true;
    console.log("Searching...");

    // The flag must be cleared on every exit path, otherwise a single failed
    // crawl would block all later ones.
    const release = () => {
      this.searching = false;
    };

    let crawl;
    try {
      crawl = Crawler.retrieve(1);
    } catch (err) {
      release();
      throw err;
    }

    return crawl.then(
      () => {
        release();
      },
      (err) => {
        release();
        throw err;
      }
    );
  }
}
- module.exports = CrawlerModule;
|