index.js 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. const express = require('express');
  2. const path = require('path');
  3. const Module = require('klapi/base/Module');
  4. const OneToOne = require('klapi/base/OneToOne');
  5. const RepositorySystem = require('klapi/systems/RepositorySystem');
  6. const RoutingSystem = require('klapi/systems/RoutingSystem');
  7. const Actions = require('klapi/systems/ActionSystem');
  8. const Crawler = require('./Crawler');
  9. const util = require('util');
  /**
   * Turns a rejection reason into a JSON-friendly response body.
   *
   * Sending an Error object straight through `res.send` serializes it as `{}`
   * (its own properties are not enumerable), which hides the failure from the
   * client, so the message is extracted explicitly.
   *
   * @param {*} err - Rejection reason (Error instance or any other value).
   * @returns {{error: string}} Body carrying a human-readable message.
   */
  function toErrorBody(err) {
    const message = err && err.message ? err.message : String(err);
    return { error: message };
  }

  /**
   * Inserts every row through `repository`, updating the row instead when a
   * duplicate key already exists (MySQL `INSERT ... ON DUPLICATE KEY UPDATE`).
   *
   * The statement is assembled from the repository's own rendered INSERT and
   * UPDATE queries. Judging by how it is used here, `repository.raw()` yields a
   * fresh query builder and `repository.knex()` the underlying connection
   * (NOTE(review): confirm against the repository implementation).
   *
   * @param {Object} repository - Repository exposing `raw()` and `knex()`.
   * @param {Object[]} rows - Rows to upsert; an empty array issues no queries.
   * @returns {Promise<Array>} Resolves once every statement has completed.
   */
  function upsertRows(repository, rows) {
    const connection = repository.knex();
    const statements = rows.map((row) => {
      const insertSql = repository.raw().insert(row).toString();
      // Keep only the "col = value, ..." list of the rendered UPDATE. The match
      // is non-greedy on purpose: a greedy `.*` would run up to the LAST " set "
      // in the statement, so a value such as 'bed set blue' would truncate the
      // assignment list and produce broken SQL.
      const assignments = repository.raw().update(row).toString()
        .replace(/^update\s.*?\sset\s/i, '');
      return connection.raw(`${insertSql} ON DUPLICATE KEY UPDATE ${assignments}`);
    });
    return Promise.all(statements);
  }

  /**
   * Module that exposes crawler HTTP endpoints and persists crawl results
   * (categories, SKUs, products) emitted through the action system.
   */
  class CrawlerModule extends Module {
    /**
     * Creates the three repositories, marks the module as idle and registers
     * routes and action listeners.
     */
    constructor() {
      super();
      this.Categories = RepositorySystem.create('Categories');
      this.SKU = RepositorySystem.create('SKU');
      this.Products = RepositorySystem.create('Products');
      // Set before init() so the flag exists by the time any route can fire.
      this.searching = false;
      this.init();
    }

    /**
     * Registers the `/init` and `/product/:id` GET routes (passed to the routing
     * system together with the '/crawler' argument) and subscribes to the
     * crawl actions.
     */
    init() {
      const routes = [
        {
          type: 'custom',
          method: 'get',
          endpoint: '/init',
          customFn: (req, res) => {
            this.search()
              .then((data) => {
                res.send(data);
              })
              .catch((e) => {
                console.log(e);
                // Report the failure as a failure instead of a 200 with `{}`.
                res.status(500).send(toErrorBody(e));
              });
          }
        },
        {
          type: 'custom',
          method: 'get',
          endpoint: '/product/:id',
          customFn: (req, res) => {
            this.updateProduct(req)
              .then((data) => {
                res.send(data);
              })
              .catch((e) => {
                console.log(e);
                res.status(404).send(toErrorBody(e));
              });
          }
        }
      ];
      this.router = RoutingSystem.loadRoutes(routes, undefined, '/crawler');

      // NOTE(review): the promises returned by these handlers are not observed
      // here; whether the action system handles their rejections is not visible
      // from this file.
      Actions.on('crawlCategory', (data) => this.updateCategories(data));
      Actions.on('crawlSKU', (data) => this.updateSKUs(data));
      Actions.on('crawlProduct', (data) => this.updateProducts(data));
    }

    /**
     * Looks up the SKU whose `id` matches `req.params.id` and asks the crawler
     * for its companies.
     *
     * @param {Object} req - Request carrying `params.id`.
     * @returns {Promise<*>} Whatever `Crawler.getCompanies(link, sku)` yields.
     *   Rejects with `Error('No such product')` when no SKU row matches.
     */
    updateProduct(req) {
      const id = req.params.id;
      return this.SKU.get('id', id).then((items) => {
        if (!items.length) {
          throw new Error('No such product');
        }
        const { link, sku } = items[0];
        return Crawler.getCompanies(link, sku);
      });
    }

    /**
     * Upserts crawled categories.
     *
     * @param {Object[]} cats - Category rows.
     * @returns {Promise<Array>} Resolves once every query has run.
     */
    updateCategories(cats) {
      return upsertRows(this.Categories, cats);
    }

    /**
     * Upserts crawled SKUs.
     *
     * @param {Object[]} skus - SKU rows.
     * @returns {Promise<Array>} Resolves once every query has run.
     */
    updateSKUs(skus) {
      return upsertRows(this.SKU, skus);
    }

    /**
     * Upserts crawled products, then deletes the products of the same SKU whose
     * name is not part of this batch.
     *
     * The SKU used for the clean-up is taken from the first row, i.e. the batch
     * is assumed to belong to a single SKU (NOTE(review): confirm with the
     * emitter of 'crawlProduct').
     *
     * @param {Object[]} prods - Product rows, each with `sku` and `name`.
     * @returns {Promise<*>} Result of the delete query; resolves with 0 without
     *   touching the database when the batch is empty (there is no SKU to
     *   clean up in that case).
     */
    updateProducts(prods) {
      if (!prods || prods.length === 0) {
        return Promise.resolve(0);
      }
      const skuID = prods[0].sku;
      const names = prods.map((prod) => prod.name);
      return upsertRows(this.Products, prods).then(() => {
        console.log("DELETING")
        console.log(skuID, names)
        console.log("END ``` DELETING")
        return this.Products.raw().where('SKU', skuID).whereNotIn('name', names).del();
      });
    }

    /**
     * Starts a crawl via `Crawler.retrieve(1)` unless one is already running.
     *
     * Every failure is delivered as a rejected promise (never a synchronous
     * throw) so that callers chaining `.catch` see it, and the `searching` flag
     * is cleared whether the crawl succeeds or fails; otherwise one failed
     * crawl would block all later searches.
     *
     * @returns {Promise<undefined>} Resolves when the crawl finishes. Rejects
     *   with `Error('Already searching')` when a crawl is in progress, or with
     *   the crawler's own error.
     */
    search() {
      if (this.searching) {
        return Promise.reject(new Error("Already searching"));
      }
      this.searching = true;
      console.log("Searching...");
      const release = () => {
        this.searching = false;
      };
      // Wrapping the call in a promise executor also converts a synchronous
      // throw from the crawler into a rejection.
      return new Promise((resolve) => resolve(Crawler.retrieve(1))).then(
        () => {
          release();
        },
        (err) => {
          release();
          throw err;
        }
      );
    }
  }
  145. module.exports = CrawlerModule;