forked from nextup/nextup
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserverInit.js
More file actions
122 lines (111 loc) · 4.34 KB
/
serverInit.js
File metadata and controls
122 lines (111 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
// **
// * _____ _
// * | __ \| |
// * | |__) | | __ _ _ __
// * | ___/| |/ _` | '_ \
// * | | | | (_| | | | |
// * |_| |_|\__,_|_| |_|
// *
// *
1. Populate master dictionary by querying DB
  - IF DB is empty, then move all scrapeArchive files back to json (async ok!)
  - alternatively, read archived docs from mongoDB (archive = true / false)
2. After master dict resolves (promise), start the cron job that checks the Hacker News RSS feed for new news/sites to scrape
  - if there are new urls, save them as file.json to the json folder (or to mongoDB as an alternative)
  - read the directory and batchInsert
  - move each inserted file to the archive
*/
// from commonly used
var consoleStart = require('./helpers/commonlyUsed.js').consoleStart;
// from batchOp.js
var clearNeo4jDBAsync = require('./batchOp.js').clearNeo4jDBAsync;
var populateMasterDictAsync = require('./batchOp.js').populateMasterDictAsync;
var populateMasterDoclistAsync = require('./batchOp.js').populateMasterDoclistAsync;
var insertBatchRec = require('./batchOp.js').insertBatchRec;
// from cronBatchInsert.js
var moveJson = require('./cronBatchInsert.js').moveJson;
var readJsonDir = require('./cronBatchInsert.js').readJsonDir;
var dirPaths = require('./cronBatchInsert.js').dirPaths;
var toFilenameList = require('./cronBatchInsert.js').toFilenameList;
var checkEmptyDB = require('./cronBatchInsert.js').checkEmptyDB;
var startCron = require('./cronBatchInsert.js').startCron;
// rss reader and .json saver
var readabilityRequestCron = require('./scrape.js').readabilityRequestCron;
var popCron = require('./scrape.js').popCron;
var populateMasterRssQueue = require('./scrape.js').populateMasterRssQueue;
// which dir to use
// var theDir = dirPaths.dummyJSON;
var theDir = dirPaths.jsonDir;
// var rssURL = "https://news.ycombinator.com/bigrss";
// move files from archive to original directory, remove in production
// Server initialization pipeline: reset state (test-only steps), populate the
// in-memory master dictionary / doc list from neo4j, batch-insert any pending
// JSON files, then start the recurring cron jobs.
// NOTE(review): the intermediate .catch handlers below log and swallow errors,
// so the chain continues with an undefined value — e.g. toFilenameList(docList)
// runs even if readJsonDir failed. Preserved as-is; confirm this best-effort
// behavior is intended before hardening.
// move files from archive to original directory, remove in production
moveJson()
  .then(function (movedFiles) {
    consoleStart(movedFiles, 'files moved from archive to: ' + theDir);
  })
  // clear database for testing purposes, remove in production
  .then(function () {
    console.log('neo4j cleared?');
    return clearNeo4jDBAsync();
  })
  // REAL functions begin here, everything before is for testing and can be cleared
  // assuming FRESH db
  .then(function () {
    console.log('empty db checking?');
    return checkEmptyDB();
  })
  .then(function (isNeo4jEmpty) {
    // if the database is empty, move archive files back to the json folder
    // so they get re-inserted below
    if (isNeo4jEmpty) {
      console.log('if neo4j is empty, move json');
      return moveJson();
    } else {
      // otherwise populate the master dictionary with existing words
      console.log('if neo4j is not empty, populate master');
      return populateMasterDictAsync();
    }
  })
  .catch(function (err) {
    consoleStart(err, "Dict pop error");
  })
  // populate the master doc list regardless of the branch taken above
  .then(function () {
    console.log('populating master doc list');
    return populateMasterDoclistAsync();
  })
  // read json directory for files to insert
  .then(function () {
    // returns a promisified array of *parsed* json document objects
    return readJsonDir(theDir);
  })
  .catch(function (err) {
    consoleStart(err, "serverInit readJsonDir() errored out!");
  })
  // batch insert json files, then archive the source files
  .then(function (docList) {
    var filenames = toFilenameList(docList);
    consoleStart(filenames, "files to move to archive after batch insert");
    // TODO(review): insertBatchRec's result is neither returned nor awaited,
    // so moveJson below races the batch insert. If insertBatchRec returns a
    // promise, chain it (return insertBatchRec(...).then(...)) — confirm its
    // return type in batchOp.js.
    insertBatchRec("result", "response", docList, 0);
    return moveJson(theDir, dirPaths.scrapeArchive, filenames);
  })
  .catch(function (err) {
    consoleStart(err, "serverInit moveJson() errored out!");
  })
  .then(function (movedFiles) {
    return consoleStart(movedFiles, 'moved to scrapeArchive from: ' + theDir);
  })
  // initial neo4j population / dictionary retrieval is done; start the cron jobs
  .then(function () {
    console.log('executing cron jobs');
    populateMasterRssQueue();
    readabilityRequestCron();
    popCron();
    startCron();
  })
  .catch(function (err) {
    consoleStart(err, 'catch all errors');
  });