forked from nextup/nextup
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserverInit.js
More file actions
122 lines (111 loc) · 4.34 KB
/
serverInit.js
File metadata and controls
122 lines (111 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
// **
// * _____ _
// * | __ \| |
// * | |__) | | __ _ _ __
// * | ___/| |/ _` | '_ \
// * | | | | (_| | | | |
// * |_| |_|\__,_|_| |_|
// *
// *
1. Populate master dictionary by querying DB
  - IF DB is empty, then move all scrapeArchive files back to json (async ok!)
  - alternatively, read archived docs from mongoDB (archive = true / false)
2. After master dict resolves (promise), start the cron job that checks the Hacker News RSS feed for new news/sites to scrape
  - if there are new urls, save them as file.json to the json folder (or to mongoDB as an alternative)
  - read the directory and batchInsert
  - move each inserted file to the archive
*/
// from commonly used
var consoleStart = require('./helpers/commonlyUsed.js').consoleStart;
// from batchOp.js
var clearNeo4jDBAsync = require('./batchOp.js').clearNeo4jDBAsync;
var populateMasterDictAsync = require('./batchOp.js').populateMasterDictAsync;
var populateMasterDoclistAsync = require('./batchOp.js').populateMasterDoclistAsync;
var insertBatchRec = require('./batchOp.js').insertBatchRec;
// from cronBatchInsert.js
var moveJson = require('./cronBatchInsert.js').moveJson;
var readJsonDir = require('./cronBatchInsert.js').readJsonDir;
var dirPaths = require('./cronBatchInsert.js').dirPaths;
var toFilenameList = require('./cronBatchInsert.js').toFilenameList;
var checkEmptyDB = require('./cronBatchInsert.js').checkEmptyDB;
var startCron = require('./cronBatchInsert.js').startCron;
// rss reader and .json saver
var readabilityRequestCron = require('./scrape.js').readabilityRequestCron;
var popCron = require('./scrape.js').popCron;
var populateMasterRssQueue = require('./scrape.js').populateMasterRssQueue;
// which dir to use
// var theDir = dirPaths.dummyJSON;
var theDir = dirPaths.jsonDir;
// var rssURL = "https://news.ycombinator.com/bigrss";
// move files from archive to original directory, remove in production
// Server initialization pipeline: reset state (test-only steps), populate the
// in-memory master dictionary / doc list from neo4j, batch-insert any pending
// JSON files, then start the recurring cron jobs.
// NOTE(review): the intermediate .catch handlers below log and swallow errors,
// so the chain continues with an undefined value — e.g. toFilenameList(docList)
// runs even if readJsonDir failed. Preserved as-is; confirm this best-effort
// behavior is intended before hardening.
// move files from archive to original directory, remove in production
moveJson()
  .then(function (movedFiles) {
    consoleStart(movedFiles, 'files moved from archive to: ' + theDir);
  })
  // clear database for testing purposes, remove in production
  .then(function () {
    console.log('neo4j cleared?');
    return clearNeo4jDBAsync();
  })
  // REAL functions begin here, everything before is for testing and can be cleared
  // assuming FRESH db
  .then(function () {
    console.log('empty db checking?');
    return checkEmptyDB();
  })
  .then(function (isNeo4jEmpty) {
    // if the database is empty, move archive files back to the json folder
    // so they get re-inserted below
    if (isNeo4jEmpty) {
      console.log('if neo4j is empty, move json');
      return moveJson();
    } else {
      // otherwise populate the master dictionary with existing words
      console.log('if neo4j is not empty, populate master');
      return populateMasterDictAsync();
    }
  })
  .catch(function (err) {
    consoleStart(err, "Dict pop error");
  })
  // populate the master doc list regardless of the branch taken above
  .then(function () {
    console.log('populating master doc list');
    return populateMasterDoclistAsync();
  })
  // read json directory for files to insert
  .then(function () {
    // returns a promisified array of *parsed* json document objects
    return readJsonDir(theDir);
  })
  .catch(function (err) {
    consoleStart(err, "serverInit readJsonDir() errored out!");
  })
  // batch insert json files, then archive the source files
  .then(function (docList) {
    var filenames = toFilenameList(docList);
    consoleStart(filenames, "files to move to archive after batch insert");
    // TODO(review): insertBatchRec's result is neither returned nor awaited,
    // so moveJson below races the batch insert. If insertBatchRec returns a
    // promise, chain it (return insertBatchRec(...).then(...)) — confirm its
    // return type in batchOp.js.
    insertBatchRec("result", "response", docList, 0);
    return moveJson(theDir, dirPaths.scrapeArchive, filenames);
  })
  .catch(function (err) {
    consoleStart(err, "serverInit moveJson() errored out!");
  })
  .then(function (movedFiles) {
    return consoleStart(movedFiles, 'moved to scrapeArchive from: ' + theDir);
  })
  // initial neo4j population / dictionary retrieval is done; start the cron jobs
  .then(function () {
    console.log('executing cron jobs');
    populateMasterRssQueue();
    readabilityRequestCron();
    popCron();
    startCron();
  })
  .catch(function (err) {
    consoleStart(err, 'catch all errors');
  });