-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscripts.py
More file actions
executable file
·73 lines (68 loc) · 2.46 KB
/
scripts.py
File metadata and controls
executable file
·73 lines (68 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import re
from bs4 import BeautifulSoup
from torrequest import TorRequest
from fake_useragent import UserAgent
import pandas as pd
import os
class Countries:
    """Scrape a page of country links into Indeed search-URL templates.

    The page at ``site`` is requested through ``TorRequest`` with a Firefox
    User-Agent header and parsed with BeautifulSoup (``html5lib`` parser).
    ``getLink`` then builds one ``(Country, Link)`` row per flag image found
    in the parsed page.
    """

    # NOTE(review): this control password is committed to source control. It
    # is kept only as the default so existing ``Countries(site)`` callers keep
    # working; prefer passing ``password=`` explicitly and rotating this value.
    DEFAULT_TOR_PASSWORD = '249smuss'

    # Matches the ``src`` attribute of the flag images on the scraped page.
    FLAG_SRC = re.compile('(/images/flags/)+(...png)+')

    # Appended to each country's base URL. ``(JOB)`` and ``(CITY)`` are left
    # in the string literally, as placeholders.
    SEARCH_SUFFIX = 'jobs?q=(JOB)&l=(CITY)'

    def __init__(self, site, password=DEFAULT_TOR_PASSWORD):
        """Fetch and parse ``site``.

        Args:
            site: URL of the page listing the per-country sites.
            password: Password handed to ``TorRequest``. Defaults to the
                legacy hard-coded value for backward compatibility.
        """
        self.tr = TorRequest(password=password)
        self.tr.reset_identity()
        header = {'User-Agent': UserAgent().firefox}
        page = self.tr.get(site, headers=header)
        self.response = BeautifulSoup(page.text, 'html5lib')

    def getLink(self, write='no', path='Countries.csv'):
        """Build the country/link table and either save or return it.

        Args:
            write: ``'yes'`` (any letter case) or ``True`` writes the table to
                ``path`` and returns ``None``; any other value returns the
                table instead.
            path: Destination CSV used when writing.

        Returns:
            ``None`` when the table was written, otherwise a ``DataFrame``
            with columns ``Country`` and ``Link``.
        """
        flag_images = self.response.find_all(src=self.FLAG_SRC)
        # Assumes the country's link element is the second node after each
        # flag image, as the original traversal did -- verify against the
        # live page markup.
        anchors = [img.next_element.next_element for img in flag_images]
        rows = [
            # Strip trailing slashes so an href like "https://x/" does not
            # produce "https://x//jobs?...".
            (a.text, a['href'].rstrip('/') + '/' + self.SEARCH_SUFFIX)
            for a in anchors
            if a.text != 'United States'
        ]
        # The United States entry is always added with a fixed URL rather
        # than the one found on the page.
        rows.append(('United States', 'https://www.indeed.com/jobs?q=(JOB)&l=(CITY)'))
        links = pd.DataFrame(rows, columns=['Country', 'Link'])
        # Accept a real boolean as well as the legacy 'yes'/'no' strings;
        # calling ``.lower()`` directly on a bool would raise AttributeError.
        if write is True or str(write).lower() == 'yes':
            links.to_csv(path)
            return None
        return links
class Load_Data:
    """Load the cached table of per-country Indeed search links.

    ``countries`` starts out as the path of the CSV cache and is replaced by
    the loaded ``DataFrame`` after the first ``Load_Countries`` call.
    """

    def __init__(self):
        self.countries = 'Countries.csv'

    def Load_Countries(self):
        """Return the country table, building the CSV cache first if missing.

        Calling this more than once is safe: later calls return the table
        loaded by the first call.

        Returns:
            The ``DataFrame`` read from the CSV cache.
        """
        # After the first call ``self.countries`` holds the DataFrame, and
        # passing that to ``os.path.exists`` would raise TypeError.
        if isinstance(self.countries, pd.DataFrame):
            return self.countries
        path = self.countries
        if not os.path.exists(path):
            # TODO: show a progress message to the user while this runs.
            Countries('https://www.indeed.com/worldwide?&mobRdr=1').getLink('yes')
        self.countries = pd.read_csv(path)
        return self.countries
class Entry:
    """CSV-backed table of entries with a fixed set of six columns.

    ``entries`` is a ``DataFrame`` with the columns in ``COLUMNS``. It is
    loaded from ``file`` when that file exists and starts empty otherwise.
    Assigning a new value to ``file`` reloads ``entries`` from it when the
    new path already exists.
    """

    COLUMNS = ['job', 'country', 'city', 'cover', 'exceptions', 'cv']

    def __init__(self, file):
        """Bind the table to ``file`` and load whatever it already contains."""
        # Skip the reload hook here; the explicit load below also covers the
        # case where the file does not exist yet.
        self.__setattr__('file', file, read=False)
        self.entries = self._load(file)

    def _load(self, path):
        """Return the entries stored at ``path``, or an empty table if absent."""
        if not os.path.exists(path):
            return pd.DataFrame(columns=self.COLUMNS)
        # Column 0 is the index column that ``__write__`` emits via
        # ``to_csv``; only the six data columns are wanted.
        return pd.read_csv(path, usecols=[1, 2, 3, 4, 5, 6])

    def __append__(self, data):
        """Add ``data`` as a new row at label ``len(self.entries)``."""
        self.entries.loc[len(self.entries)] = data

    def __read__(self):
        """Return the in-memory table."""
        return self.entries

    def __drop__(self, TakeAt):
        """Remove the row at position ``TakeAt`` and renumber the index.

        Returns:
            ``None`` after a removal, or an explanatory message when the
            table holds exactly one row, which is never removed.
        """
        if len(self.entries) == 1:
            return "Can't delete the last item"
        self.entries.drop(self.entries.index[TakeAt], inplace=True)
        self.entries.reset_index(drop=True, inplace=True)

    def __write__(self):
        """Write the table to ``file``, replacing any previous contents."""
        # ``to_csv`` overwrites an existing file, so it is not removed first:
        # deleting it up front would lose the old data if the write failed.
        self.entries.to_csv(self.file)

    def __delete__(self):
        """Delete the backing file if it exists; the in-memory table is kept."""
        # NOTE(review): ``__delete__`` is also the descriptor-protocol hook
        # name; the name is kept because callers already use it.
        if os.path.exists(self.file):
            os.remove(self.file)

    def __getline__(self, line):
        """Return the row whose index label is ``line``."""
        return self.entries.loc[line]

    def __setattr__(self, prop, val, read=True):
        """Set an attribute, reloading ``entries`` when ``file`` changes.

        Args:
            prop: Attribute name.
            val: New value.
            read: When true and ``prop`` is ``'file'``, reload ``entries``
                from ``val`` if that path exists. Only reachable with a
                non-default value through an explicit ``__setattr__`` call.
        """
        super().__setattr__(prop, val)
        # The attribute name arrives without a ``self.`` prefix, so the
        # comparison has to be against 'file' for the reload to ever run.
        if prop != 'file' or not read:
            return
        # Non-path values (e.g. None) and paths that do not exist yet leave
        # the current entries in place, so "save as" to a new file still works.
        if isinstance(val, (str, bytes, os.PathLike)) and os.path.exists(val):
            super().__setattr__('entries', self._load(val))