-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjob_search.py
84 lines (73 loc) · 2.98 KB
/
job_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# import request and Beautiful Soup
import argparse
import urllib.parse

import requests
from bs4 import BeautifulSoup
def scrape_jobs(location=None):
    """Scrape Data Scientist job postings from Monster's first results page.

    :param location: Where the job is located; when given, the search is
        restricted to a 200-mile radius and the last 14 days.
    :type location: str or None
    :return: Parsed container with all job postings from the first page of
        results, or None if the expected results container is absent.
    :rtype: bs4.element.Tag or None
    :raises requests.HTTPError: if Monster responds with a 4xx/5xx status.
    :raises requests.Timeout: if the request exceeds the 10-second timeout.
    """
    if location:
        # URL-encode the location so spaces and special characters don't
        # produce a malformed query string (e.g. "New York" -> "New+York").
        where = urllib.parse.quote_plus(location)
        url = (f'https://www.monster.com/jobs/search/'
               f'?q=Data-Scientist&where={where}&rad=200&tm=14')
    else:
        url = 'https://www.monster.com/jobs/search/?q=Data-Scientist'
    # timeout prevents the script from hanging forever on a stalled server.
    page = requests.get(url, timeout=10)
    # Fail loudly on an error response instead of parsing an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    return soup.find(id='ResultsContainer')
def filter_jobs_by_keyword(results, word):
    """Filter job postings by *word* and print each matching title and link.

    :param results: Parsed HTML container with all job listings
    :type results: bs4.element.Tag
    :param word: keyword to filter by (compared against lower-cased titles;
        callers are expected to pass it already lower-cased)
    :type word: str
    :return: None - meant to just print results
    :rtype: None
    """
    # bs4 passes None to the matcher for <h2> tags with no direct string
    # content; guard against that so the lambda can't raise TypeError.
    matching_jobs = results.find_all(
        'h2', string=lambda text: text is not None and word in text.lower()
    )
    for ds_jobs in matching_jobs:
        print(ds_jobs.text.strip())
        anchor = ds_jobs.find('a')
        # A matched heading without an anchor would crash on ['href'];
        # print the title but skip the link in that case.
        if anchor is not None and anchor.get('href'):
            print(f'Apply here: {anchor["href"]}\n')
def print_all_jobs(results):
    """Print every job returned by the search.

    The printed details are job title, company name, job location, and the
    application link, separated card by card with a blank line.

    :param results: Parsed HTML container with all job listing
    :type results: BeautifulSoup Object
    :return: None - meant to just print results
    :rtype: None
    """
    for card in results.find_all('section', class_='card-content'):
        title = card.find('h2', class_='title')
        company = card.find('div', class_='company')
        place = card.find('div', class_='location')
        # Skip incomplete cards (ads/malformed entries) rather than crash.
        if title is None or company is None or place is None:
            continue
        for elem in (title, company, place):
            print(elem.text.strip())
        print(title.find('a')['href'])
        print()
# USE THE SCRIPT AS A COMMAND-LINE INTERFACE
#______________________________________________________________________________________
def main():
    """Parse command-line arguments and print matching Data Scientist jobs.

    Accepts ``-location`` to restrict the search area and ``-word`` to
    filter results by keyword; with no keyword, all jobs are printed.
    """
    my_parser = argparse.ArgumentParser(
        prog='jobs', description='Find Data Scientist Jobs'
    )
    my_parser.add_argument(
        '-location', metavar='location', type=str, help='The location of the job'
    )
    my_parser.add_argument(
        '-word', metavar='word', type=str, help='What keyword to filter by'
    )
    args = my_parser.parse_args()
    results = scrape_jobs(args.location)
    # scrape_jobs returns None when the results container is missing
    # (layout change or empty page); bail out instead of crashing.
    if results is None:
        print('No job results found.')
        return
    if args.word:
        filter_jobs_by_keyword(results, args.word.lower())
    else:
        print_all_jobs(results)


# Guard so importing this module no longer triggers argument parsing
# and a live network request as a side effect.
if __name__ == '__main__':
    main()