Skip to content

Commit

Permalink
fix(indeed): check both dicts for compensation
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Feb 9, 2024
1 parent 5e21774 commit 2b72381
Showing 1 changed file with 41 additions and 14 deletions.
55 changes: 41 additions & 14 deletions src/jobspy/scrapers/indeed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,6 @@ def process_job(job: dict, job_detailed: dict) -> JobPost | None:
self.seen_urls.add(job_url)
description = job_detailed['description']['html']

compensation = None
comp = job_detailed['compensation']['baseSalary']
if comp:
interval = CompensationInterval.get_interval(comp['unitOfWork'])
interval = self.get_correct_interval(comp['unitOfWork'])
if interval:
compensation = Compensation(
interval=interval,
min_amount=round(comp['range'].get('min', 0), 2) if comp['range'].get(
'min') is not None else None,
max_amount = round(comp['range'].get('max', 0), 2) if comp['range'].get('max') is not None else None,
currency=job_detailed['compensation']['currencyCode']
)

job_type = IndeedScraper.get_job_type(job)
timestamp_seconds = job["pubDate"] / 1000
Expand All @@ -133,7 +120,7 @@ def process_job(job: dict, job_detailed: dict) -> JobPost | None:
country=self.country,
),
job_type=job_type,
compensation=compensation,
compensation=self.get_compensation(job, job_detailed),
date_posted=date_posted,
job_url=job_url_client,
emails=extract_emails_from_text(description) if description else None,
Expand Down Expand Up @@ -255,6 +242,44 @@ def get_job_type(job: dict) -> list[JobType] | None:
job_types.append(job_type)
return job_types

@staticmethod
def get_compensation(job: dict, job_detailed: dict) -> Compensation | None:
    """
    Build the compensation for a job, checking both Indeed payloads.

    The structured ``baseSalary`` entry of ``job_detailed`` is preferred. When
    it is missing, empty, or its unit of work cannot be mapped to an interval,
    the ``extractedSalary`` entry of ``job`` is used as a fallback.

    :param job: job dict; ``extractedSalary`` and ``salarySnippet`` are read
    :param job_detailed: detailed job dict; ``compensation`` is read
    :return: compensation object, or None when neither dict holds usable
        salary data
    """
    # Tolerate a missing/None compensation entry so the fallback still runs.
    detailed_comp = job_detailed.get("compensation") or {}
    base_salary = detailed_comp.get("baseSalary")
    if base_salary:
        interval = IndeedScraper.get_correct_interval(base_salary["unitOfWork"])
        if interval:
            salary_range = base_salary["range"]
            low = salary_range.get("min")
            high = salary_range.get("max")
            return Compensation(
                interval=interval,
                min_amount=round(low, 2) if low is not None else None,
                max_amount=round(high, 2) if high is not None else None,
                currency=detailed_comp["currencyCode"],
            )

    extracted_salary = job.get("extractedSalary")
    if not extracted_salary:
        return None

    # The interval name must be a string naming a CompensationInterval member;
    # anything else (including a missing "type") means no usable compensation.
    raw_interval = extracted_salary.get("type")
    if not isinstance(raw_interval, str):
        return None
    interval_name = raw_interval.upper()
    if interval_name not in CompensationInterval.__members__:
        return None

    salary_snippet = job.get("salarySnippet")
    currency = salary_snippet.get("currency") if salary_snippet else None

    # Either bound may be absent; keep it as None instead of failing in int().
    low = extracted_salary.get("min")
    high = extracted_salary.get("max")
    return Compensation(
        interval=CompensationInterval[interval_name],
        min_amount=int(low) if low is not None else None,
        max_amount=int(high) if high is not None else None,
        currency=currency,
    )

@staticmethod
def parse_jobs(soup: BeautifulSoup) -> dict:
"""
Expand Down Expand Up @@ -453,8 +478,10 @@ def get_job_details(job_keys: list[str]) -> dict:
@staticmethod
def get_correct_interval(interval: str) -> CompensationInterval:
interval_mapping = {
"DAY": "DAILY",
"YEAR": "YEARLY",
"HOUR": "HOURLY",
"WEEK": "WEEKLY",
"MONTH": "MONTHLY"
}
mapped_interval = interval_mapping.get(interval.upper(), None)
Expand Down

0 comments on commit 2b72381

Please sign in to comment.