fix(indeed): return on error
cullenwatson committed Mar 1, 2024
1 parent c2bb884 commit c85f2c6
Showing 4 changed files with 26 additions and 14 deletions.
20 changes: 15 additions & 5 deletions poetry.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.45"
+version = "1.1.46"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -13,12 +13,12 @@ packages = [
 [tool.poetry.dependencies]
 python = "^3.10"
 requests = "^2.31.0"
-tls-client = "*"
 beautifulsoup4 = "^4.12.2"
 pandas = "^2.1.0"
 NUMPY = "1.24.2"
 pydantic = "^2.3.0"
 html2text = "^2020.1.16"
+tls-client = "^1.0.1"


 [tool.poetry.group.dev.dependencies]
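With this bump, tls-client moves from an unpinned wildcard to a caret constraint; under Poetry's rules `^1.0.1` accepts any release >=1.0.1 and <2.0.0. A minimal, hedged sketch for sanity-checking the resolved install at runtime (the distribution name "tls-client" and this check are assumptions, not part of the project):

    from importlib.metadata import version

    # Caret pin ^1.0.1 resolves to >=1.0.1,<2.0.0, so the installed major
    # version should still be 1. Distribution name "tls-client" is assumed.
    installed = version("tls-client")
    assert installed.split(".")[0] == "1", f"unexpected tls-client version: {installed}"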
2 changes: 1 addition & 1 deletion src/jobspy/__init__.py
@@ -160,7 +160,7 @@ def worker(site):

     # Desired column order
     desired_order = [
-        "job_url_hyper" if 'hyperlinks' in locals() or 'hyperlinks' in globals() else "job_url",
+        "job_url_hyper" if hyperlinks else "job_url",
         "site",
         "title",
         "company",
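The __init__.py change above swaps the `locals()`/`globals()` lookup for the `hyperlinks` flag that is already in scope where the column order is built. A hedged, standalone sketch of the same reordering idea (the function name and trimmed column list are illustrative, not the library's API):

    import pandas as pd

    def reorder_columns(jobs_df: pd.DataFrame, hyperlinks: bool) -> pd.DataFrame:
        # Put the clickable URL column first only when hyperlinks were requested.
        desired_order = [
            "job_url_hyper" if hyperlinks else "job_url",
            "site",
            "title",
            "company",
        ]
        present = [col for col in desired_order if col in jobs_df.columns]
        remaining = [col for col in jobs_df.columns if col not in present]
        return jobs_df[present + remaining]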
14 changes: 8 additions & 6 deletions src/jobspy/scrapers/indeed/__init__.py
@@ -82,7 +82,6 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
             if not new_jobs:
                 break

-
         if len(self.seen_urls) > scraper_input.results_wanted:
             job_list = job_list[:scraper_input.results_wanted]

@@ -124,12 +123,15 @@ def _scrape_page(self, page: int=0) -> list[JobPost]:
             return job_list

         jobs = IndeedScraper._parse_jobs(soup)
+        if not jobs:
+            return []
         if (
             not jobs.get("metaData", {})
             .get("mosaicProviderJobCardsModel", {})
             .get("results")
         ):
-            raise IndeedException("No jobs found.")
+            logger.error("Indeed - No jobs found.")
+            return []

         jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
         job_keys = [job['jobkey'] for job in jobs]
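The added guard above is the heart of the fix: `_parse_jobs` may now return an empty dict, and the scraper logs the condition and returns an empty list instead of raising mid-scrape. A hedged, self-contained sketch of the same pattern (function name and logger setup are illustrative):

    import logging

    logger = logging.getLogger(__name__)

    def extract_results(payload: dict) -> list[dict]:
        # Treat a missing payload or an empty results list as "no jobs",
        # not as an error worth raising.
        if not payload:
            return []
        results = (
            payload.get("metaData", {})
            .get("mosaicProviderJobCardsModel", {})
            .get("results")
        )
        if not results:
            logger.error("Indeed - No jobs found.")
            return []
        return results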
@@ -302,11 +304,11 @@ def find_mosaic_script() -> Tag | None:
                 jobs = json.loads(m.group(1).strip())
                 return jobs
             else:
-                raise IndeedException("Could not find mosaic provider job cards data")
+                logger.warning(f'Indeed: Could not find mosaic provider job cards data')
+                return {}
         else:
-            raise IndeedException(
-                "Could not find any results for the search"
-            )
+            logger.warning(f"Indeed: Could not parse any jobs on the page")
+            return {}

     @staticmethod
     def _is_job_remote(job: dict, job_detailed: dict, description: str) -> bool:
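With the last hunk, `_parse_jobs` itself degrades gracefully: a missing mosaic script or an unparseable payload is logged as a warning and yields `{}`, which the caller's new guard turns into an empty job list. A hedged sketch of that parse-or-empty shape (the script lookup and regex are simplified assumptions, not the scraper's exact logic):

    import json
    import logging
    import re

    from bs4 import BeautifulSoup

    logger = logging.getLogger(__name__)

    def parse_embedded_jobs(html: str) -> dict:
        soup = BeautifulSoup(html, "html.parser")
        # Assumption: Indeed embeds the job-card JSON in a script that mentions
        # the mosaic provider job cards model.
        script = next(
            (s for s in soup.find_all("script")
             if s.string and "mosaic-provider-jobcards" in s.string),
            None,
        )
        if script is None:
            logger.warning("Indeed: Could not parse any jobs on the page")
            return {}
        # Assumed pattern: the JSON object assigned inside that script tag.
        match = re.search(r"=\s*(\{.*\});", script.string, re.DOTALL)
        if not match:
            logger.warning("Indeed: Could not find mosaic provider job cards data")
            return {}
        try:
            return json.loads(match.group(1).strip())
        except json.JSONDecodeError:
            logger.warning("Indeed: Could not find mosaic provider job cards data")
            return {}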
