fix(indeed): return on error
cullenwatson committed Mar 1, 2024
1 parent c2bb884 commit c85f2c6
Showing 4 changed files with 26 additions and 14 deletions.
20 changes: 15 additions & 5 deletions poetry.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.45"
+version = "1.1.46"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -13,12 +13,12 @@ packages = [
 [tool.poetry.dependencies]
 python = "^3.10"
 requests = "^2.31.0"
-tls-client = "*"
 beautifulsoup4 = "^4.12.2"
 pandas = "^2.1.0"
 NUMPY = "1.24.2"
 pydantic = "^2.3.0"
 html2text = "^2020.1.16"
+tls-client = "^1.0.1"


 [tool.poetry.group.dev.dependencies]
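With this bump, tls-client moves from an unpinned wildcard to a caret constraint; under Poetry's rules `^1.0.1` accepts any release >=1.0.1 and <2.0.0. A minimal, hedged sketch for sanity-checking the resolved install at runtime (the distribution name "tls-client" and this check are assumptions, not part of the project):

    from importlib.metadata import version

    # Caret pin ^1.0.1 resolves to >=1.0.1,<2.0.0, so the installed major
    # version should still be 1. Distribution name "tls-client" is assumed.
    installed = version("tls-client")
    assert installed.split(".")[0] == "1", f"unexpected tls-client version: {installed}"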
2 changes: 1 addition & 1 deletion src/jobspy/__init__.py
@@ -160,7 +160,7 @@ def worker(site):

     # Desired column order
     desired_order = [
-        "job_url_hyper" if 'hyperlinks' in locals() or 'hyperlinks' in globals() else "job_url",
+        "job_url_hyper" if hyperlinks else "job_url",
         "site",
         "title",
         "company",
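The __init__.py change above swaps the `locals()`/`globals()` lookup for the `hyperlinks` flag that is already in scope where the column order is built. A hedged, standalone sketch of the same reordering idea (the function name and trimmed column list are illustrative, not the library's API):

    import pandas as pd

    def reorder_columns(jobs_df: pd.DataFrame, hyperlinks: bool) -> pd.DataFrame:
        # Put the clickable URL column first only when hyperlinks were requested.
        desired_order = [
            "job_url_hyper" if hyperlinks else "job_url",
            "site",
            "title",
            "company",
        ]
        present = [col for col in desired_order if col in jobs_df.columns]
        remaining = [col for col in jobs_df.columns if col not in present]
        return jobs_df[present + remaining]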
14 changes: 8 additions & 6 deletions src/jobspy/scrapers/indeed/__init__.py
@@ -82,7 +82,6 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
             if not new_jobs:
                 break

-
         if len(self.seen_urls) > scraper_input.results_wanted:
             job_list = job_list[:scraper_input.results_wanted]

@@ -124,12 +123,15 @@ def _scrape_page(self, page: int=0) -> list[JobPost]:
             return job_list

         jobs = IndeedScraper._parse_jobs(soup)
+        if not jobs:
+            return []
         if (
             not jobs.get("metaData", {})
             .get("mosaicProviderJobCardsModel", {})
             .get("results")
         ):
-            raise IndeedException("No jobs found.")
+            logger.error("Indeed - No jobs found.")
+            return []

         jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
         job_keys = [job['jobkey'] for job in jobs]
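The added guard above is the heart of the fix: `_parse_jobs` may now return an empty dict, and the scraper logs the condition and returns an empty list instead of raising mid-scrape. A hedged, self-contained sketch of the same pattern (function name and logger setup are illustrative):

    import logging

    logger = logging.getLogger(__name__)

    def extract_results(payload: dict) -> list[dict]:
        # Treat a missing payload or an empty results list as "no jobs",
        # not as an error worth raising.
        if not payload:
            return []
        results = (
            payload.get("metaData", {})
            .get("mosaicProviderJobCardsModel", {})
            .get("results")
        )
        if not results:
            logger.error("Indeed - No jobs found.")
            return []
        return results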
@@ -302,11 +304,11 @@ def find_mosaic_script() -> Tag | None:
                 jobs = json.loads(m.group(1).strip())
                 return jobs
             else:
-                raise IndeedException("Could not find mosaic provider job cards data")
+                logger.warning(f'Indeed: Could not find mosaic provider job cards data')
+                return {}
         else:
-            raise IndeedException(
-                "Could not find any results for the search"
-            )
+            logger.warning(f"Indeed: Could not parse any jobs on the page")
+            return {}

     @staticmethod
     def _is_job_remote(job: dict, job_detailed: dict, description: str) -> bool:
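With the last hunk, `_parse_jobs` itself degrades gracefully: a missing mosaic script or an unparseable payload is logged as a warning and yields `{}`, which the caller's new guard turns into an empty job list. A hedged sketch of that parse-or-empty shape (the script lookup and regex are simplified assumptions, not the scraper's exact logic):

    import json
    import logging
    import re

    from bs4 import BeautifulSoup

    logger = logging.getLogger(__name__)

    def parse_embedded_jobs(html: str) -> dict:
        soup = BeautifulSoup(html, "html.parser")
        # Assumption: Indeed embeds the job-card JSON in a script that mentions
        # the mosaic provider job cards model.
        script = next(
            (s for s in soup.find_all("script")
             if s.string and "mosaic-provider-jobcards" in s.string),
            None,
        )
        if script is None:
            logger.warning("Indeed: Could not parse any jobs on the page")
            return {}
        # Assumed pattern: the JSON object assigned inside that script tag.
        match = re.search(r"=\s*(\{.*\});", script.string, re.DOTALL)
        if not match:
            logger.warning("Indeed: Could not find mosaic provider job cards data")
            return {}
        try:
            return json.loads(match.group(1).strip())
        except json.JSONDecodeError:
            logger.warning("Indeed: Could not find mosaic provider job cards data")
            return {}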
