FEAT: Allow LinkedIn scraper to get external job apply url (#140)

cullenwatson · Apr 30, 2024 · 8dd08ed
1 parent 5d3df73
commit 8dd08ed
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -38,7 +38,8 @@ jobs = scrape_jobs(
     location="Dallas, TX",
     results_wanted=20,
     hours_old=72, # (only Linkedin/Indeed is hour specific, others round up to days old)
-    country_indeed='USA'  # only needed for indeed / glassdoor
+    country_indeed='USA',  # only needed for indeed / glassdoor
+    # linkedin_fetch_description=True # get full description and direct job url for linkedin (slower)
 )
 print(f"Found {len(jobs)} jobs")
 print(jobs.head())
@@ -70,7 +71,7 @@ Optional
 ├── is_remote (bool)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_name'
 ├── easy_apply (bool): filters for jobs that are hosted on the job board site (LinkedIn & Indeed do not allow pairing this with hours_old)
-├── linkedin_fetch_description (bool): fetches full description for LinkedIn (slower)
+├── linkedin_fetch_description (bool): fetches full description and direct job url for LinkedIn (slower)
 ├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
 ├── description_format (str): markdown, html (Format type of the job descriptions. Default is markdown.)
 ├── country_indeed (str): filters the country on Indeed (see below for correct spelling)

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.51"
+version = "1.1.52"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -19,6 +19,7 @@ NUMPY = "1.24.2"
 pydantic = "^2.3.0"
 tls-client = "^1.0.1"
 markdownify = "^0.11.6"
+regex = "^2024.4.28"
 
 
 [tool.poetry.group.dev.dependencies]

diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
@@ -119,7 +119,7 @@ def _scrape_page(self, cursor: str | None) -> Tuple[list[JobPost], str | None]:
         )
         if response.status_code != 200:
             logger.info(
-                f"Indeed responded with status code: {response.status_code} (submit GitHub issue if this appears to be a beg)"
+                f"Indeed responded with status code: {response.status_code} (submit GitHub issue if this appears to be a bug)"
             )
             return jobs, new_cursor
         data = response.json()