Commit
enh(glassdoor): easy apply filter (#92)
cullenwatson authored Feb 2, 2024
1 parent b97c73f commit bbe46fe
Showing 4 changed files with 11 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -69,7 +69,7 @@ Optional
├── is_remote (bool)
├── full_description (bool): fetches full description for Indeed / LinkedIn (much slower)
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
- ├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
+ ├── easy_apply (bool): filters for jobs that are hosted on LinkedIn, Glassdoor
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
```
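
For context, a minimal usage sketch of the updated option. It assumes the top-level `scrape_jobs` entry point and the parameter names listed in the README excerpt above (`site_name` in particular is an assumption); check them against the installed version.

```python
from jobspy import scrape_jobs

# Hedged sketch: parameter names follow the README excerpt above and may
# differ slightly in your installed version of python-jobspy.
jobs = scrape_jobs(
    site_name=["glassdoor", "linkedin"],  # assumed parameter name
    search_term="software engineer",
    results_wanted=20,
    easy_apply=True,  # with this commit, also filters Glassdoor results
)
print(jobs.head())  # scrape_jobs returns a pandas DataFrame
```
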
18 changes: 4 additions & 14 deletions poetry.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
version = "1.1.36"
version = "1.1.37"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/Bunsly/JobSpy"
@@ -13,7 +13,7 @@ packages = [
[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
tls-client = "^0.2.1"
tls-client = "*"
beautifulsoup4 = "^4.12.2"
pandas = "^2.1.0"
NUMPY = "1.24.2"
6 changes: 4 additions & 2 deletions src/jobspy/scrapers/glassdoor/__init__.py
@@ -88,13 +88,14 @@ def fetch_jobs_page(
def process_job(self, job_data):
"""Processes a single job and fetches its description."""
job_id = job_data["jobview"]["job"]["listingId"]
- job_url = f'{self.url}/job-listing/?jl={job_id}'
+ job_url = f'{self.url}job-listing/j?jl={job_id}'
if job_url in self.seen_urls:
return None
self.seen_urls.add(job_url)
job = job_data["jobview"]
title = job["job"]["jobTitleText"]
company_name = job["header"]["employerNameFromSearch"]
+ company_id = job_data['jobview']['header']['employer']['id']
location_name = job["header"].get("locationName", "")
location_type = job["header"].get("locationType", "")
age_in_days = job["header"].get("ageInDays")
@@ -115,6 +116,7 @@ def process_job(self, job_data):

job_post = JobPost(
title=title,
+ company_url=f"{self.url}Overview/W-EI_IE{company_id}.htm" if company_id else None,
company_name=company_name,
date_posted=date_posted,
job_url=job_url,
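
To make the two URL changes above concrete, here is a small, self-contained sketch of how the job and company URLs are assembled from the GraphQL response. The `job_data` shape and `base_url` value are assumptions inferred from the fields referenced in `process_job`, not a verified part of the library.

```python
# Minimal sketch of the URL construction in the diff above.
# `base_url` stands in for self.url (assumed to end with a slash) and the
# job_data layout is inferred from the keys used in process_job.
def build_glassdoor_urls(base_url: str, job_data: dict) -> tuple[str, str | None]:
    job_id = job_data["jobview"]["job"]["listingId"]
    employer = job_data["jobview"]["header"].get("employer") or {}
    company_id = employer.get("id")

    job_url = f"{base_url}job-listing/j?jl={job_id}"
    company_url = f"{base_url}Overview/W-EI_IE{company_id}.htm" if company_id else None
    return job_url, company_url


sample = {
    "jobview": {
        "job": {"listingId": 123456789},
        "header": {"employer": {"id": 42}},
    }
}
print(build_glassdoor_urls("https://www.glassdoor.com/", sample))
# ('https://www.glassdoor.com/job-listing/j?jl=123456789',
#  'https://www.glassdoor.com/Overview/W-EI_IE42.htm')
```
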
@@ -258,7 +260,7 @@ add_payload(
"operationName": "JobSearchResultsQuery",
"variables": {
"excludeJobListingIds": [],
"filterParams": [],
"filterParams": [{"filterKey": "applicationType", "values": "1"}] if scraper_input.easy_apply else [],
"keyword": scraper_input.search_term,
"numJobsToShow": 30,
"locationType": location_type,
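
A short illustration of the payload change above, showing how `filterParams` differs when `easy_apply` is set. The surrounding payload is abbreviated, and the meaning of `applicationType = "1"` is inferred from this diff rather than from documented Glassdoor behavior.

```python
# Sketch of the easy-apply filter toggle from the diff above; `easy_apply`
# stands in for scraper_input.easy_apply and the payload is abbreviated.
def build_filter_params(easy_apply: bool) -> list[dict]:
    return [{"filterKey": "applicationType", "values": "1"}] if easy_apply else []


variables = {
    "keyword": "software engineer",
    "numJobsToShow": 30,
    "filterParams": build_filter_params(easy_apply=True),
}
print(variables["filterParams"])
# [{'filterKey': 'applicationType', 'values': '1'}]
```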
