From 409fe272f30652330ea159986969d2a58f03f554 Mon Sep 17 00:00:00 2001
From: Cullen Watson
Date: Mon, 12 Feb 2024 10:58:46 -0600
Subject: [PATCH] chore: remove unused func

---
 src/jobspy/scrapers/indeed/__init__.py | 38 +++++---------------------
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
index 4c2cc756..5b05cbd4 100644
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -49,7 +49,7 @@ def __init__(self, proxy: str | None = None):
 
     def scrape_page(
         self, scraper_input: ScraperInput, page: int
-    ) -> tuple[list[JobPost], int]:
+    ) -> list[JobPost]:
         """
         Scrapes a page of Indeed for jobs with scraper_input criteria
         :param scraper_input:
@@ -57,7 +57,6 @@ def scrape_page(
         :return: jobs found on page, total number of jobs found for search
         """
         job_list = []
-        total_num_jobs = 0
         self.country = scraper_input.country
         domain = self.country.indeed_domain_value
         self.url = f"https://{domain}.indeed.com"
@@ -80,12 +79,11 @@ def scrape_page(
                 logger.error(f'Indeed: Bad proxy')
             else:
                 logger.error(f'Indeed: {str(e)}')
-            return job_list, total_num_jobs
+            return job_list
 
         soup = BeautifulSoup(response.content, "html.parser")
-        total_num_jobs = IndeedScraper.total_jobs(soup)
         if "did not match any jobs" in response.text:
-            return job_list, total_num_jobs
+            return job_list
 
         jobs = IndeedScraper.parse_jobs(
             soup
@@ -147,7 +145,7 @@ def process_job(job: dict, job_detailed: dict) -> JobPost | None:
 
             job_list = [result.result() for result in job_results if result.result()]
 
-        return job_list, total_num_jobs
+        return job_list
 
     def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         """
@@ -155,7 +153,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         :param scraper_input:
         :return: job_response
         """
-        job_list, total_results = self.scrape_page(scraper_input, 0)
+        job_list = self.scrape_page(scraper_input, 0)
         pages_processed = 1
 
         while len(self.seen_urls) < scraper_input.results_wanted:
@@ -169,7 +167,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
                 ]
 
                 for future in futures:
-                    jobs, _ = future.result()
+                    jobs = future.result()
                     if jobs:
                         job_list += jobs
                         new_jobs = True
@@ -184,11 +182,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         if len(self.seen_urls) > scraper_input.results_wanted:
             job_list = job_list[:scraper_input.results_wanted]
 
-        job_response = JobResponse(
-            jobs=job_list,
-            total_results=total_results,
-        )
-        return job_response
+        return JobResponse(jobs=job_list)
 
     @staticmethod
     def get_job_type(job: dict) -> list[JobType] | None:
@@ -288,24 +282,6 @@ def find_mosaic_script() -> Tag | None:
                 "Could not find any results for the search"
             )
 
-    @staticmethod
-    def total_jobs(soup: BeautifulSoup) -> int:
-        """
-        Parses the total jobs for that search from soup object
-        :param soup:
-        :return: total_num_jobs
-        """
-        script = soup.find("script", string=lambda t: t and "window._initialData" in t)
-
-        pattern = re.compile(r"window._initialData\s*=\s*({.*})\s*;", re.DOTALL)
-        match = pattern.search(script.string)
-        total_num_jobs = 0
-        if match:
-            json_str = match.group(1)
-            data = json.loads(json_str)
-            total_num_jobs = int(data["searchTitleBarModel"]["totalNumResults"])
-        return total_num_jobs
-
     @staticmethod
     def get_headers():
         return {
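
Note (not part of the patch): a minimal usage sketch of the call shapes after this change. It assumes a ScraperInput instance named criteria has been built elsewhere (its constructor is not shown in this diff), and collect_jobs is an illustrative helper, not part of the library.

# Minimal sketch, assuming `criteria` is an already-built ScraperInput.
from jobspy.scrapers.indeed import IndeedScraper


def collect_jobs(criteria) -> list:
    scraper = IndeedScraper(proxy=None)

    # scrape_page() now returns only list[JobPost]; the old
    # (job_list, total_num_jobs) tuple is gone along with total_jobs().
    first_page = scraper.scrape_page(criteria, 0)

    # scrape() now builds JobResponse(jobs=...) without total_results.
    response = scraper.scrape(criteria)
    return first_page + response.jobs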