Skip to content

Commit

Permalink
feat: utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Oct 10, 2023
1 parent e5353e6 commit 90fa4a4
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions src/jobspy/scrapers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import re
import tls_client


def count_urgent_words(description: str) -> int:
    """
    Count the number of urgent words or phrases in a job description.

    Matching is case-insensitive and covers "urgent"/"urgency",
    "immediate"/"immediately", "start asap" and
    "hiring now"/"hiring immediate(ly)".

    :param description: Free-text job description; may be empty or None.
    :return: Number of non-overlapping matches, or 0 when there is no
        description to scan.
    """
    # A missing description means "no urgency signals", not an error:
    # without this guard the regex call raises TypeError on None.
    if not description:
        return 0

    urgent_patterns = re.compile(
        r"\burgen(t|cy)|\bimmediate(ly)?\b|start asap|\bhiring (now|immediate(ly)?)\b",
        re.IGNORECASE,
    )
    # The pattern contains capture groups, so findall() would build a list of
    # group tuples; finditer() counts whole matches without that detour.
    return sum(1 for _ in urgent_patterns.finditer(description))


def extract_emails_from_text(text: str) -> list[str] | None:
if not text:
return None
email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
return email_regex.findall(text)


def create_session(proxy: str | None = None):
    """
    Build a tls_client session and attach the given proxy setting to it.

    :param proxy: Value assigned verbatim to the session's ``proxies``
        attribute; defaults to None.
    :return: The configured ``tls_client.Session`` object.
    """
    client = tls_client.Session(
        client_identifier="chrome112",
        random_tls_extension_order=True,
    )
    # Assigned unconditionally, so a None proxy overwrites whatever default
    # the session was constructed with.
    client.proxies = proxy
    # TODO multiple proxies: choose one at random per scheme, i.e. a mapping
    # with "http" and "https" keys each drawn from a list of proxies.

    return client

0 comments on commit 90fa4a4

Please sign in to comment.