Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2211,6 +2211,13 @@ print(serps)
</tbody>
</table>

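To choose the Google domain and host language, pass the `domain` and `host_language` parameters to `get_serps()`. The `domain` must be one of the supported Google domains (for example `google.co.uk`, `google.com` or `google.de`); any other value raises a `ValueError`. The example below searches for "bmw" on the German Google domain and returns the results in German.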

```python
df = seo.get_serps("bmw", pages=1, domain="google.de", host_language="de")
```



#### Create an ABCD classification of Google Search Console data
The `classify_pages()` function returns an ABCD classification of Google Search Console data. It calculates the cumulative sum of clicks and then categorises pages using the ABC algorithm (the first 80% are classed A, the next 10% are classed B, and the final 10% are classed C, with the zero-click pages classed D).
Expand Down
45 changes: 35 additions & 10 deletions ecommercetools/seo/google_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,21 @@ def _get_source(url: str):
print(e)


def _get_site_results(url: str):
def _get_site_results(url: str, domain="google.co.uk", hl="en"):
"""Return the source of a site:url search.

Args:
url: URL of page to append to site: query
domain: Google domain to use (default is google.co.uk)
hl: Language to use (default is English)

Returns:
response (str): HTML of page.
"""

try:
query = urllib.parse.quote_plus(url)
response = _get_source("https://www.google.co.uk/search?q=site%3A" + query + "&num=100")
response = _get_source("https://www."+ domain + "/search?q=site%3A" + query + "&num=100" + "&hl=" + hl)

return response
except requests.exceptions.RequestException as e:
Expand Down Expand Up @@ -116,28 +118,41 @@ def get_indexed_pages(urls: list):
return df


def _get_results(query: str, domain="google.co.uk", hl="en"):
    """Return the source of a Google search for a query.

    Args:
        query: Search query term.
        domain: Google domain to use (default is google.co.uk).
        hl: Host language code sent as the `hl` parameter (default is en).

    Returns:
        The value returned by `_get_source` for the search URL.
    """

    # urlencode applies quote_plus to every value, so both the query term and
    # the language code are escaped before they are placed in the URL.
    params = urllib.parse.urlencode({"q": query, "num": 100, "hl": hl})
    url = "https://www." + domain + "/search?" + params

    return _get_source(url)


def _get_next_page(response, domain="google.co.uk"):
"""Get the URL for the next page of results."""
def _get_next_page(response, domain="google.co.uk", hl="en"):
"""Get the URL for the next page of results.

Args:
response: HTML of page.
domain: Google domain to use (default is google.co.uk)
hl: Language to use (default is English)

Returns:
url (str): URL of next page of results.

"""

css_identifier_next = "#pnnext"
next_page_url = response.html.find(css_identifier_next, first=True).attrs['href']
next_page = "https://www." + domain + next_page_url
next_page = "https://www." + domain + next_page_url + "&hl=" + hl

return next_page

Expand Down Expand Up @@ -209,22 +224,32 @@ def _parse_search_results(response):
def get_serps(query: str,
output="dataframe",
pages=1,
domain="google.co.uk"):
domain="google.co.uk",
host_language="en"):
"""Return the Google search results for a given query.

Args:
query (string): Query term to search Google for.
output (string, optional): Optional output format (dataframe or dictionary).
pages (int, optional): Optional number of pages to return.
domain (string, optional): Optional Google domain (default is google.co.uk).
host_language (string, optional): Optional host_language (default is en).

Returns:
results (dict): Results of query.
"""

response = _get_results(query)
if domain not in ["google.co.uk", "google.com", "google.co.in", "google.com.au", "google.com.br", "google.ca",
"google.com.mx", "google.co.nz", "google.com.ph", "google.pl", "google.com.sa", "google.com.sg",
"google.co.za", "google.es", "google.fr", "google.de", "google.it", "google.co.jp", "google.com.tw",
"google.com.tr", "google.com.vn"]:
raise ValueError("Domain must be a valid Google domain in the format of google.co.uk or google.com")
else:
pass

response = _get_results(query, domain=domain, hl=host_language)
results = _parse_search_results(response)
next_page = _get_next_page(response)
next_page = _get_next_page(response, domain=domain, hl=host_language)

page = 1
while page <= pages:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
setup(
name='ecommercetools',
packages=find_namespace_packages(include=['ecommercetools.*']),
version='0.42.8',
version='0.42.9',
license='MIT',
description='EcommerceTools is a data science toolkit for ecommerce, marketing science, and Python SEO.',
long_description=long_description,
Expand Down