File tree 2 files changed +74
-0
lines changed
2 files changed +74
-0
lines changed Original file line number Diff line number Diff line change
1
+ # Jumia webpage scraper (https://www.jumia.co.ke/)
2
+
3
+ A script that scrapes product names and prices from Jumia Kenya.
4
+
5
+ ## Requirements
6
+ 1 . Python 3.6 or above
7
+ 2 . BeautifulSoup
8
+ 3 . Requests
9
+ 4 . json (part of the Python standard library, no installation needed)
10
+
11
+ ## Usage
12
+ This script can be used to scrape name and price of any product from Jumia Kenya.
13
+ To use this script, run the following command in your terminal:
14
+ ``` python index.py ```
15
+ The script will prompt you to enter the url of the products you want to scrape.
16
+ Use the url of the main page of the products you want to scrape.
17
+ For example, if you want to scrape all the products in the category "Phones and Tablets", use the url of the main page of the category.
18
+ The script will then scrape the name and price of all the products on that page and save them in a JSON file named `jumia.json` in the current directory.
19
+
20
+ ## Author
21
+ [Brian Koech]
Original file line number Diff line number Diff line change
1
+ # Scrape product names and prices from the Jumia website
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import json
5
+
6
+ #function to get the product details
7
def get_product_details(product_link, timeout=30):
    """Scrape the name and price of every product listed on a Jumia page.

    Args:
        product_link: URL of the listing page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'name': ..., 'price': ...}`` dicts, one for each
        ``<article class="prd">`` element on the page. A value is ``None``
        when the matching element is missing from the article.

    Raises:
        requests.RequestException: If the page cannot be downloaded or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests may block forever on an unresponsive server.
    product_page = requests.get(product_link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty result that looks like a successful scrape.
    product_page.raise_for_status()
    product_soup = BeautifulSoup(product_page.content, 'html.parser')

    all_data = []
    for article in product_soup.find_all('article', class_='prd'):
        # The product name is looked up as a <div class="name"> first,
        # falling back to <h3 class="name">.
        name = (
            article.find('div', class_='name')
            or article.find('h3', class_='name')
        )
        price = article.find('div', class_='prc')

        data = {
            'name': name.text.strip() if name else None,
            'price': price.text.strip() if price else None,
        }
        print(data)
        all_data.append(data)
    return all_data
34
+
35
+
36
+
37
+ #write the product details to a json file
38
def write_to_json(product_details):
    """Save the scraped product details to ``jumia.json``.

    Args:
        product_details: JSON-serializable data (the list of product dicts).

    Raises:
        TypeError: If ``product_details`` contains a value that cannot be
            serialized to JSON. The existing file is left untouched.
    """
    # Serialize before opening the file: mode 'w' truncates immediately, so a
    # serialization error after opening would destroy any previous results.
    serialized = json.dumps(product_details, indent=4)
    with open('jumia.json', 'w', encoding='utf-8') as file:
        file.write(serialized)
41
+
42
+ # Prompt for a listing URL, scrape it and save the results
43
def get_page_links():
    """Ask the user for a Jumia listing URL, scrape it and store the result.

    Reads the URL from standard input, passes it to ``get_product_details``,
    hands the scraped products to ``write_to_json`` and prints ``Done``.
    """
    prompt = 'Enter the jumia url of the page you want to scrape(select a url of a main category): '
    listing_url = input(prompt)
    write_to_json(get_product_details(listing_url))
    print('Done')
49
+
50
+
51
if __name__ == "__main__":
    # Start the interactive scrape only when the file is run as a script.
    get_page_links()
You can’t perform that action at this time.
0 commit comments