-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e75d32d
Showing
5 changed files
with
279 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
pip-wheel-metadata/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# Папки, создаваемые средой разработки | ||
.idea | ||
.DS_Store | ||
.AppleDouble | ||
.LSOverride | ||
|
||
*.sublime-project | ||
*.sublime-workspace | ||
|
||
.vscode/ | ||
*.code-workspace | ||
|
||
# Local History for Visual Studio Code | ||
.history/ | ||
|
||
.mypy_cache | ||
task_parsing.txt | ||
work_logs.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Парсинг данных с карточки товара Wildberries | ||
|
||
Поиск данных осуществляется через GET запрос к API Wildberries, в качестве параметра передается Артикул товара на Маркетплейсе. | ||
|
||
## Как работать с проектом | ||
1. Клонируйте репозиторий и перейдите в корневую директорию: | ||
``` | ||
git clone https://github.com/klikovskiy/wildberries_product | ||
cd wildberries_product | ||
``` | ||
2. Установите зависимости проекта. | ||
``` | ||
pip install -r requirements.txt | ||
``` | ||
3. Создайте файл task_parsing.txt. Поместите туда артикулы товаров, каждый с новой строки, пример ниже. | ||
``` | ||
12345612 | ||
65432121 | ||
09876512 | ||
``` | ||
4. Запустите скрипт parsing_wb.py | ||
5. Если все сделано правильно, увидите лог выполнения. | ||
``` | ||
2023-03-30 01:00:00,00, INFO, root, Выполняю поиск данных по артикулу: 12345612. | ||
2023-03-30 01:00:00,00, INFO, root, Выполняю поиск данных по артикулу: 65432121. | ||
2023-03-30 01:00:00,00, INFO, root, Выполняю поиск данных по артикулу: 09876512. | ||
``` | ||
|
||
_В скрипте учтена обработка часто возникающих ошибок. | ||
Ведется логирование действий в файл work_logs.log. | ||
При желании можно включить более подробное логирование, | ||
заменив параметр ```level=logging.INFO``` на ```level=logging.DEBUG```_ | ||
|
||
|
||
|
||
## Требования к проекту | ||
- Python 3.6+ (в скрипте используются f-строки) | ||
|
||
_Проект размещен в ознакомительных целях. | ||
Не провожу консультации, как его запустить или что-то сделать!_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import logging | ||
import time | ||
|
||
import pandas as pd | ||
import requests | ||
from fake_useragent import UserAgent | ||
|
||
# Every record goes both to 'work_logs.log' (opened with mode='w', so the
# file is truncated on each run) and to the console stream.
_log_handlers = [
    logging.FileHandler('work_logs.log', mode='w', encoding='utf-8'),
    logging.StreamHandler(),
]
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s, %(levelname)s, %(name)s, %(message)s',
    handlers=_log_handlers,
)
|
||
|
||
def xlsx_writer(data, file_nam='result.xlsx', sheet_name='wildberries'):
    """Write ``data`` to an .xlsx workbook.

    Args:
        data: Anything ``pandas.DataFrame`` accepts; the caller in this file
            passes a dict that maps column titles to equally long lists.
        file_nam: Path of the workbook to create.
        sheet_name: Name of the worksheet inside the workbook.
    """
    pd.DataFrame(data).to_excel(
        file_nam,
        sheet_name=sheet_name,
        index=False,  # keep the DataFrame's row index out of the sheet
    )
|
||
|
||
def load_vender_code():
    """Load the list of vendor codes from ``task_parsing.txt``.

    The file is expected to hold one numeric vendor code per line. Blank
    lines are ignored, and a leading UTF-8 byte-order mark is tolerated.

    Returns:
        A non-empty list of ``int`` vendor codes, or ``None`` when the file
        is missing, contains a non-numeric line, or holds no codes at all.
        Each failure is logged at CRITICAL level with its own message.
    """
    try:
        # 'utf-8-sig' strips a BOM if present, so int() does not choke on
        # the first line of a file saved with one.
        with open('task_parsing.txt', 'r', encoding='utf-8-sig') as open_file:
            # Blank lines are skipped instead of being fed to int(''),
            # which would reject the whole file as "non-numeric".
            lines = [
                int(line) for line in map(str.strip, open_file) if line
            ]
    except FileNotFoundError:
        logging.critical('Отсутствует файл "task_parsing.txt"')
        return None
    except ValueError:
        logging.critical('Можно добавлять только числовые артикулы!')
        return None

    if not lines:
        # Reported outside the try block: raising ValueError here would be
        # caught by the handler above and logged with the wrong message.
        logging.critical('Добавьте хотя бы 1 артикул в "task_parsing.txt"')
        return None
    return lines
|
||
|
||
def _fetch_products(vendor_code, headers, timeout):
    """Return the product dicts the card API reports for ``vendor_code``.

    Any failure (network error, timeout, non-200 status, a body that is not
    JSON, or a payload without ``data``/``products``) yields an empty list,
    so the caller can log it and move on to the next vendor code.
    """
    try:
        response = requests.get(
            url=f'https://card.wb.ru/cards/detail'
                f'?spp=18&locale=ru&lang=ru&curr=rub'
                f'&nm={vendor_code}',
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as error:
        logging.debug('Ошибка запроса по артикулу %s: %s', vendor_code, error)
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # The body was not valid JSON.
        return []

    data = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        return []
    return data.get('products') or []


def parsing_product(delay_vendor=0.5, timeout=10):
    """Look up every vendor code and export the collected data to .xlsx.

    Vendor codes come from ``load_vender_code()``. For each code the card
    API is queried and one row per returned product is collected; the rows
    are then written with ``xlsx_writer``. A vendor code that cannot be
    fetched is logged as a warning and skipped, so a single failure does
    not discard the rows already collected.

    Args:
        delay_vendor: Seconds to sleep before each request.
        timeout: Seconds to wait for the server before giving up on a
            request.
    """
    results = {
        'Артикул': [],
        'Название': [],
        'Брэнд': [],
        'Стоимость': [],
        'Размер скидки': [],
        'Размер СПП': [],
        'URL товара': [],
        'Количество отзывов': [],
        'Рейтинг': [],
    }
    load_data = load_vender_code()
    if not load_data:
        logging.critical('Ошибка загрузки данных!')
        return

    # Built once instead of once per vendor code; the header value is still
    # read from it on every request, as before.
    user_agent = UserAgent(use_external_data=True)

    for vendor_code in load_data:
        time.sleep(delay_vendor)
        logging.info(f'Выполняю поиск данных по артикулу: {vendor_code}.')
        headers = {'user-agent': user_agent.chrome}

        products = _fetch_products(vendor_code, headers, timeout)
        if not products:
            logging.warning(f'Проблемы с получением данных, '
                            f'артикул {vendor_code}')
            continue

        for product in products:
            # 'extended' may be absent; fall back to an empty mapping so
            # the sale columns receive None instead of raising.
            extended = product.get('extended') or {}
            results['Артикул'].append(vendor_code)
            results['Название'].append(product.get('name'))
            results['Брэнд'].append(product.get('brand'))
            results['Стоимость'].append(product.get('priceU'))
            results['Размер скидки'].append(extended.get('basicSale'))
            results['Размер СПП'].append(extended.get('clientSale'))
            results['URL товара'].append(
                f'https://www.wildberries.ru/catalog/'
                f'{vendor_code}/detail.aspx'
            )
            results['Количество отзывов'].append(product.get('feedbacks'))
            results['Рейтинг'].append(product.get('rating'))

    xlsx_writer(results)
|
||
|
||
# Script entry point: runs the lookup for every vendor code with the default
# delay. Note that this call also executes when the module is imported.
parsing_product()
Binary file not shown.
Binary file not shown.