diff --git a/.DS_Store b/.DS_Store index e46b719..bc8df8f 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.env b/.env deleted file mode 100644 index e69de29..0000000 diff --git a/.gitignore b/.gitignore index e69de29..6bdf68c 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,55 @@ +# === Python build artifacts === +*.pyc +*.pyo +*.pyd +__pycache__/ +*.log + +# === SQLite & output files === +*.sqlite3 +*.db +output.json + +# === Environment variables === +.env +.env.* +*.env + +# === Virtual environments === +venv/ +.venv/ +.env/ + +# === VSCode project settings === +.vscode/ + +# === macOS system files === +*.DS_Store +*.egg-info/ + +# === Pytest and test cache === +htmlcov/ +.coverage +.cache/ +pytest_cache/ +.tox/ + +# === Jupyter Notebook === +.ipynb_checkpoints/ + +# === Django migration artifacts (optional to ignore) === +# Uncomment the lines below if you want to regenerate migrations often +# **/migrations/*.py +# **/migrations/*.pyc +# !**/migrations/__init__.py + +# === FastAPI-specific artifacts === +fastapi_email/email_db.sqlite3 + +# === IDE-specific === +.idea/ +*.sublime-project +*.sublime-workspace + +# === GitHub Codespaces or devcontainers === +.devcontainer/ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..b2d293c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,14 @@ +{ + "python.analysis.extraPaths": [ + "./webscraper/ABC" + ], + "python.testing.unittestArgs": [ + "-v", + "-s", + "./webscraper", + "-p", + "*test*.py" + ], + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true +} \ No newline at end of file diff --git a/README.md b/README.md index e4a5606..d70d692 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,15 @@ if __name__ == "__main__": ##what file needs to be located and what variables would need to be changed if you wanted to scrape another website? 
-If you wanted to scrape another website, you need to locate the file main.py and change the variables “scraper” and “pages” to whatever website you wanted and the new URl paths. As well ensure the website allows scraping. + + + +Documentation on connecting the database to VSCode with the PostgreSQL extension + +1. Install the PostgreSQL Extension in VSCode +2. Make sure PostgreSQL is Running Locally +3. Click the extension on the left sidebar +4. Click the plus button and create a new connection +5. Fill in the needed information: server = localhost, database = cheaper_local, User = postgres, port = 5432 (default), password = the password you made when installing PostgreSQL +6. You should now be connected: a confirmation message appears and the connected database is listed in the extension. + \ No newline at end of file diff --git a/accounts/__pycache__/__init__.cpython-312.pyc b/accounts/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 228acde..0000000 Binary files a/accounts/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/accounts/__pycache__/admin.cpython-312.pyc b/accounts/__pycache__/admin.cpython-312.pyc deleted file mode 100644 index 5a69a0d..0000000 Binary files a/accounts/__pycache__/admin.cpython-312.pyc and /dev/null differ diff --git a/accounts/__pycache__/apps.cpython-312.pyc b/accounts/__pycache__/apps.cpython-312.pyc deleted file mode 100644 index 33636a4..0000000 Binary files a/accounts/__pycache__/apps.cpython-312.pyc and /dev/null differ diff --git a/accounts/__pycache__/models.cpython-312.pyc b/accounts/__pycache__/models.cpython-312.pyc deleted file mode 100644 index 8e8be97..0000000 Binary files a/accounts/__pycache__/models.cpython-312.pyc and /dev/null differ diff --git a/accounts/migrations/0002_remove_product_name_remove_product_source_url_and_more.py b/accounts/migrations/0002_remove_product_name_remove_product_source_url_and_more.py new file mode 100644 index 0000000..e478a27 --- /dev/null +++ 
class Migration(migrations.Migration):
    """Second ``accounts`` migration: reshape ``Product`` and ``UserAccount``.

    Drops ``Product.name``, ``Product.source_url`` and ``UserAccount.password``;
    adds ``Product.product_name``, ``Product.url``, ``Product.user`` and
    ``UserAccount.password_hash``; and changes ``Product.price`` to a decimal
    column.
    """

    # Must run after the initial schema of the same app.
    dependencies = [
        ("accounts", "0001_initial"),
    ]

    # NOTE(review): the old columns are removed and new ones added instead of
    # being renamed (migrations.RenameField). Presumably values already stored
    # in the removed columns are not carried over and existing rows receive the
    # defaults below - confirm this is intended before applying to real data.
    operations = [
        migrations.RemoveField(
            model_name="product",
            name="name",
        ),
        migrations.RemoveField(
            model_name="product",
            name="source_url",
        ),
        migrations.RemoveField(
            model_name="useraccount",
            name="password",
        ),
        migrations.AddField(
            model_name="product",
            name="product_name",
            field=models.CharField(default="Unnamed Product", max_length=255),
        ),
        migrations.AddField(
            model_name="product",
            name="url",
            field=models.TextField(default="https://example.com"),
        ),
        # Nullable owner link; CASCADE removes a user's products together with
        # the user account.
        migrations.AddField(
            model_name="product",
            name="user",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to="accounts.useraccount",
            ),
        ),
        # NOTE(review): the default looks like a plaintext placeholder rather
        # than a real hash - verify how rows created with it are handled.
        migrations.AddField(
            model_name="useraccount",
            name="password_hash",
            field=models.CharField(default="defaultpass123", max_length=100),
        ),
        migrations.AlterField(
            model_name="product",
            name="price",
            field=models.DecimalField(decimal_places=2, default=0.0, max_digits=10),
        ),
    ]
def _serialize_product(product):
    """Build the JSON-ready dict for a single ``Product`` row."""
    return {
        "name": product.product_name,
        "price": float(product.price),
        "url": product.url,
    }


def product_list(request):
    """Respond with every ``Product`` as a JSON array.

    Each array element carries the keys ``name``, ``price`` (converted to
    ``float``) and ``url``.
    """
    rows = [_serialize_product(item) for item in Product.objects.all()]
    # safe=False is required because the top-level payload is a list, not a dict.
    return JsonResponse(rows, safe=False)
diff --git a/cheaper.egg-info/PKG-INFO b/cheaper.egg-info/PKG-INFO new file mode 100644 index 0000000..95cccb0 --- /dev/null +++ b/cheaper.egg-info/PKG-INFO @@ -0,0 +1,13 @@ +Metadata-Version: 2.1 +Name: cheaper +Version: 0.1 +Summary: cheaper for now +Classifier: Programming Language :: Python :: 3 +Classifier: Operating System :: OS Independent +Requires-Python: >=3.10 +Requires-Dist: beautifulsoup4 +Requires-Dist: lxml +Requires-Dist: flask +Requires-Dist: pandas +Requires-Dist: numpy +Requires-Dist: requests diff --git a/cheaper.egg-info/SOURCES.txt b/cheaper.egg-info/SOURCES.txt new file mode 100644 index 0000000..066dd89 --- /dev/null +++ b/cheaper.egg-info/SOURCES.txt @@ -0,0 +1,22 @@ +README.md +setup.py +accounts/__init__.py +accounts/admin.py +accounts/apps.py +accounts/models.py +accounts/tests.py +accounts/views.py +accounts/migrations/0001_initial.py +accounts/migrations/0002_remove_product_name_remove_product_source_url_and_more.py +accounts/migrations/__init__.py +cheaper/__init__.py +cheaper/asgi.py +cheaper/settings.py +cheaper/urls.py +cheaper/wsgi.py +cheaper.egg-info/PKG-INFO +cheaper.egg-info/SOURCES.txt +cheaper.egg-info/dependency_links.txt +cheaper.egg-info/entry_points.txt +cheaper.egg-info/requires.txt +cheaper.egg-info/top_level.txt \ No newline at end of file diff --git a/cheaper.egg-info/dependency_links.txt b/cheaper.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/cheaper.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/cheaper.egg-info/entry_points.txt b/cheaper.egg-info/entry_points.txt new file mode 100644 index 0000000..8808079 --- /dev/null +++ b/cheaper.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +cheaper = webscraper.main:main diff --git a/cheaper.egg-info/requires.txt b/cheaper.egg-info/requires.txt new file mode 100644 index 0000000..bf6938f --- /dev/null +++ b/cheaper.egg-info/requires.txt @@ -0,0 +1,6 @@ +beautifulsoup4 +lxml +flask +pandas +numpy 
+requests diff --git a/cheaper.egg-info/top_level.txt b/cheaper.egg-info/top_level.txt new file mode 100644 index 0000000..719490c --- /dev/null +++ b/cheaper.egg-info/top_level.txt @@ -0,0 +1,2 @@ +accounts +cheaper diff --git a/cheaper/__pycache__/__init__.cpython-312.pyc b/cheaper/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 65ab433..0000000 Binary files a/cheaper/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/cheaper/__pycache__/settings.cpython-312.pyc b/cheaper/__pycache__/settings.cpython-312.pyc deleted file mode 100644 index 048f033..0000000 Binary files a/cheaper/__pycache__/settings.cpython-312.pyc and /dev/null differ diff --git a/cheaper/__pycache__/urls.cpython-312.pyc b/cheaper/__pycache__/urls.cpython-312.pyc deleted file mode 100644 index 8e83205..0000000 Binary files a/cheaper/__pycache__/urls.cpython-312.pyc and /dev/null differ diff --git a/cheaper/urls.py b/cheaper/urls.py index 49e4fb1..3a5aea7 100644 --- a/cheaper/urls.py +++ b/cheaper/urls.py @@ -16,7 +16,9 @@ """ from django.contrib import admin from django.urls import path +from accounts.views import product_list urlpatterns = [ path('admin/', admin.site.urls), + path('', product_list, name='product_list'), # This sets the homepage ] diff --git a/db.sqlite3 b/db.sqlite3 index 9a49d22..fc5a943 100644 Binary files a/db.sqlite3 and b/db.sqlite3 differ diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c26de2c --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +setup( + name='cheaper', + version='0.1', + packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), + include_package_data=True, + install_requires=[ + "beautifulsoup4", + "lxml", + "flask", + "pandas", + "numpy", + "requests", + ], + entry_points={ + 'console_scripts': [ + 'cheaper=webscraper.main:main', + ], + }, + + description='cheaper for now', + classifiers=[ + 'Programming Language :: Python :: 3', + 
class EbayAPI:
    """Small client for the eBay sandbox REST API (client-credentials flow).

    The credentials are read once, when the class is defined, from the
    ``clientid`` and ``clientsecret`` environment variables. Both methods are
    static, so they work when called on the class
    (``EbayAPI.retrieve_access_token()``) as well as on an instance.
    """

    # Seconds to wait for eBay; requests has no default timeout and would
    # otherwise block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    TOKEN_URL = "https://api.sandbox.ebay.com/identity/v1/oauth2/token"

    client_secret_key = os.getenv("clientsecret")
    client_id_key = os.getenv("clientid")

    get_user_key = HTTPBasicAuth(client_id_key, client_secret_key)

    @staticmethod
    def retrieve_access_token() -> str:
        """Request an application access token from the sandbox OAuth endpoint.

        Returns:
            The ``access_token`` string from the JSON response.

        Raises:
            Exception: if the endpoint answers 404 or the response carries no
                ``access_token``.
            requests.HTTPError: for any other 4xx/5xx status.
            requests.RequestException: on connection errors or timeout.
        """
        response = requests.post(
            EbayAPI.TOKEN_URL,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data={
                "grant_type": "client_credentials",
                "scope": "https://api.ebay.com/oauth/api_scope",
            },
            auth=EbayAPI.get_user_key,
            timeout=EbayAPI.REQUEST_TIMEOUT,
        )
        # Check the status before touching the body: an error page may not be JSON.
        if response.status_code == 404:
            raise Exception("404 error here")
        # Surface every other HTTP failure (401 bad credentials, 5xx, ...)
        # instead of silently returning a missing token.
        response.raise_for_status()

        access_token = response.json().get("access_token")
        if not access_token:
            raise Exception("eBay token response did not contain an access_token")
        return access_token

    @staticmethod
    def retrieve_ebay_response(httprequest: str, query: str) -> dict:
        """Send an authenticated GET request to eBay and return the JSON body.

        Args:
            httprequest: Full URL of the eBay endpoint to call.
            query: Search text, sent as the ``q`` query parameter.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: if the endpoint answers 404, or the token could not be
                obtained.
            requests.HTTPError: for any other 4xx/5xx status.
            requests.RequestException: on connection errors or timeout.
        """
        auth = EbayAPI.retrieve_access_token()
        response = requests.get(
            httprequest,
            headers={
                "Authorization": f"Bearer {auth}",
                "Content-Type": "application/json",
            },
            params={
                "q": query,
                "category_tree_id": 0,
            },
            timeout=EbayAPI.REQUEST_TIMEOUT,
        )
        if response.status_code == 404:
            raise Exception("not found 404 error")
        response.raise_for_status()

        return response.json()
@patch("webscraper.api.EbayAPI.requests.post") + def test_retrieve_access_token_invalid(self,mock_post): + mock_response = Mock() + mock_response.status_code = 404 + mock_response.json.return_value ={"error": "not found"} + mock_post.return_value = mock_response + + with self.assertRaises(Exception): + self.EbayAPI.retrieve_access_token() + + + + @patch("webscraper.api.EbayAPI.requests.get") + def test_retrieve_ebay_response_invalid(self,mock_get): + self.EbayAPI.retrieve_ebay_response("https://test","item") + self.assertRaises(Exception) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/webscraper/src/main.py b/webscraper/main.py similarity index 84% rename from webscraper/src/main.py rename to webscraper/main.py index 4a27839..258536c 100644 --- a/webscraper/src/main.py +++ b/webscraper/main.py @@ -1,19 +1,23 @@ - import json #import time // for testing # i added htese imports below becasue when i ran it it wasnt finding the folders import sys import os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from src.Cheaper_Scraper import CheaperScraper +from webscraper.src.Cheaper_Scraper import CheaperScraper + def main(): + + + + # Set up the scraper for a simple legal-to-scrape website scraper = CheaperScraper("https://books.toscrape.com", - user_agent="CheaperBot/0.1", - delay=2.0) - + user_agent="CheaperBot/0.1", + delay=2.0) + # Define which pages you want to scrape (you can use "/" for homepage) pages = ["/"] diff --git a/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc b/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc deleted file mode 100644 index f60b091..0000000 Binary files a/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc b/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc deleted file mode 100644 index cc3aa5e..0000000 Binary files 
a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc b/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc deleted file mode 100644 index 036324a..0000000 Binary files a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/__init__.cpython-311.pyc b/webscraper/src/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index c420941..0000000 Binary files a/webscraper/src/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc b/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc index 4a1d1ac..843bd32 100644 Binary files a/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc and b/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc differ diff --git a/webscraper/src/__pycache__/robot_check.cpython-311.pyc b/webscraper/src/__pycache__/robot_check.cpython-311.pyc deleted file mode 100644 index e7f181b..0000000 Binary files a/webscraper/src/__pycache__/robot_check.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/robot_check.cpython-39.pyc b/webscraper/src/__pycache__/robot_check.cpython-39.pyc deleted file mode 100644 index d1557b9..0000000 Binary files a/webscraper/src/__pycache__/robot_check.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc b/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc deleted file mode 100644 index ef900b2..0000000 Binary files a/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc and /dev/null differ