Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
1,140 changes: 1,088 additions & 52 deletions TP1/1 - Practical number 1.ipynb

Large diffs are not rendered by default.

Binary file added TP2 and 3/.DS_Store
Binary file not shown.
7 changes: 7 additions & 0 deletions TP2 and 3/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
venv/
*.pyc
__pycache__/
.env
src/data/
src/models/
credentials.json
Binary file added TP2 and 3/config/.DS_Store
Binary file not shown.
7 changes: 7 additions & 0 deletions TP2 and 3/config/dev/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"ENV_NAME": "development",
"DEBUG": true,
"DATABASE_URI": "sqlite:///dev.db",
"API_URL": "http://localhost:8080"
}

Empty file removed TP2 and 3/config/prd
Empty file.
7 changes: 7 additions & 0 deletions TP2 and 3/config/prd/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"ENV_NAME": "production",
"DEBUG": false,
"DATABASE_URI": "postgresql://user:password@prod-db:5432/app",
"API_URL": "https://api.myapp.com"
}

Binary file added TP2 and 3/services/.DS_Store
Binary file not shown.
19 changes: 17 additions & 2 deletions TP2 and 3/services/epf-flower-data-science/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
import os
from fastapi import FastAPI
import uvicorn

from fastapi.responses import RedirectResponse
from src.app import get_application
from src.api.routes import data

# Point the Kaggle client at the kaggle.json shipped next to this file
# instead of the default ~/.kaggle location.
kaggle_json_path = os.path.join(os.path.dirname(__file__), "kaggle.json")
os.environ['KAGGLE_CONFIG_DIR'] = os.path.dirname(kaggle_json_path)

app = get_application()

@app.get("/", include_in_schema=False)
async def redirect_to_docs():
    """Redirect bare-root requests to the interactive Swagger UI."""
    return RedirectResponse(url="/docs")

# BUG FIX: FastAPI requires router prefixes to start with "/";
# the original "api/data" raises a ValueError at startup.
app.include_router(data.router, prefix="/api/data", tags=["Dataset"])

if __name__ == "__main__":
    # Single entry point; the superseded uvicorn.run(debug=True, port=8080)
    # left over from the diff is removed.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
2 changes: 2 additions & 0 deletions TP2 and 3/services/epf-flower-data-science/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ fastapi-utils==0.2.1
pydantic==1.10
opendatasets
pytest
firebase-admin

3 changes: 3 additions & 0 deletions TP2 and 3/services/epf-flower-data-science/src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from src.services.parameters import get_parameters, update_parameters

__all__ = ['get_parameters', 'update_parameters']
File renamed without changes.
12 changes: 10 additions & 2 deletions TP2 and 3/services/epf-flower-data-science/src/api/router.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
"""API Router for Fast API."""
from fastapi import APIRouter

from .routes import parameters
from fastapi.responses import RedirectResponse
from src.api.routes import hello
from src.api.routes import data

router = APIRouter()

@router.get("/", include_in_schema=False)
async def root():
return RedirectResponse(url="/docs")

router.include_router(hello.router, tags=["Hello"])
router.include_router(data.router, prefix="/data", tags=["Dataset"])
router.include_router(parameters.router, tags=["parameters"])

133 changes: 133 additions & 0 deletions TP2 and 3/services/epf-flower-data-science/src/api/routes/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import os
from fastapi import APIRouter
import pandas as pd
from kaggle.api.kaggle_api_extended import KaggleApi
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib
from pydantic import BaseModel
from fastapi import APIRouter, HTTPException
from src.schemas.message import Parameter
from src.services.parameters import get_parameters, update_parameters


# Constants
# Directory where the Kaggle client looks for kaggle.json credentials.
KAGGLE_CONFIG_DIR = os.path.expanduser("~/.kaggle")
# Raw and processed copies of the Iris dataset.
DATASET_PATH = "src/data/Iris.csv"
PROCESSED_DATASET_PATH = "src/data/processed_iris.csv"
# Artifacts produced by /train-model and consumed by /predict.
# (The duplicate second definitions of these two paths are removed.)
MODEL_FILE_PATH = "src/data/random_forest_model.pkl"
ENCODER_FILE_PATH = "src/data/label_encoder.pkl"

class IrisFeatures(BaseModel):
    """Request schema for /predict: the four Iris flower measurements (cm).

    Field names mirror the Kaggle CSV column headers so that
    ``features.dict()`` feeds straight into a DataFrame whose columns
    match the ones the model was trained on.
    """

    SepalLengthCm: float
    SepalWidthCm: float
    PetalLengthCm: float
    PetalWidthCm: float


router = APIRouter()

@router.get("/download-dataset")
async def download_dataset():
"""Downloads the Iris dataset from Kaggle."""
try:
api = KaggleApi()
api.authenticate()
api.dataset_download_files('uciml/iris', path='src/data/', unzip=True)
return {"message": "Dataset downloaded successfully."}
except Exception as e:
return {"error": str(e)}

@router.get("/load-dataset")
async def load_dataset():
"""Loads the Iris dataset and returns it as JSON."""
try:
df = pd.read_csv(DATASET_PATH)
return {"data": df.to_dict(orient="records")}
except Exception as e:
return {"error": str(e)}

@router.get("/process-dataset")
async def process_dataset():
"""Processes the Iris dataset."""
try:
df = pd.read_csv(DATASET_PATH)
if 'Species' not in df.columns:
return {"error": "La colonne 'Species' n'existe pas dans le dataset."}
if df.isnull().sum().any():
return {"error": "Le dataset contient des valeurs manquantes."}

label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

scaler = StandardScaler()
df_features = df.drop(columns=['Id', 'Species'])
X_scaled = scaler.fit_transform(df_features)
processed_df = pd.DataFrame(X_scaled, columns=df_features.columns)

processed_df.to_csv(PROCESSED_DATASET_PATH, index=False)
return {
"message": "Dataset processed successfully.",
"example_data": processed_df.head(5).to_dict(orient="records")
}
except Exception as e:
return {"error": str(e)}

@router.get("/split-dataset")
async def split_dataset():
"""Splits the Iris dataset into training and testing sets."""
try:
df = pd.read_csv(PROCESSED_DATASET_PATH)
train, test = train_test_split(df, test_size=0.2, random_state=42)
return {
"train": train.to_dict(orient="records"),
"test": test.to_dict(orient="records")
}
except Exception as e:
return {"error": str(e)}

@router.post("/train-model")
async def train_model():
"""Trains the classification model using the Iris dataset."""
try:
df = pd.read_csv(DATASET_PATH)
if df.empty:
return {"error": "Le dataset est vide."}

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df['Species'])

processed_df = pd.read_csv(PROCESSED_DATASET_PATH)
if processed_df.empty:
return {"error": "Le dataset prétraité est vide."}

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(processed_df, y_encoded)

joblib.dump(model, MODEL_FILE_PATH)
joblib.dump(label_encoder, ENCODER_FILE_PATH)

return {"message": "Model trained and saved successfully."}
except Exception as e:
return {"error": str(e)}

@router.post("/predict")
async def predict(features: IrisFeatures):
"""Makes predictions using the trained model."""
try:
if not os.path.exists(MODEL_FILE_PATH) or not os.path.exists(ENCODER_FILE_PATH):
raise HTTPException(status_code=404, detail="Model or encoder file not found. Please train the model first.")

model = joblib.load(MODEL_FILE_PATH)
label_encoder = joblib.load(ENCODER_FILE_PATH)

input_data = pd.DataFrame([features.dict()])
prediction_encoded = model.predict(input_data)
predicted_species = label_encoder.inverse_transform(prediction_encoded)

return {"predicted_species": predicted_species[0]}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}")
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

router = APIRouter()


@router.get("/hello/{name}", name="Demo route", response_model=MessageResponse)
def hello(name: str) -> MessageResponse:
return MessageResponse(message=f"Hello {name}, from fastapi test route !")
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from fastapi import APIRouter
from src.schemas.message import Parameter
from src.services.parameters import get_parameters, update_parameters
from fastapi import HTTPException

router = APIRouter()

@router.get("/parameters")
async def read_parameters():
params = get_parameters()
if not params:
raise HTTPException(status_code=404, detail="Parameters not found")
return params

@router.put("/parameters")
async def update_params(params: Parameter):
update_parameters(params.dict())
return {"message": "Parameters updated successfully"}

6 changes: 3 additions & 3 deletions TP2 and 3/services/epf-flower-data-science/src/app.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware

from src.api.router import router


def get_application() -> FastAPI:
application = FastAPI(
title="epf-flower-data-science",
description="""Fast API""",
description="Fast API",
version="1.0.0",
redoc_url=None,
)
Expand All @@ -22,3 +20,5 @@ def get_application() -> FastAPI:

application.include_router(router)
return application

app = get_application()
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import os
import json

def load_config(env: str) -> dict:
    """Load the JSON configuration for *env* (e.g. "dev" or "prd").

    Args:
        env: Name of the environment sub-directory under ./config/.

    Returns:
        The parsed configuration dictionary.

    Raises:
        FileNotFoundError: If no config.json exists for that environment.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    config_path = f"./config/{env}/config.json"
    # Explicit encoding so the file parses identically on every platform.
    with open(config_path, "r", encoding="utf-8") as f:
        return json.load(f)

current_env = os.getenv("API", "dev")
config = load_config(current_env)
print(f"Environnement actuel : {config['API']}")

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"model_type": "RandomForestClassifier",
"parameters": {
"n_estimators": 260,
"max_depth": 5,
"random_state": 42
}
}
Loading