-
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #45 from sayantikabanik/ai_models
Generating analytics using `gpt-4o-mini` Model
- Loading branch information
Showing
4 changed files
with
153 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import os | ||
import pandas as pd | ||
from openai import OpenAI | ||
import intake | ||
from analytics_framework import INTAKE_LOC | ||
from pathlib import Path | ||
|
||
# Data read via intake catalog.
# Path(...) / name works whether INTAKE_LOC is a str or a Path, whereas calling
# the unbound Path.joinpath method requires INTAKE_LOC to already be a Path.
CATALOG_LOC = Path(INTAKE_LOC) / "catalog_entry.yml"
catalog = intake.open_catalog(CATALOG_LOC)

# The API token is read from the environment (a missing GITHUB_TOKEN raises
# KeyError when this module is loaded); the endpoint and model name are fixed.
token = os.environ["GITHUB_TOKEN"]
endpoint = "https://models.inference.ai.azure.com"
model_name = "gpt-4o-mini"

# Initialize the OpenAI client against the endpoint above
client = OpenAI(
    base_url=endpoint,
    api_key=token,
)
|
||
|
||
def analyze_data(intake_catalog_entry):
    """Summarize a catalog dataset and ask the chat model to analyze it.

    Reads the entry named ``intake_catalog_entry`` from the module-level
    ``catalog``, builds a ``describe()`` summary of the loaded data, sends
    that summary to the model named by ``model_name`` through the
    module-level ``client``, and prints the model's reply.

    Args:
        intake_catalog_entry: Name of the entry to read from ``catalog``.

    Returns:
        The reply text produced by the model, or ``None`` when loading the
        data or calling the model fails (the error is printed instead of
        being raised).
    """
    # Load the data via intake
    try:
        df_input = catalog[intake_catalog_entry].read()
        print(f"Data loaded successfully {df_input.head()}")
    except Exception as e:
        # Best-effort script behaviour: report the problem and give up.
        print(f"Error loading data: {e}")
        return None

    # Prepare the data for analysis (simple description of the dataset).
    # Only this summary text is sent to the model, not the raw rows.
    summary = df_input.describe().to_string()

    # Create the system and user messages for the model
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant skilled in analyzing data.",
        },
        {
            "role": "user",
            "content": f"Here is a summary of my data:\n{summary}\nProvide an analysis of this dataset, "
                       f"display in html format along with the dataset provided.",
        },
    ]

    # Generate a response from the configured model (see model_name)
    try:
        response = client.chat.completions.create(
            messages=messages,
            model=model_name,
            temperature=1.0,
            max_tokens=1000,
            top_p=1.0,
        )
        analysis = response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return None

    # Output the analysis from the model, and hand it back so callers can
    # save or post-process it instead of scraping stdout.
    print(analysis)
    return analysis
|
||
|
||
# Example usage
intake_catalog_entry = "address_sample"

# Only call the model when this file is executed as a script, so importing
# the module does not trigger a network request.
if __name__ == "__main__":
    analyze_data(intake_catalog_entry)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<title>Data Analysis Summary</title> | ||
<style> | ||
body { | ||
font-family: Arial, sans-serif; | ||
margin: 20px; | ||
} | ||
h1 { | ||
color: #4A90E2; | ||
} | ||
table { | ||
width: 50%; | ||
border-collapse: collapse; | ||
margin: 10px 0; | ||
} | ||
th, td { | ||
border: 1px solid #dddddd; | ||
text-align: left; | ||
padding: 8px; | ||
} | ||
th { | ||
background-color: #f2f2f2; | ||
} | ||
</style> | ||
</head> | ||
<body> | ||
|
||
<h1>Data Analysis Summary</h1> | ||
|
||
<table> | ||
<tr> | ||
<th>Statistic</th> | ||
<th>Value</th> | ||
</tr> | ||
<tr> | ||
<td>Count</td> | ||
<td>5</td> | ||
</tr> | ||
<tr> | ||
<td>Mean</td> | ||
<td>21769.80</td> | ||
</tr> | ||
<tr> | ||
<td>Standard Deviation</td> | ||
<td>39059.21</td> | ||
</tr> | ||
<tr> | ||
<td>Minimum</td> | ||
<td>123</td> | ||
</tr> | ||
<tr> | ||
<td>25th Percentile</td> | ||
<td>298</td> | ||
</tr> | ||
<tr> | ||
<td>Median (50th Percentile)</td> | ||
<td>8075</td> | ||
</tr> | ||
<tr> | ||
<td>75th Percentile</td> | ||
<td>9119</td> | ||
</tr> | ||
<tr> | ||
<td>Maximum</td> | ||
<td>91234</td> | ||
</tr> | ||
</table> | ||
|
||
<h2>Analysis</h2> | ||
<p>The dataset consists of 5 observations. The mean (21769.80) is pulled well above the median (8075) by a single extreme value, the maximum of 91234, which is substantially higher than the other values. The standard deviation (39059.21) indicates high variability in the data.</p> | ||
<p>Looking at the spread of the data:</p> | ||
<ul> | ||
<li>The minimum value is 123, while the maximum value is 91234, showing that there is a wide range of values.</li> | ||
<li>The 25th percentile (298), median (8075), and 75th percentile (9119) suggest a skewed distribution, as most of the data points are towards the lower end of the scale.</li> | ||
</ul> | ||
|
||
<p>This indicates that the maximum value is an outlier compared to the rest of the data. Such outliers can affect overall analysis and should be treated accordingly depending on the context of the study.</p> | ||
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,3 +31,4 @@ dependencies: | |
- pip: | ||
- mitoinstaller | ||
- quarto | ||
- openai |