-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Michael Galarnyk
committed
Nov 12, 2023
1 parent
0dcde1b
commit cd916a8
Showing
34 changed files
with
5,352 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
208 changes: 208 additions & 0 deletions
208
Pandas/.ipynb_checkpoints/AggregateFunctions-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import libraries\n", | ||
"import pandas as pd\n", | ||
"import numpy as np" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load Excel File\n", | ||
"filename = 'data/car_financing.xlsx'\n", | ||
"df = pd.read_excel(filename)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Filtering \n", | ||
"car_filter = df['car_type']=='Toyota Sienna'\n", | ||
"interest_filter = df['interest_rate']==0.0702\n", | ||
"df = df.loc[car_filter & interest_filter, :]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Approach 1 dictionary substitution using rename method\n", | ||
"df = df.rename(columns={'Starting Balance': 'starting_balance',\n", | ||
" 'Interest Paid': 'interest_paid', \n", | ||
" 'Principal Paid': 'principal_paid',\n", | ||
" 'New Balance': 'new_balance'})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Approach 2 list replacement\n", | ||
"# Only changing Month -> month, but we need to list the rest of the columns\n", | ||
"df.columns = ['month',\n", | ||
" 'starting_balance',\n", | ||
" 'Repayment',\n", | ||
" 'interest_paid',\n", | ||
" 'principal_paid',\n", | ||
" 'new_balance',\n", | ||
" 'term',\n", | ||
" 'interest_rate',\n", | ||
" 'car_type']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Approach 1\n", | ||
"# This approach allows you to drop multiple columns at a time \n", | ||
"df = df.drop(columns=['term'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Approach 2 use the del command\n", | ||
"del df['Repayment']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Aggregate Methods\n", | ||
"It is often a good idea to compute summary statistics." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Aggregate Method | Description\n", | ||
"--- | --- \n", | ||
"sum | sum of values\n", | ||
"cumsum | cumulative sum\n", | ||
"mean | mean of values\n", | ||
"median | arithmetic median of values\n", | ||
"min | minimum\n", | ||
"max | maximum\n", | ||
"mode | mode\n", | ||
"std | sample standard deviation (normalized by N-1 by default)\n", | ||
"var | unbiased variance\n", | ||
"quantile | compute rank-based statistics of elements" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# sum the values in a column\n", | ||
"# total amount of interest paid over the course of the loan\n", | ||
"df['interest_paid'].sum()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# sum the values in each column (returns one total per column)\n", | ||
"df.sum()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"'Toyota Sienna' + 'Toyota Sienna'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Notice that, by default, the sum method skips missing values (skipna=True). \n", | ||
"help(df['interest_paid'].sum)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# The info method gives the column datatypes + number of non-null values\n", | ||
"# Notice that we seem to have 60 non-null values for all but the interest_paid column. \n", | ||
"df.info()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"anaconda-cloud": {}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} |
125 changes: 125 additions & 0 deletions
125
Pandas/.ipynb_checkpoints/BasicOperations-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import libraries\n", | ||
"import pandas as pd\n", | ||
"import numpy as np" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"filename = 'data/car_financing.xlsx'\n", | ||
"df = pd.read_excel(filename)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Basic Operations\n", | ||
"\n", | ||
"1. Ensure that you have correctly loaded the data. \n", | ||
"2. See what kind of data you have. \n", | ||
"3. Check the validity of your data." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Viewing the first and last 5 rows" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Select top N number of records (default = 5)\n", | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Select bottom N number of records (default = 5)\n", | ||
"df.tail()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Check the column data types" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Check the column data types using the dtypes attribute\n", | ||
"# For example, you might wrongly assume that the values in one of your columns are \n", | ||
"# int64 when they are actually strings. \n", | ||
"df.dtypes" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Use the shape attribute to get the number of rows and columns in your dataframe\n", | ||
"df.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# The info method gives the column datatypes + number of non-null values\n", | ||
"# Notice that we seem to have 408 non-null values for all but the Interest Paid column. \n", | ||
"df.info()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"anaconda-cloud": {}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.7" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} |
Oops, something went wrong.