Skip to content

Commit cd916a8

Browse files
author
Michael Galarnyk
committed
Added old pandas material to github
1 parent 0dcde1b commit cd916a8

34 files changed

+5352
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

Pandas/.DS_Store

10 KB
Binary file not shown.
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# Import libraries\n",
10+
"import pandas as pd\n",
11+
"import numpy as np"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"# Load Excel File\n",
21+
"filename = 'data/car_financing.xlsx'\n",
22+
"df = pd.read_excel(filename)"
23+
]
24+
},
25+
{
26+
"cell_type": "code",
27+
"execution_count": null,
28+
"metadata": {},
29+
"outputs": [],
30+
"source": [
31+
"## Filtering \n",
32+
"car_filter = df['car_type']=='Toyota Sienna'\n",
33+
"interest_filter = df['interest_rate']==0.0702\n",
34+
"df = df.loc[car_filter & interest_filter, :]"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"# Approach 1 dictionary substitution using rename method\n",
44+
"df = df.rename(columns={'Starting Balance': 'starting_balance',\n",
45+
" 'Interest Paid': 'interest_paid', \n",
46+
" 'Principal Paid': 'principal_paid',\n",
47+
" 'New Balance': 'new_balance'})"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"# Approach 2 list replacement\n",
57+
"# Only changing Month -> month, but we need to list the rest of the columns\n",
58+
"df.columns = ['month',\n",
59+
" 'starting_balance',\n",
60+
" 'Repayment',\n",
61+
" 'interest_paid',\n",
62+
" 'principal_paid',\n",
63+
" 'new_balance',\n",
64+
" 'term',\n",
65+
" 'interest_rate',\n",
66+
" 'car_type']"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"# Approach 1\n",
76+
"# This approach allows you to drop multiple columns at a time \n",
77+
"df = df.drop(columns=['term'])"
78+
]
79+
},
80+
{
81+
"cell_type": "code",
82+
"execution_count": null,
83+
"metadata": {},
84+
"outputs": [],
85+
"source": [
86+
"# Approach 2 use the del command\n",
87+
"del df['Repayment']"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"metadata": {},
94+
"outputs": [],
95+
"source": [
96+
"df.shape"
97+
]
98+
},
99+
{
100+
"cell_type": "markdown",
101+
"metadata": {},
102+
"source": [
103+
"## Aggregate Methods\n",
104+
"It is often a good idea to compute summary statistics."
105+
]
106+
},
107+
{
108+
"cell_type": "markdown",
109+
"metadata": {},
110+
"source": [
111+
"Aggregate Method | Description\n",
112+
"--- | --- \n",
113+
"sum | sum of values\n",
114+
"cumsum | cumulative sum\n",
115+
"mean | mean of values\n",
116+
"median | arithmetic median of values\n",
117+
"min | minimum\n",
118+
"max | maximum\n",
119+
"mode | mode\n",
120+
"std | unbiased standard deviation\n",
121+
"var | unbiased variance\n",
122+
"quantile | compute rank-based statistics of elements"
123+
]
124+
},
125+
{
126+
"cell_type": "code",
127+
"execution_count": null,
128+
"metadata": {},
129+
"outputs": [],
130+
"source": [
131+
"df.head()"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": null,
137+
"metadata": {},
138+
"outputs": [],
139+
"source": [
140+
"# sum the values in a column\n",
141+
"# total amount of interest paid over the course of the loan\n",
142+
"df['interest_paid'].sum()"
143+
]
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": null,
148+
"metadata": {},
149+
"outputs": [],
150+
"source": [
151+
"# sum the values in each column (returns one total per column)\n",
152+
"df.sum()"
153+
]
154+
},
155+
{
156+
"cell_type": "code",
157+
"execution_count": null,
158+
"metadata": {},
159+
"outputs": [],
160+
"source": [
161+
"'Toyota Sienna' + 'Toyota Sienna'"
162+
]
163+
},
164+
{
165+
"cell_type": "code",
166+
"execution_count": null,
167+
"metadata": {},
168+
"outputs": [],
169+
"source": [
170+
"# Notice that by default the sum method skips missing values (skipna=True). \n",
171+
"help(df['interest_paid'].sum)"
172+
]
173+
},
174+
{
175+
"cell_type": "code",
176+
"execution_count": null,
177+
"metadata": {},
178+
"outputs": [],
179+
"source": [
180+
"# The info method gives the column datatypes + number of non-null values\n",
181+
"# Notice that we seem to have 60 non-null values for all but the interest_paid column. \n",
182+
"df.info()"
183+
]
184+
}
185+
],
186+
"metadata": {
187+
"anaconda-cloud": {},
188+
"kernelspec": {
189+
"display_name": "Python 3 (ipykernel)",
190+
"language": "python",
191+
"name": "python3"
192+
},
193+
"language_info": {
194+
"codemirror_mode": {
195+
"name": "ipython",
196+
"version": 3
197+
},
198+
"file_extension": ".py",
199+
"mimetype": "text/x-python",
200+
"name": "python",
201+
"nbconvert_exporter": "python",
202+
"pygments_lexer": "ipython3",
203+
"version": "3.9.7"
204+
}
205+
},
206+
"nbformat": 4,
207+
"nbformat_minor": 1
208+
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# Import libraries\n",
10+
"import pandas as pd\n",
11+
"import numpy as np"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"filename = 'data/car_financing.xlsx'\n",
21+
"df = pd.read_excel(filename)"
22+
]
23+
},
24+
{
25+
"cell_type": "markdown",
26+
"metadata": {},
27+
"source": [
28+
"## Basic Operations\n",
29+
"\n",
30+
"1. Ensure that you have correctly loaded the data. \n",
31+
"2. See what kind of data you have. \n",
32+
"3. Check the validity of your data."
33+
]
34+
},
35+
{
36+
"cell_type": "markdown",
37+
"metadata": {},
38+
"source": [
39+
"### Viewing the first and last 5 rows"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": null,
45+
"metadata": {},
46+
"outputs": [],
47+
"source": [
48+
"# Select top N number of records (default = 5)\n",
49+
"df.head()"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": null,
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"# Select bottom N number of records (default = 5)\n",
59+
"df.tail()"
60+
]
61+
},
62+
{
63+
"cell_type": "markdown",
64+
"metadata": {},
65+
"source": [
66+
"### Check the column data types"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"# Check the column data types using the dtypes attribute\n",
76+
"# For example, you can wrongly assume the values in one of your columns are \n",
77+
"# an int64 instead of a string. \n",
78+
"df.dtypes"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": null,
84+
"metadata": {},
85+
"outputs": [],
86+
"source": [
87+
"# Use the shape attribute to get the number of rows and columns in your dataframe\n",
88+
"df.shape"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"# The info method gives the column datatypes + number of non-null values\n",
98+
"# Notice that we seem to have 408 non-null values for all but the Interest Paid column. \n",
99+
"df.info()"
100+
]
101+
}
102+
],
103+
"metadata": {
104+
"anaconda-cloud": {},
105+
"kernelspec": {
106+
"display_name": "Python 3 (ipykernel)",
107+
"language": "python",
108+
"name": "python3"
109+
},
110+
"language_info": {
111+
"codemirror_mode": {
112+
"name": "ipython",
113+
"version": 3
114+
},
115+
"file_extension": ".py",
116+
"mimetype": "text/x-python",
117+
"name": "python",
118+
"nbconvert_exporter": "python",
119+
"pygments_lexer": "ipython3",
120+
"version": "3.9.7"
121+
}
122+
},
123+
"nbformat": 4,
124+
"nbformat_minor": 1
125+
}

0 commit comments

Comments
 (0)