mGalarnyk
diff --git a/‎.DS_Store
0 Bytes b/‎.DS_Store
0 Bytes
diff --git a/‎Pandas/.DS_Store
10 KB b/‎Pandas/.DS_Store
10 KB
diff --git a/‎Pandas/.ipynb_checkpoints/AggregateFunctions-checkpoint.ipynb
Lines changed: 208 additions & 0 deletions b/‎Pandas/.ipynb_checkpoints/AggregateFunctions-checkpoint.ipynb
Lines changed: 208 additions & 0 deletions
diff --git a/‎Pandas/.ipynb_checkpoints/BasicOperations-checkpoint.ipynb
Lines changed: 125 additions & 0 deletions b/‎Pandas/.ipynb_checkpoints/BasicOperations-checkpoint.ipynb
Lines changed: 125 additions & 0 deletions
@@ -0,0 +1,208 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load Excel File\n",
+    "filename = 'data/car_financing.xlsx'\n",
+    "df = pd.read_excel(filename)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Filtering \n",
+    "car_filter = df['car_type']=='Toyota Sienna'\n",
+    "interest_filter = df['interest_rate']==0.0702\n",
+    "df = df.loc[car_filter & interest_filter, :]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Approach 1: dictionary substitution using the rename method\n",
+    "df = df.rename(columns={'Starting Balance': 'starting_balance',\n",
+    "                        'Interest Paid': 'interest_paid', \n",
+    "                        'Principal Paid': 'principal_paid',\n",
+    "                        'New Balance': 'new_balance'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Approach 2: list replacement\n",
+    "# Only changing Month -> month, but we need to list the rest of the columns\n",
+    "df.columns = ['month',\n",
+    "              'starting_balance',\n",
+    "              'Repayment',\n",
+    "              'interest_paid',\n",
+    "              'principal_paid',\n",
+    "              'new_balance',\n",
+    "              'term',\n",
+    "              'interest_rate',\n",
+    "              'car_type']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Approach 1\n",
+    "# This approach allows you to drop multiple columns at a time \n",
+    "df = df.drop(columns=['term'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Approach 2: use the del statement\n",
+    "del df['Repayment']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Aggregate Methods\n",
+    "It is often a good idea to compute summary statistics."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Aggregate Method | Description\n",
+    "--- | --- \n",
+    "sum | sum of values\n",
+    "cumsum | cumulative sum\n",
+    "mean | mean of values\n",
+    "median | arithmetic median of values\n",
+    "min | minimum\n",
+    "max | maximum\n",
+    "mode | mode\n",
+    "std | unbiased standard deviation\n",
+    "var | unbiased variance\n",
+    "quantile | compute rank-based statistics of elements"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sum the values in a column\n",
+    "# total amount of interest paid over the course of the loan\n",
+    "df['interest_paid'].sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sum all the values across all columns\n",
+    "df.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'Toyota Sienna' + 'Toyota Sienna'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Notice that, by default, the sum method skips missing values (skipna=True).\n",
+    "help(df['interest_paid'].sum)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The info method gives the column datatypes + number of non-null values\n",
+    "# Notice that we seem to have 60 non-null values for all but the interest_paid column. \n",
+    "df.info()"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
@@ -0,0 +1,125 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filename = 'data/car_financing.xlsx'\n",
+    "df = pd.read_excel(filename)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Basic Operations\n",
+    "\n",
+    "1. Ensure that you have correctly loaded the data. \n",
+    "2. See what kind of data you have. \n",
+    "3. Check the validity of your data."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Viewing the first and last 5 rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Select top N number of records (default = 5)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Select bottom N number of records (default = 5)\n",
+    "df.tail()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Check the column data types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check the column data types using the dtypes attribute\n",
+    "# For example, you can wrongly assume the values in one of your columns are \n",
+    "# an int64 instead of a string. \n",
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the shape attribute to get the number of rows and columns in your dataframe\n",
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The info method gives the column datatypes + number of non-null values\n",
+    "# Notice that we seem to have 408 non-null values for all but the Interest Paid column. \n",
+    "df.info()"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}