From e637348f05049a6f6275974e1ecfbc3f71d3bc88 Mon Sep 17 00:00:00 2001
From: Kumar Saurabh
Date: Mon, 18 Mar 2019 23:24:15 +0530
Subject: [PATCH] Pulling JSON from url

Following things are implemented-
1. Pulled JSON from url : https://envirocar.org/api/stable/tracks/52714d4ee4b000fe0582a292/measurements
2. Discarded unnecessary data
3. Adding custom Headers and created dataframe.
4. Done Data description
Now it is ready for further statistical work.
---
 notebooks/enviroCar_python.ipynb | 205 +++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 notebooks/enviroCar_python.ipynb

diff --git a/notebooks/enviroCar_python.ipynb b/notebooks/enviroCar_python.ipynb
new file mode 100644
index 0000000..4076454
--- /dev/null
+++ b/notebooks/enviroCar_python.ipynb
@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import urllib.request\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "try:\n",
+    "    # pandas >= 1.0 exposes json_normalize at the top level\n",
+    "    from pandas import json_normalize\n",
+    "except ImportError:\n",
+    "    # Older pandas only ships it as pandas.io.json.json_normalize\n",
+    "    from pandas.io.json import json_normalize"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with urllib.request.urlopen(\"https://envirocar.org/api/stable/tracks/52714d4ee4b000fe0582a292/measurements\") as response:\n",
+    "    data_json = json.loads(response.read().decode('utf-8'))\n",
+    "\n",
+    "# The individual records live under the 'features' key\n",
+    "data = data_json['features']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Preparing column list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "good_columns = [\n",
+    "    \"Coordinates\",\n",
+    "    \"Geometry Type\",\n",
+    "    \"ID\",\n",
+    "    \"CO2(kg/h)\",\n",
+    "    \"Calculated MAF\",\n",
+    "    \"Consumption\",\n",
+    "    \"GPS Accuracy\",\n",
+    "    \"GPS Altitude\",\n",
+    "    \"GPS Bearing\",\n",
+    "    \"GPS HDOP\",\n",
+    "    \"GPS PDOP\",\n",
+    "    \"GPS Speed\",\n",
+    "    \"GPS VDOP\",\n",
+    "    \"Intake Pressure\",\n",
+    "    \"Intake Temperature\",\n",
+    "    \"Rpm\",\n",
+    "    \"Speed\",\n",
+    "    \"Construction_Year\",\n",
+    "    \"EngineDisplacement\",\n",
+    "    \"FuelType\",\n",
+    "    \"Sensor ID\",\n",
+    "    \"Manufacturer\",\n",
+    "    \"Model\",\n",
+    "    \"Sensor type\",\n",
+    "    \"Time\",\n",
+    "    \"Track\",\n",
+    "    \"Type\"\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Preparing Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Flatten the nested JSON records into one column per leaf field\n",
+    "df_1 = json_normalize(data)\n",
+    "\n",
+    "# good_columns is applied by position and appears to follow the alphabetical\n",
+    "# order of the raw column names, so make that order explicit instead of\n",
+    "# relying on the pandas version (TODO: confirm against the API response)\n",
+    "df_1 = df_1.sort_index(axis=1)\n",
+    "\n",
+    "# Drop every column whose name contains 'unit'; copy so later assignments act on an independent frame\n",
+    "df = df_1.filter(regex='^(?!.*unit).*$', axis=1).copy()\n",
+    "\n",
+    "# Attach the custom column names\n",
+    "df.columns = good_columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert dates to datetime\n",
+    "df['Time'] = pd.to_datetime(df['Time'], format='%Y-%m-%dT%H:%M:%SZ')\n",
+    "\n",
+    "# Set the Time column as index; set_index returns a new frame, so keep the result\n",
+    "df = df.set_index('Time', drop=True)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extract longitude and latitude from Coordinates\n",
+    "# (element 0 is taken as longitude, element 1 as latitude)\n",
+    "df['Longitude'] = df['Coordinates'].str[0].astype(float)\n",
+    "df['Latitude'] = df['Coordinates'].str[1].astype(float)\n",
+    "\n",
+    "# Preview the first 10 positions\n",
+    "df[['Longitude', 'Latitude']].head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show variable types\n",
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Descriptive statistics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}