diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..763513e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints diff --git a/environment.yml b/environment.yml index 158f661..79a5eae 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: coiled +name: coiled-examples channels: - conda-forge - defaults diff --git a/uber-lyft.ipynb b/uber-lyft.ipynb index 0494af9..fef7b0d 100644 --- a/uber-lyft.ipynb +++ b/uber-lyft.ipynb @@ -28,21 +28,37 @@ "source": [ "import coiled\n", "\n", + "name = \"your_name\" # avoid reusing teammate clusters\n", + "\n", "cluster = coiled.Cluster(\n", " n_workers=30,\n", " account=\"events\",\n", - " name=\"uber-lyft\",\n", + " name=f\"uber-lyft_pydata-seattle_{name}\",\n", " shutdown_on_close=False,\n", ")\n", "\n", "client = cluster.get_client()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "30f1ed22-0346-462e-b6a9-3e3a385702ae", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "client" + ] + }, { "cell_type": "code", "execution_count": null, "id": "33b598a4-fe0a-43c5-8007-0e955ac193f9", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import dask\n", @@ -76,7 +92,9 @@ "cell_type": "code", "execution_count": null, "id": "95a96932-2109-447c-9eb3-0d235de5e973", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df = df.persist()\n", @@ -88,7 +106,9 @@ "cell_type": "code", "execution_count": null, "id": "0d1b4b93-3c0d-430b-9d3a-4573d52d991d", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%time df.base_passenger_fare.sum().compute()" @@ -110,7 +130,6 @@ "cell_type": "markdown", "id": "48f4dbf8-86c6-4cfc-8125-a67ea2ddfbed", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -121,7 +140,9 @@ "cell_type": "code", "execution_count": null, "id": "f0068858-cce5-4939-85df-7251bd0923c2", - "metadata": {}, + "metadata": { + "tags": [] + }, 
"outputs": [], "source": [ "total = df[[\"base_passenger_fare\", \"driver_pay\", \"tips\", \"trip_miles\"]].sum()\n", @@ -133,7 +154,9 @@ "cell_type": "code", "execution_count": null, "id": "e3abf9cf-303d-4b79-a541-c337a8c55f35", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "total" @@ -143,7 +166,9 @@ "cell_type": "code", "execution_count": null, "id": "327134c9-409c-4979-acb7-e987fe86d7aa", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "average" @@ -153,7 +178,9 @@ "cell_type": "code", "execution_count": null, "id": "ae57ee02-b8c3-4d9b-a053-2d866bb6ab14", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "total, average = dask.compute(total, average)" @@ -171,7 +198,9 @@ "cell_type": "code", "execution_count": null, "id": "0779b461-afe8-4f1d-8648-8b92c4f63220", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df[df.tips != 0].tips.mean().compute()" @@ -237,7 +266,9 @@ "cell_type": "code", "execution_count": null, "id": "b71729eb-f841-433a-b020-0f2b1c425355", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df.hvfhs_license_num.value_counts().compute()" @@ -254,7 +285,35 @@ "source": [ "df[\"tipped\"] = df.tips != 0\n", "\n", - "df.groupby(\"hvfhs_license_num\").tipped.mean().compute()" + "tip_by_provider = df.groupby(\"hvfhs_license_num\").tipped.mean().compute()\n", + "tip_by_provider" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07cafdb6-dea7-4748-b1cd-0072bb306bbf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "tip_by_provider" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3877c42-17db-44ed-a674-0699ccecf886", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "provider = {\"HV0002\": \"Juno\", \"HV0005\": \"Lyft\", \"HV0003\": \"Uber\", \"HV0004\": \"Via\"}\n", + "tip_by_provider = 
tip_by_provider.to_frame().set_index(tip_by_provider.index.map(provider))\n", + "\n", + "tip_by_provider" ] }, {