diff --git a/component_charts.ipynb b/component_charts.ipynb index 1d21c0e..41e16bf 100644 --- a/component_charts.ipynb +++ b/component_charts.ipynb @@ -50,11 +50,10 @@ "source": [ "import altair as alt\n", "\n", + "\n", "def plot_release_count(release_counts):\n", " # check if input contains the right columns\n", - " if not set(\n", - " [\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]\n", - " ).issubset(release_counts.columns):\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", " raise ValueError(\n", " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", " )\n", @@ -103,66 +102,63 @@ "\n", " return chart\n", "\n", + "\n", "def plot_objects_per_stage(objects_per_stage):\n", - " # check if input contains the right columns\n", - " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", - " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", - " # draw chart\n", - " chart = (\n", - " alt.Chart(objects_per_stage)\n", - " .mark_bar(fill=\"#d6e100\")\n", - " .encode(\n", - " x=alt.X(\n", - " \"stage\",\n", - " type=\"ordinal\",\n", - " scale=alt.Scale(\n", - " domain=[\n", - " \"planning\",\n", - " \"tender\",\n", - " \"awards\",\n", - " \"contracts\",\n", - " \"implementation\",\n", - " ]\n", - " ),\n", - " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", - " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", - " ),\n", - " y=alt.Y(\n", - " \"object_count\",\n", - " type=\"quantitative\",\n", - " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", - " ),\n", - " tooltip=[\n", - " alt.Tooltip(\"stage\", title=\"stage\"),\n", - " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", - " ],\n", - " )\n", - " .properties(\n", - " width=600,\n", - " height=350,\n", - " padding=50,\n", - " 
title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", - " )\n", - " .configure_axis(\n", - " titleFontSize=14,\n", - " labelFontSize=14,\n", - " labelPadding=5,\n", - " ticks=False,\n", - " domain=False,\n", - " )\n", - " .configure_view(strokeWidth=0)\n", - " )\n", - " return chart\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", "\n", "\n", "def plot_releases_by_month(release_dates):\n", " # check if input contains the right columns\n", - " if not set([\"date\", \"collection_id\", \"release_type\", \"release_count\"]).issubset(\n", - " release_dates.columns\n", - " ):\n", 
- " raise ValueError(\n", - " \"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\"\n", - " )\n", + " if not set([\"date\", \"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", " # check if number of rows is more than 5000\n", " if release_dates.shape[0] > 5000:\n", " alt.data_transformers.disable_max_rows()\n", @@ -172,9 +168,7 @@ " alt.Chart(release_dates)\n", " .mark_line(strokeWidth=3)\n", " .encode(\n", - " x=alt.X(\n", - " \"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")\n", - " ),\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", " y=alt.Y(\n", " \"release_count\",\n", " type=\"quantitative\",\n", @@ -190,9 +184,7 @@ " ),\n", " tooltip=[\n", " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", - " alt.Tooltip(\n", - " \"release_count\", aggregate=\"sum\", title=\"number of releases\"\n", - " ),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", " alt.Tooltip(\"release_type\", title=\"release type\"),\n", " ],\n", " )\n", @@ -213,6 +205,7 @@ " )\n", " return chart\n", "\n", + "\n", "def plot_objects_per_year(objects_per_year):\n", " # check if input contains the right columns\n", " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", @@ -238,9 +231,7 @@ " \"key\",\n", " type=\"nominal\",\n", " title=\"object type\",\n", - " scale=alt.Scale(\n", - " domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]\n", - " ),\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", " ),\n", " tooltip=[\n", " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", @@ -265,6 +256,7 @@ " )\n", " return chart\n", "\n", + "\n", "def plot_top_buyers(buyers):\n", " # check 
if input contains the right columns\n", " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", @@ -287,9 +279,7 @@ " ),\n", " tooltip=[\n", " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", - " alt.Tooltip(\n", - " \"total_tenders\", title=\"number of tenders\", type=\"quantitative\"\n", - " ),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\", type=\"quantitative\"),\n", " ],\n", " )\n", " .properties(\n", @@ -316,4 +306,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/component_check_quality.ipynb b/component_check_quality.ipynb index e4e2918..e0a4e9a 100644 --- a/component_check_quality.ipynb +++ b/component_check_quality.ipynb @@ -547,14 +547,7 @@ "id": "yeBOaDsO6uDS" }, "source": [ - "release_count_chart = sns.catplot(\n", - " x=\"release_count\", y=\"ocid_count\", kind=\"bar\", col=\"collection_id\", hue=\"release_type\", data=release_counts\n", - ").set_xticklabels(rotation=90)\n", - "\n", - "for ax in release_count_chart.axes.flat:\n", - " format_thousands(ax.yaxis)\n", - "\n", - "plt.show(release_count_chart)" + "plot_release_count(release_counts)" ], "execution_count": null, "outputs": [] diff --git a/component_environment.ipynb b/component_environment.ipynb index d1d2086..ba69ccb 100644 --- a/component_environment.ipynb +++ b/component_environment.ipynb @@ -143,4 +143,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/component_scope_kingfisher.ipynb b/component_scope_kingfisher.ipynb index b265ebe..9c46232 100644 --- a/component_scope_kingfisher.ipynb +++ b/component_scope_kingfisher.ipynb @@ -222,14 +222,7 @@ "id": "mKo6Q4HimvQZ" }, "source": [ - "objects_per_stage_chart = sns.catplot(x=\"stage\", y=\"object_count\", kind=\"bar\", data=objects_per_stage).set_xticklabels(\n", - " rotation=90\n", - ")\n", - "\n", - "for ax in objects_per_stage_chart.axes.flat:\n", - " format_thousands(ax.yaxis)\n", - "\n", - "objects_per_stage" + 
"plot_objects_per_stage(objects_per_stage)" ], "execution_count": null, "outputs": [] @@ -374,11 +367,7 @@ "release_dates = release_dates.set_index(\"date\")\n", "release_dates = release_dates.groupby([\"collection_id\", \"release_type\"]).resample(\"M\").sum()\n", "\n", - "fig, ax = plt.subplots(figsize=[15, 5])\n", - "sns.lineplot(data=release_dates, x=\"date\", y=\"release_count\", hue=\"collection_id\", style=\"release_type\")\n", - "\n", - "format_thousands(ax.yaxis)\n", - "sns.despine()" + "plot_releases_by_month(release_dates)" ], "execution_count": null, "outputs": [] diff --git a/component_scope_usability.ipynb b/component_scope_usability.ipynb index 2255df2..2be757d 100644 --- a/component_scope_usability.ipynb +++ b/component_scope_usability.ipynb @@ -155,18 +155,7 @@ { "cell_type": "code", "source": [ - "fig = px.bar(\n", - " stages,\n", - " x=\"stage\",\n", - " y=\"object_count\",\n", - " title=\"Stages covered\",\n", - " template=\"plotly_white\",\n", - " text=\"object_count\",\n", - " labels={\"stage\": \"Stage\", \"object_count\": \"Number of releases\"},\n", - ")\n", - "fig.update_traces(marker_color=\"#D6E100\")\n", - "fig.update_layout(width=400, height=350, bargap=0.5)\n", - "fig.show()" + "plot_objects_per_stage(stages)" ], "metadata": { "id": "zGNrN_occdW5" @@ -242,22 +231,7 @@ { "cell_type": "code", "source": [ - "final = pd.melt(dates, id_vars=[\"year\"], value_vars=[\"tenders\", \"awards\"])\n", - "fig = px.bar(\n", - " final,\n", - " x=\"year\",\n", - " y=\"value\",\n", - " color=\"variable\",\n", - " barmode=\"group\",\n", - " title=\"Tenders and awards by year\",\n", - " template=\"plotly_white\",\n", - " text=\"value\",\n", - " color_discrete_sequence=[\"#D6E100\", \"#6c75e1\"],\n", - " labels={\"value\": \"Tenders and awards\"},\n", - ")\n", - "fig.update_layout(width=600, height=350, bargap=0.5)\n", - "\n", - "fig.show()" + "plot_objects_per_year(dates)" ], "metadata": { "id": "ZFUPPW51dF4m" @@ -371,20 +345,7 @@ { "cell_type": "code", 
"source": [ - "fig = px.bar(\n", - " buyers.head(10),\n", - " y=\"name\",\n", - " x=\"total_tenders\",\n", - " title=\"Top 10 buyers\",\n", - " template=\"plotly_white\",\n", - " text=\"total_tenders\",\n", - " labels={\"total_tenders\": \"Number of procedures\", \"name\": \"buyer\"},\n", - " color_discrete_sequence=[\"#D6E100\", \"#6c75e1\"],\n", - " orientation=\"h\",\n", - ")\n", - "# fig.update_traces()\n", - "fig.update_layout(width=850, height=450, bargap=0.4, yaxis={\"categoryorder\": \"total ascending\"})\n", - "fig.show()" + "plot_top_buyers(buyers.head(10))" ], "metadata": { "id": "KmgHVCXaiCeG" diff --git a/manage.py b/manage.py index 3155c49..8cb56eb 100755 --- a/manage.py +++ b/manage.py @@ -17,12 +17,14 @@ ], "template_publisher_analysis": [ "component_environment", + "component_charts", "component_setup_kingfisher", "component_errors_kingfisher", "component_scope_kingfisher", ], "template_structure_and_format_feedback": [ "component_environment", + "component_charts", "component_setup_kingfisher", "component_errors_kingfisher", "component_scope_kingfisher", @@ -30,6 +32,7 @@ ], "template_data_quality_feedback": [ "component_environment", + "component_charts", "component_setup_kingfisher", "component_errors_kingfisher", "component_scope_kingfisher", @@ -39,17 +42,20 @@ ], "template_usability_checks": [ "component_environment", + "component_charts", "component_setup_kingfisher", "component_scope_usability", "component_check_usability", ], "template_usability_checks_fieldlist": [ "component_environment", + "component_charts", "component_setup_fieldlist", "component_check_usability", ], "template_usability_checks_registry": [ "component_environment", + "component_charts", "component_setup_registry", "component_check_usability", ], diff --git a/template_data_quality_feedback.ipynb b/template_data_quality_feedback.ipynb index c6122d0..a608c2d 100644 --- a/template_data_quality_feedback.ipynb +++ b/template_data_quality_feedback.ipynb @@ -29,7 +29,7 @@ 
"outputs": [], "source": [ "! pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", @@ -134,6 +130,296 @@ "# set_light_mode()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhnbdjqU1e6p" + }, + "source": [ + "## Charts Setup\n", + "*You must run the cells in this section each time you connect to a new runtime. For example, when you return to the notebook after an idle timeout, when the runtime crashes, or when you restart or factory reset the runtime.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "elpUvMf61Ym6" + }, + "outputs": [], + "source": [ + "! 
pip install --upgrade altair >> pip.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aenztz1zK3" + }, + "source": [ + "Import chart packages and define chart functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bip37aP917XY" + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "\n", + "\n", + "def plot_release_count(release_counts):\n", + " # check if input contains the right columns\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", + " raise ValueError(\n", + " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", + " )\n", + " chart = (\n", + " alt.Chart(release_counts)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\n", + " \"release_count\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"release count\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"ocid_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"ocid count\", format=\"~s\", tickCount=5),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " title=\"release type\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"release_count\", title=\"release count\"),\n", + " alt.Tooltip(\"ocid_count\", title=\"ocid count\", format=\"~s\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " alt.Tooltip(\"collection_id\", title=\"collection id\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " 
.configure_view(strokeWidth=0)\n", + " )\n", + "\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_stage(objects_per_stage):\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_releases_by_month(release_dates):\n", + " # check if input contains the right columns\n", + " if not set([\"date\", \"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", + " # check if number of rows is more than 5000\n", + " 
if release_dates.shape[0] > 5000:\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(release_dates)\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", + " y=alt.Y(\n", + " \"release_count\",\n", + " type=\"quantitative\",\n", + " aggregate=\"sum\",\n", + " axis=alt.Axis(title=\"number of releases\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " legend=alt.Legend(title=\"release type\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_year(objects_per_year):\n", + " # check if input contains the right columns\n", + " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", + " raise ValueError(\"Data must contain columns 'year', 'tenders' and 'awards'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_year)\n", + " .transform_fold([\"tenders\", \"awards\"])\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\n", + " \"year\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"year\", 
format=\".0f\", tickCount=objects_per_year.shape[0]),\n", + " ),\n", + " y=alt.Y(\n", + " \"value\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"key\",\n", + " type=\"nominal\",\n", + " title=\"object type\",\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", + " alt.Tooltip(\"value\", title=\"number of objects\", type=\"quantitative\"),\n", + " alt.Tooltip(\"key\", title=\"object type\", type=\"nominal\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_top_buyers(buyers):\n", + " # check if input contains the right columns\n", + " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", + " raise ValueError(\"Data must contain columns 'name' and 'total_tenders'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(buyers)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"total_tenders\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of tenders\", format=\"~s\", tickCount=5),\n", + " ),\n", + " y=alt.Y(\n", + " \"name\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"buyer\", labelAngle=0),\n", + " sort=alt.SortField(\"total_tenders\", order=\"descending\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\",
type=\"quantitative\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart" + ] + }, { "cell_type": "markdown", "metadata": { @@ -612,14 +898,7 @@ }, "outputs": [], "source": [ - "objects_per_stage_chart = sns.catplot(x=\"stage\", y=\"object_count\", kind=\"bar\", data=objects_per_stage).set_xticklabels(\n", - " rotation=90\n", - ")\n", - "\n", - "for ax in objects_per_stage_chart.axes.flat:\n", - " format_thousands(ax.yaxis)\n", - "\n", - "objects_per_stage" + "plot_objects_per_stage(objects_per_stage)" ] }, { @@ -764,11 +1043,7 @@ "release_dates = release_dates.set_index(\"date\")\n", "release_dates = release_dates.groupby([\"collection_id\", \"release_type\"]).resample(\"M\").sum()\n", "\n", - "fig, ax = plt.subplots(figsize=[15, 5])\n", - "sns.lineplot(data=release_dates, x=\"date\", y=\"release_count\", hue=\"collection_id\", style=\"release_type\")\n", - "\n", - "format_thousands(ax.yaxis)\n", - "sns.despine()" + "plot_releases_by_month(release_dates)" ] }, { @@ -2068,14 +2343,7 @@ }, "outputs": [], "source": [ - "release_count_chart = sns.catplot(\n", - " x=\"release_count\", y=\"ocid_count\", kind=\"bar\", col=\"collection_id\", hue=\"release_type\", data=release_counts\n", - ").set_xticklabels(rotation=90)\n", - "\n", - "for ax in release_count_chart.axes.flat:\n", - " format_thousands(ax.yaxis)\n", - "\n", - "plt.show(release_count_chart)" + "plot_release_count(release_counts)" ] }, { diff --git a/template_meta_analysis.ipynb b/template_meta_analysis.ipynb index a2b176b..e9bc6d0 100644 --- a/template_meta_analysis.ipynb +++ b/template_meta_analysis.ipynb @@ -29,7 +29,7 @@ "outputs": [], "source": [ "! 
pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", diff --git a/template_publisher_analysis.ipynb b/template_publisher_analysis.ipynb index 252a0a7..a1f8057 100644 --- a/template_publisher_analysis.ipynb +++ b/template_publisher_analysis.ipynb @@ -29,7 +29,7 @@ "outputs": [], "source": [ "! pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", @@ -134,6 +130,296 @@ "# set_light_mode()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhnbdjqU1e6p" + }, + "source": [ + "## Charts Setup\n", + "*You must run the cells in this section each time you connect to a new runtime. 
For example, when you return to the notebook after an idle timeout, when the runtime crashes, or when you restart or factory reset the runtime.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "elpUvMf61Ym6" + }, + "outputs": [], + "source": [ + "! pip install --upgrade altair >> pip.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aenztz1zK3" + }, + "source": [ + "Import chart packages and define chart functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bip37aP917XY" + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "\n", + "\n", + "def plot_release_count(release_counts):\n", + " # check if input contains the right columns\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", + " raise ValueError(\n", + " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", + " )\n", + " chart = (\n", + " alt.Chart(release_counts)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\n", + " \"release_count\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"release count\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"ocid_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"ocid count\", format=\"~s\", tickCount=5),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " title=\"release type\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"release_count\", title=\"release count\"),\n", + " alt.Tooltip(\"ocid_count\", title=\"ocid count\", format=\"~s\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " alt.Tooltip(\"collection_id\", title=\"collection id\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", 
+ " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + "\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_stage(objects_per_stage):\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_releases_by_month(release_dates):\n", + " # check if input contains the right columns\n", + " if not set([\"date\", 
\"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", + " # check if number of rows is more than 5000\n", + " if release_dates.shape[0] > 5000:\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(release_dates)\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", + " y=alt.Y(\n", + " \"release_count\",\n", + " type=\"quantitative\",\n", + " aggregate=\"sum\",\n", + " axis=alt.Axis(title=\"number of releases\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " legend=alt.Legend(title=\"release type\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_year(objects_per_year):\n", + " # check if input contains the right columns\n", + " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", + " raise ValueError(\"Data must contain columns 'year', 'tenders' and 'awards'\")\n", + " # draw chart\n", + " 
chart = (\n", + " alt.Chart(objects_per_year)\n", + " .transform_fold([\"tenders\", \"awards\"])\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\n", + " \"year\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"year\", format=\".0f\", tickCount=objects_per_year.shape[0]),\n", + " ),\n", + " y=alt.Y(\n", + " \"value\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"key\",\n", + " type=\"nominal\",\n", + " title=\"object type\",\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", + " alt.Tooltip(\"value\", title=\"number of objects\", type=\"quantitative\"),\n", + " alt.Tooltip(\"key\", title=\"object type\", type=\"nominal\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_top_buyers(buyers):\n", + " # check if input contains the right columns\n", + " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", + " raise ValueError(\"Data must contain columns 'name' and 'total_tenders'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(buyers)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"total_tenders\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of tenders\", format=\"~s\", tickCount=5),\n", + " ),\n", + " y=alt.Y(\n", + " \"name\",\n", + " type=\"ordinal\",\n", + "
axis=alt.Axis(title=\"buyer\", labelAngle=0),\n", + " sort=alt.SortField(\"total_tenders\", order=\"descending\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\", type=\"quantitative\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart" + ] + }, { "cell_type": "markdown", "metadata": { @@ -612,14 +898,7 @@ }, "outputs": [], "source": [ - "objects_per_stage_chart = sns.catplot(x=\"stage\", y=\"object_count\", kind=\"bar\", data=objects_per_stage).set_xticklabels(\n", - " rotation=90\n", - ")\n", - "\n", - "for ax in objects_per_stage_chart.axes.flat:\n", - " format_thousands(ax.yaxis)\n", - "\n", - "objects_per_stage" + "plot_objects_per_stage(objects_per_stage)" ] }, { @@ -764,11 +1043,7 @@ "release_dates = release_dates.set_index(\"date\")\n", "release_dates = release_dates.groupby([\"collection_id\", \"release_type\"]).resample(\"M\").sum()\n", "\n", - "fig, ax = plt.subplots(figsize=[15, 5])\n", - "sns.lineplot(data=release_dates, x=\"date\", y=\"release_count\", hue=\"collection_id\", style=\"release_type\")\n", - "\n", - "format_thousands(ax.yaxis)\n", - "sns.despine()" + "plot_releases_by_month(release_dates)" ] }, { @@ -838,6 +1113,9 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/template_structure_and_format_feedback.ipynb b/template_structure_and_format_feedback.ipynb index 5fc7b86..651cc97 100644 --- a/template_structure_and_format_feedback.ipynb +++ b/template_structure_and_format_feedback.ipynb 
@@ -29,7 +29,7 @@ "outputs": [], "source": [ "! pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", @@ -134,6 +130,296 @@ "# set_light_mode()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhnbdjqU1e6p" + }, + "source": [ + "## Charts Setup\n", + "*You must run the cells in this section each time you connect to a new runtime. For example, when you return to the notebook after an idle timeout, when the runtime crashes, or when you restart or factory reset the runtime.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "elpUvMf61Ym6" + }, + "outputs": [], + "source": [ + "! 
pip install --upgrade altair >> pip.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aenztz1zK3" + }, + "source": [ + "Import chart packages and define chart functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bip37aP917XY" + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "\n", + "\n", + "def plot_release_count(release_counts):\n", + " # check if input contains the right columns\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", + " raise ValueError(\n", + " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", + " )\n", + " chart = (\n", + " alt.Chart(release_counts)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\n", + " \"release_count\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"release count\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"ocid_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"ocid count\", format=\"~s\", tickCount=5),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " title=\"release type\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"release_count\", title=\"release count\"),\n", + " alt.Tooltip(\"ocid_count\", title=\"ocid count\", format=\"~s\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " alt.Tooltip(\"collection_id\", title=\"collection id\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " 
.configure_view(strokeWidth=0)\n", + " )\n", + "\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_stage(objects_per_stage):\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_releases_by_month(release_dates):\n", + " # check if input contains the right columns\n", + " if not set([\"date\", \"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", + " # check if number of rows is more than 5000\n", + " 
if release_dates.shape[0] > 5000:\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(release_dates)\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", + " y=alt.Y(\n", + " \"release_count\",\n", + " type=\"quantitative\",\n", + " aggregate=\"sum\",\n", + " axis=alt.Axis(title=\"number of releases\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " legend=alt.Legend(title=\"release type\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_year(objects_per_year):\n", + " # check if input contains the right columns\n", + " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", + " raise ValueError(\"Data must contain columns 'year', 'tenders' and 'awards'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_year)\n", + " .transform_fold([\"tenders\", \"awards\"])\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\n", + " \"year\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"year\", 
format=\".0f\", tickCount=dates.shape[0]),\n", + " ),\n", + " y=alt.Y(\n", + " \"value\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"key\",\n", + " type=\"nominal\",\n", + " title=\"object type\",\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", + " alt.Tooltip(\"value\", title=\"number of objects\", type=\"quantitative\"),\n", + " alt.Tooltip(\"key\", title=\"object type\", type=\"nominal\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_top_buyers(buyers):\n", + " # check if input contains the right columns\n", + " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", + " raise ValueError(\"Data must contain columns 'name' and 'total_tenders'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(buyers)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"total_tenders\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of tenders\", format=\"~s\", tickCount=5),\n", + " ),\n", + " y=alt.Y(\n", + " \"name\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"buyer\", labelAngle=0),\n", + " sort=alt.SortField(\"total_tenders\", order=\"descending\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\", 
type=\"quantitative\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart" + ] + }, { "cell_type": "markdown", "metadata": { @@ -612,14 +898,7 @@ }, "outputs": [], "source": [ - "objects_per_stage_chart = sns.catplot(x=\"stage\", y=\"object_count\", kind=\"bar\", data=objects_per_stage).set_xticklabels(\n", - " rotation=90\n", - ")\n", - "\n", - "for ax in objects_per_stage_chart.axes.flat:\n", - " format_thousands(ax.yaxis)\n", - "\n", - "objects_per_stage" + "plot_objects_per_stage(objects_per_stage)" ] }, { @@ -764,11 +1043,7 @@ "release_dates = release_dates.set_index(\"date\")\n", "release_dates = release_dates.groupby([\"collection_id\", \"release_type\"]).resample(\"M\").sum()\n", "\n", - "fig, ax = plt.subplots(figsize=[15, 5])\n", - "sns.lineplot(data=release_dates, x=\"date\", y=\"release_count\", hue=\"collection_id\", style=\"release_type\")\n", - "\n", - "format_thousands(ax.yaxis)\n", - "sns.despine()" + "plot_releases_by_month(release_dates)" ] }, { @@ -1119,6 +1394,9 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/template_usability_checks.ipynb b/template_usability_checks.ipynb index 29a2dae..a0ccf08 100644 --- a/template_usability_checks.ipynb +++ b/template_usability_checks.ipynb @@ -29,7 +29,7 @@ "outputs": [], "source": [ "! pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! 
pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", @@ -134,6 +130,296 @@ "# set_light_mode()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhnbdjqU1e6p" + }, + "source": [ + "## Charts Setup\n", + "*You must run the cells in this section each time you connect to a new runtime. For example, when you return to the notebook after an idle timeout, when the runtime crashes, or when you restart or factory reset the runtime.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "elpUvMf61Ym6" + }, + "outputs": [], + "source": [ + "! 
pip install --upgrade altair >> pip.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aenztz1zK3" + }, + "source": [ + "Import chart packages and define chart functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bip37aP917XY" + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "\n", + "\n", + "def plot_release_count(release_counts):\n", + " # check if input contains the right columns\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", + " raise ValueError(\n", + " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", + " )\n", + " chart = (\n", + " alt.Chart(release_counts)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\n", + " \"release_count\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"release count\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"ocid_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"ocid count\", format=\"~s\", tickCount=5),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " title=\"release type\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"release_count\", title=\"release count\"),\n", + " alt.Tooltip(\"ocid_count\", title=\"ocid count\", format=\"~s\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " alt.Tooltip(\"collection_id\", title=\"collection id\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " 
.configure_view(strokeWidth=0)\n", + " )\n", + "\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_stage(objects_per_stage):\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_releases_by_month(release_dates):\n", + " # check if input contains the right columns\n", + " if not set([\"date\", \"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", + " # check if number of rows is more than 5000\n", + " 
if release_dates.shape[0] > 5000:\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(release_dates)\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", + " y=alt.Y(\n", + " \"release_count\",\n", + " type=\"quantitative\",\n", + " aggregate=\"sum\",\n", + " axis=alt.Axis(title=\"number of releases\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " legend=alt.Legend(title=\"release type\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_year(objects_per_year):\n", + " # check if input contains the right columns\n", + " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", + " raise ValueError(\"Data must contain columns 'year', 'tenders' and 'awards'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_year)\n", + " .transform_fold([\"tenders\", \"awards\"])\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\n", + " \"year\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"year\", 
format=\".0f\", tickCount=dates.shape[0]),\n", + " ),\n", + " y=alt.Y(\n", + " \"value\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"key\",\n", + " type=\"nominal\",\n", + " title=\"object type\",\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", + " alt.Tooltip(\"value\", title=\"number of objects\", type=\"quantitative\"),\n", + " alt.Tooltip(\"key\", title=\"object type\", type=\"nominal\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_top_buyers(buyers):\n", + " # check if input contains the right columns\n", + " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", + " raise ValueError(\"Data must contain columns 'name' and 'total_tenders'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(buyers)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"total_tenders\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of tenders\", format=\"~s\", tickCount=5),\n", + " ),\n", + " y=alt.Y(\n", + " \"name\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"buyer\", labelAngle=0),\n", + " sort=alt.SortField(\"total_tenders\", order=\"descending\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\", 
type=\"quantitative\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart" + ] + }, { "cell_type": "markdown", "metadata": { @@ -476,18 +762,7 @@ }, "outputs": [], "source": [ - "fig = px.bar(\n", - " stages,\n", - " x=\"stage\",\n", - " y=\"object_count\",\n", - " title=\"Stages covered\",\n", - " template=\"plotly_white\",\n", - " text=\"object_count\",\n", - " labels={\"stage\": \"Stage\", \"object_count\": \"Number of releases\"},\n", - ")\n", - "fig.update_traces(marker_color=\"#D6E100\")\n", - "fig.update_layout(width=400, height=350, bargap=0.5)\n", - "fig.show()" + "plot_objects_per_stage(stages)" ] }, { @@ -563,22 +838,7 @@ }, "outputs": [], "source": [ - "final = pd.melt(dates, id_vars=[\"year\"], value_vars=[\"tenders\", \"awards\"])\n", - "fig = px.bar(\n", - " final,\n", - " x=\"year\",\n", - " y=\"value\",\n", - " color=\"variable\",\n", - " barmode=\"group\",\n", - " title=\"Tenders and awards by year\",\n", - " template=\"plotly_white\",\n", - " text=\"value\",\n", - " color_discrete_sequence=[\"#D6E100\", \"#6c75e1\"],\n", - " labels={\"value\": \"Tenders and awards\"},\n", - ")\n", - "fig.update_layout(width=600, height=350, bargap=0.5)\n", - "\n", - "fig.show()" + "plot_objects_per_year(dates)" ] }, { @@ -692,20 +952,7 @@ }, "outputs": [], "source": [ - "fig = px.bar(\n", - " buyers.head(10),\n", - " y=\"name\",\n", - " x=\"total_tenders\",\n", - " title=\"Top 10 buyers\",\n", - " template=\"plotly_white\",\n", - " text=\"total_tenders\",\n", - " labels={\"total_tenders\": \"Number of procedures\", \"name\": \"buyer\"},\n", - " color_discrete_sequence=[\"#D6E100\", \"#6c75e1\"],\n", - " 
orientation=\"h\",\n", - ")\n", - "# fig.update_traces()\n", - "fig.update_layout(width=850, height=450, bargap=0.4, yaxis={\"categoryorder\": \"total ascending\"})\n", - "fig.show()" + "plot_top_buyers(buyers.head(10))" ] }, { @@ -1564,6 +1811,9 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/template_usability_checks_fieldlist.ipynb b/template_usability_checks_fieldlist.ipynb index ecc0ee0..0a29f5f 100644 --- a/template_usability_checks_fieldlist.ipynb +++ b/template_usability_checks_fieldlist.ipynb @@ -29,7 +29,7 @@ "outputs": [], "source": [ "! pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", @@ -134,6 +130,296 @@ "# set_light_mode()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhnbdjqU1e6p" + }, + "source": [ + "## Charts Setup\n", + "*You must run the cells in this section each time you connect to a new runtime. For example, when you return to the notebook after an idle timeout, when the runtime crashes, or when you restart or factory reset the runtime.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "elpUvMf61Ym6" + }, + "outputs": [], + "source": [ + "! 
pip install --upgrade altair >> pip.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aenztz1zK3" + }, + "source": [ + "Import chart packages and define chart functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bip37aP917XY" + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "\n", + "\n", + "def plot_release_count(release_counts):\n", + " # check if input contains the right columns\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", + " raise ValueError(\n", + " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", + " )\n", + " chart = (\n", + " alt.Chart(release_counts)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\n", + " \"release_count\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"release count\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"ocid_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"ocid count\", format=\"~s\", tickCount=5),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " title=\"release type\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"release_count\", title=\"release count\"),\n", + " alt.Tooltip(\"ocid_count\", title=\"ocid count\", format=\"~s\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " alt.Tooltip(\"collection_id\", title=\"collection id\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " 
.configure_view(strokeWidth=0)\n", + " )\n", + "\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_stage(objects_per_stage):\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_releases_by_month(release_dates):\n", + " # check if input contains the right columns\n", + " if not set([\"date\", \"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", + " # check if number of rows is more than 5000\n", + " 
if release_dates.shape[0] > 5000:\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(release_dates)\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", + " y=alt.Y(\n", + " \"release_count\",\n", + " type=\"quantitative\",\n", + " aggregate=\"sum\",\n", + " axis=alt.Axis(title=\"number of releases\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " legend=alt.Legend(title=\"release type\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_year(objects_per_year):\n", + " # check if input contains the right columns\n", + " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", + " raise ValueError(\"Data must contain columns 'year', 'tenders' and 'awards'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_year)\n", + " .transform_fold([\"tenders\", \"awards\"])\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\n", + " \"year\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"year\", 
format=\".0f\", tickCount=dates.shape[0]),\n", + " ),\n", + " y=alt.Y(\n", + " \"value\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"key\",\n", + " type=\"nominal\",\n", + " title=\"object type\",\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", + " alt.Tooltip(\"value\", title=\"number of objects\", type=\"quantitative\"),\n", + " alt.Tooltip(\"key\", title=\"object type\", type=\"nominal\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_top_buyers(buyers):\n", + " # check if input contains the right columns\n", + " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", + " raise ValueError(\"Data must contain columns 'name' and 'total_tenders'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(buyers)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"total_tenders\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of tenders\", format=\"~s\", tickCount=5),\n", + " ),\n", + " y=alt.Y(\n", + " \"name\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"buyer\", labelAngle=0),\n", + " sort=alt.SortField(\"total_tenders\", order=\"descending\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\", 
type=\"quantitative\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1062,6 +1348,9 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/template_usability_checks_registry.ipynb b/template_usability_checks_registry.ipynb index 346d61e..4ace403 100644 --- a/template_usability_checks_registry.ipynb +++ b/template_usability_checks_registry.ipynb @@ -29,7 +29,7 @@ "outputs": [], "source": [ "! pip install --upgrade pip > pip.log\n", - "! pip install --upgrade 'ocdskingfishercolab<0.4' altair ipywidgets matplotlib plotly psycopg2-binary seaborn >> pip.log" + "! pip install --upgrade 'ocdskingfishercolab<0.4' ipywidgets psycopg2-binary >> pip.log" ] }, { @@ -51,12 +51,8 @@ "source": [ "from collections import Counter\n", "\n", - "import altair as alt\n", - "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import plotly.express as px\n", - "import seaborn as sns\n", "from google.colab.data_table import DataTable\n", "from google.colab.files import download\n", "from ipywidgets import widgets\n", @@ -134,6 +130,296 @@ "# set_light_mode()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhnbdjqU1e6p" + }, + "source": [ + "## Charts Setup\n", + "*You must run the cells in this section each time you connect to a new runtime. 
For example, when you return to the notebook after an idle timeout, when the runtime crashes, or when you restart or factory reset the runtime.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "elpUvMf61Ym6" + }, + "outputs": [], + "source": [ + "! pip install --upgrade altair >> pip.log" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P1aenztz1zK3" + }, + "source": [ + "Import chart packages and define chart functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bip37aP917XY" + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "\n", + "\n", + "def plot_release_count(release_counts):\n", + " # check if input contains the right columns\n", + " if not set([\"collection_id\", \"release_type\", \"release_count\", \"ocid_count\"]).issubset(release_counts.columns):\n", + " raise ValueError(\n", + " \"Input data must contain the following columns: collection_id, release_type, release_count, ocid_count\"\n", + " )\n", + " chart = (\n", + " alt.Chart(release_counts)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\n", + " \"release_count\",\n", + " type=\"ordinal\",\n", + " axis=alt.Axis(title=\"release count\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"ocid_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"ocid count\", format=\"~s\", tickCount=5),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " title=\"release type\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"release_count\", title=\"release count\"),\n", + " alt.Tooltip(\"ocid_count\", title=\"ocid count\", format=\"~s\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " alt.Tooltip(\"collection_id\", title=\"collection id\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", 
+ " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + "\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_stage(objects_per_stage):\n", + " # check if input contains the right columns\n", + " if not set([\"stage\", \"object_count\"]).issubset(objects_per_stage.columns):\n", + " raise ValueError(\"Data must contain columns 'stage' and 'object_count'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(objects_per_stage)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"stage\",\n", + " type=\"ordinal\",\n", + " scale=alt.Scale(\n", + " domain=[\n", + " \"planning\",\n", + " \"tender\",\n", + " \"awards\",\n", + " \"contracts\",\n", + " \"implementation\",\n", + " ]\n", + " ),\n", + " sort=[\"planning\", \"tender\", \"awards\", \"contracts\", \"implementation\"],\n", + " axis=alt.Axis(title=\"stage\", labelAngle=0),\n", + " ),\n", + " y=alt.Y(\n", + " \"object_count\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"stage\", title=\"stage\"),\n", + " alt.Tooltip(\"object_count\", title=\"number of objects\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_releases_by_month(release_dates):\n", + " # check if input contains the right columns\n", + " if not set([\"date\", 
\"collection_id\", \"release_type\", \"release_count\"]).issubset(release_dates.columns):\n", + " raise ValueError(\"Data must contain columns 'date', 'collection_id', 'release_type', 'release_count'\")\n", + " # check if number of rows is more than 5000\n", + " if release_dates.shape[0] > 5000:\n", + " alt.data_transformers.disable_max_rows()\n", + "\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(release_dates)\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\"date\", timeUnit=\"yearmonth\", axis=alt.Axis(title=\"year and month\")),\n", + " y=alt.Y(\n", + " \"release_count\",\n", + " type=\"quantitative\",\n", + " aggregate=\"sum\",\n", + " axis=alt.Axis(title=\"number of releases\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"release_type\",\n", + " type=\"nominal\",\n", + " scale=alt.Scale(range=[\"#D6E100\", \"#FB6045\", \"#23B2A7\", \"#6C75E1\"]),\n", + " legend=alt.Legend(title=\"release type\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"date\", timeUnit=\"yearmonth\", title=\"date\"),\n", + " alt.Tooltip(\"release_count\", aggregate=\"sum\", title=\"number of releases\"),\n", + " alt.Tooltip(\"release_type\", title=\"release type\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_objects_per_year(objects_per_year):\n", + " # check if input contains the right columns\n", + " if not set([\"year\", \"tenders\", \"awards\"]).issubset(objects_per_year.columns):\n", + " raise ValueError(\"Data must contain columns 'year', 'tenders' and 'awards'\")\n", + " # draw chart\n", + " 
chart = (\n", + " alt.Chart(objects_per_year)\n", + " .transform_fold([\"tenders\", \"awards\"])\n", + " .mark_line(strokeWidth=3)\n", + " .encode(\n", + " x=alt.X(\n", + " \"year\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"year\", format=\".0f\", tickCount=objects_per_year.shape[0]),\n", + " ),\n", + " y=alt.Y(\n", + " \"value\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of objects\", format=\"~s\", tickCount=5),\n", + " scale=alt.Scale(zero=False),\n", + " ),\n", + " color=alt.Color(\n", + " \"key\",\n", + " type=\"nominal\",\n", + " title=\"object type\",\n", + " scale=alt.Scale(domain=[\"tenders\", \"awards\"], range=[\"#D6E100\", \"#FB6045\"]),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"year\", title=\"year\", type=\"quantitative\"),\n", + " alt.Tooltip(\"value\", title=\"number of objects\", type=\"quantitative\"),\n", + " alt.Tooltip(\"key\", title=\"object type\", type=\"nominal\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart\n", + "\n", + "\n", + "def plot_top_buyers(buyers):\n", + " # check if input contains the right columns\n", + " if not set([\"name\", \"total_tenders\"]).issubset(buyers.columns):\n", + " raise ValueError(\"Data must contain columns 'name' and 'total_tenders'\")\n", + " # draw chart\n", + " chart = (\n", + " alt.Chart(buyers)\n", + " .mark_bar(fill=\"#d6e100\")\n", + " .encode(\n", + " x=alt.X(\n", + " \"total_tenders\",\n", + " type=\"quantitative\",\n", + " axis=alt.Axis(title=\"number of tenders\", format=\"~s\", tickCount=5),\n", + " ),\n", + " y=alt.Y(\n", + " \"name\",\n", + " type=\"ordinal\",\n", + " 
axis=alt.Axis(title=\"buyer\", labelAngle=0),\n", + " sort=alt.SortField(\"total_tenders\", order=\"descending\"),\n", + " ),\n", + " tooltip=[\n", + " alt.Tooltip(\"name\", title=\"buyer\", type=\"nominal\"),\n", + " alt.Tooltip(\"total_tenders\", title=\"number of tenders\", type=\"quantitative\"),\n", + " ],\n", + " )\n", + " .properties(\n", + " width=600,\n", + " height=350,\n", + " padding=50,\n", + " title=alt.TitleParams(text=\"\", subtitle=[\"\"], fontSize=18),\n", + " )\n", + " .configure_axis(\n", + " titleFontSize=14,\n", + " labelFontSize=14,\n", + " labelPadding=5,\n", + " ticks=False,\n", + " domain=False,\n", + " )\n", + " .configure_view(strokeWidth=0)\n", + " )\n", + " return chart" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1197,6 +1483,9 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "name": "python" } }, "nbformat": 4,