diff --git a/component_setup_registry.ipynb b/component_setup_registry.ipynb index 07338f0..0418295 100644 --- a/component_setup_registry.ipynb +++ b/component_setup_registry.ipynb @@ -29,15 +29,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "id": "6syz0fkkEdgj" }, - "outputs": [], - "source": [ - "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: ocdscardinal-0.0.5-linux-64-bit.zip\r\n", + " inflating: ocdscardinal \r\n" + ] + } + ], + "source": [ + "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip\n", "! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n", - "! ls" + "\n", + "import os" ] }, { @@ -64,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "qauxqGNfb0-w" }, @@ -75,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "RLox7K6WEuuf" }, @@ -98,46 +108,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "id": "E5DV4-cKfhr8" }, "outputs": [], "source": [ - "file_gz = input(\"Name of .jsonl.gz file\")" + "for file in os.listdir(\".\"):\n", + " if file.endswith(\".gz\"):\n", + " file_name = file.replace(\".gz\", \"\")\n", + " break" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kGXWGevbfk0m" - }, - "outputs": [], - "source": [ - "file_jsonl = file.replace(\".gz\", \"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "id": "AvFAxSCP0LSR" }, "outputs": [], "source": [ - "! gunzip -f \"$file_gz\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "19bdlU6bfo-3" - }, - "outputs": [], - "source": [ - "! ls -lh \"$file_jsonl\"" + "! gunzip -f \"$file_name\"" ] }, { @@ -160,13 +151,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "id": "mEHY4LwZHMU4" }, "outputs": [], "source": [ - "! ./ocdscardinal coverage \"$file_jsonl\" >> result_fields.json" + "coverage = !./ocdscardinal coverage $file_name\n", + "fields = (\n", + " pd.DataFrame.from_dict(json.loads(coverage[0]), orient=\"index\", columns=[\"count\"])\n", + " .reset_index()\n", + " .rename(columns={\"index\": \"path\"})\n", + ")" ] }, { @@ -180,19 +176,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "id": "u8jlJP9VH7Xi" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": " path count\n0 id 277\n2 tag 277\n3 date 277\n4 ocid 277\n5 buyer/id 277\n.. ... ...\n278 workDetails 146\n279 contracts/relatedProcesses/id 7\n280 contracts/relatedProcesses/title 7\n282 contracts/relatedProcesses/relationship 7\n285 contracts/relatedProcesses 7\n\n[207 rows x 2 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
pathcount
0id277
2tag277
3date277
4ocid277
5buyer/id277
.........
278workDetails146
279contracts/relatedProcesses/id7
280contracts/relatedProcesses/title7
282contracts/relatedProcesses/relationship7
285contracts/relatedProcesses7
\n

207 rows × 2 columns

\n
" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "fields = (\n", - " pd.DataFrame(pd.read_json(\"result_fields.json\", typ=\"series\"), columns=[\"releases\"])\n", - " .rename_axis(\"path\")\n", - " .reset_index()\n", - ")\n", "# Leaves only object members\n", - "fields_table = fields[fields.path.str.contains(\"[a-z]$\")]\n", + "fields_table = fields[fields.path.str.contains(\"[a-z]$\")].copy()\n", "fields_table[\"path\"] = fields_table[\"path\"].str.replace(r\"[][]|^/\", \"\", regex=True)\n", "fields_table" ] diff --git a/template_usability_checks_registry.ipynb b/template_usability_checks_registry.ipynb index d16570e..47124b2 100644 --- a/template_usability_checks_registry.ipynb +++ b/template_usability_checks_registry.ipynb @@ -538,14 +538,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "id": "6syz0fkkEdgj" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: ocdscardinal-0.0.5-linux-64-bit.zip\r\n", + " inflating: ocdscardinal \r\n" + ] + } + ], "source": [ - "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n", - "! ls" + "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip\n", + "! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n", + "\n", + "import os" ] }, { @@ -572,7 +583,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "qauxqGNfb0-w" }, @@ -583,7 +594,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "RLox7K6WEuuf" }, @@ -606,46 +617,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "id": "E5DV4-cKfhr8" }, "outputs": [], "source": [ - "file_gz = input(\"Name of .jsonl.gz file\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kGXWGevbfk0m" - }, - "outputs": [], - "source": [ - "file_jsonl = file.replace(\".gz\", \"\")" + "for file in os.listdir(\".\"):\n", + " if file.endswith(\".gz\"):\n", + " file_name = file.replace(\".gz\", \"\")\n", + " break" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "id": "AvFAxSCP0LSR" }, "outputs": [], "source": [ - "! gunzip -f \"$file_gz\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "19bdlU6bfo-3" - }, - "outputs": [], - "source": [ - "! ls -lh \"$file_jsonl\"" + "! gunzip -f \"$file_name\"" ] }, { @@ -668,13 +660,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "id": "mEHY4LwZHMU4" }, "outputs": [], "source": [ - "! ./ocdscardinal coverage \"$file_jsonl\" >> result_fields.json" + "coverage = !./ocdscardinal coverage $file_name\n", + "fields = (\n", + " pd.DataFrame.from_dict(json.loads(coverage[0]), orient=\"index\", columns=[\"count\"])\n", + " .reset_index()\n", + " .rename(columns={\"index\": \"path\"})\n", + ")" ] }, { @@ -688,19 +685,122 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "id": "u8jlJP9VH7Xi" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pathcount
0id277
2tag277
3date277
4ocid277
5buyer/id277
.........
278workDetails146
279contracts/relatedProcesses/id7
280contracts/relatedProcesses/title7
282contracts/relatedProcesses/relationship7
285contracts/relatedProcesses7
\n", + "

207 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " path count\n", + "0 id 277\n", + "2 tag 277\n", + "3 date 277\n", + "4 ocid 277\n", + "5 buyer/id 277\n", + ".. ... ...\n", + "278 workDetails 146\n", + "279 contracts/relatedProcesses/id 7\n", + "280 contracts/relatedProcesses/title 7\n", + "282 contracts/relatedProcesses/relationship 7\n", + "285 contracts/relatedProcesses 7\n", + "\n", + "[207 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "fields = (\n", - " pd.DataFrame(pd.read_json(\"result_fields.json\", typ=\"series\"), columns=[\"releases\"])\n", - " .rename_axis(\"path\")\n", - " .reset_index()\n", - ")\n", "# Leaves only object members\n", - "fields_table = fields[fields.path.str.contains(\"[a-z]$\")]\n", + "fields_table = fields[fields.path.str.contains(\"[a-z]$\")].copy()\n", "fields_table[\"path\"] = fields_table[\"path\"].str.replace(r\"[][]|^/\", \"\", regex=True)\n", "fields_table" ]