diff --git a/component_setup_registry.ipynb b/component_setup_registry.ipynb
index 07338f0..0418295 100644
--- a/component_setup_registry.ipynb
+++ b/component_setup_registry.ipynb
@@ -29,15 +29,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"id": "6syz0fkkEdgj"
},
- "outputs": [],
- "source": [
- "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip",
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Archive: ocdscardinal-0.0.5-linux-64-bit.zip\r\n",
+ " inflating: ocdscardinal \r\n"
+ ]
+ }
+ ],
+ "source": [
+ "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip\n",
"! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n",
- "! ls"
+ "\n",
+ "import os"
]
},
{
@@ -64,7 +74,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {
"id": "qauxqGNfb0-w"
},
@@ -75,7 +85,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"id": "RLox7K6WEuuf"
},
@@ -98,46 +108,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
"id": "E5DV4-cKfhr8"
},
"outputs": [],
"source": [
- "file_gz = input(\"Name of .jsonl.gz file\")"
+ "for file in sorted(os.listdir(\".\")):  # sorted: os.listdir order is arbitrary\n",
+ "    if file.endswith(\".gz\"):\n",
+ "        file_name = file[: -len(\".gz\")]  # strip only the trailing suffix\n",
+ "        break"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "kGXWGevbfk0m"
- },
- "outputs": [],
- "source": [
- "file_jsonl = file.replace(\".gz\", \"\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {
"id": "AvFAxSCP0LSR"
},
"outputs": [],
"source": [
- "! gunzip -f \"$file_gz\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "19bdlU6bfo-3"
- },
- "outputs": [],
- "source": [
- "! ls -lh \"$file_jsonl\""
+ "! gunzip -f \"{file_name}.gz\""
]
},
{
@@ -160,13 +151,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
"id": "mEHY4LwZHMU4"
},
"outputs": [],
"source": [
- "! ./ocdscardinal coverage \"$file_jsonl\" >> result_fields.json"
+ "coverage = !./ocdscardinal coverage \"$file_name\"\n",
+ "fields = (\n",
+ "    pd.DataFrame.from_dict(json.loads(coverage[0]), orient=\"index\", columns=[\"count\"])\n",
+ "    .reset_index()\n",
+ "    .rename(columns={\"index\": \"path\"})\n",
+ ")"
]
},
{
@@ -180,19 +176,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"id": "u8jlJP9VH7Xi"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " path count\n0 id 277\n2 tag 277\n3 date 277\n4 ocid 277\n5 buyer/id 277\n.. ... ...\n278 workDetails 146\n279 contracts/relatedProcesses/id 7\n280 contracts/relatedProcesses/title 7\n282 contracts/relatedProcesses/relationship 7\n285 contracts/relatedProcesses 7\n\n[207 rows x 2 columns]",
+ "text/html": "
\n\n
\n \n \n | \n path | \n count | \n
\n \n \n \n 0 | \n id | \n 277 | \n
\n \n 2 | \n tag | \n 277 | \n
\n \n 3 | \n date | \n 277 | \n
\n \n 4 | \n ocid | \n 277 | \n
\n \n 5 | \n buyer/id | \n 277 | \n
\n \n ... | \n ... | \n ... | \n
\n \n 278 | \n workDetails | \n 146 | \n
\n \n 279 | \n contracts/relatedProcesses/id | \n 7 | \n
\n \n 280 | \n contracts/relatedProcesses/title | \n 7 | \n
\n \n 282 | \n contracts/relatedProcesses/relationship | \n 7 | \n
\n \n 285 | \n contracts/relatedProcesses | \n 7 | \n
\n \n
\n
207 rows × 2 columns
\n
"
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "fields = (\n",
- " pd.DataFrame(pd.read_json(\"result_fields.json\", typ=\"series\"), columns=[\"releases\"])\n",
- " .rename_axis(\"path\")\n",
- " .reset_index()\n",
- ")\n",
"# Leaves only object members\n",
- "fields_table = fields[fields.path.str.contains(\"[a-z]$\")]\n",
+ "fields_table = fields[fields.path.str.contains(\"[a-z]$\")].copy()\n",
"fields_table[\"path\"] = fields_table[\"path\"].str.replace(r\"[][]|^/\", \"\", regex=True)\n",
"fields_table"
]
diff --git a/template_usability_checks_registry.ipynb b/template_usability_checks_registry.ipynb
index d16570e..47124b2 100644
--- a/template_usability_checks_registry.ipynb
+++ b/template_usability_checks_registry.ipynb
@@ -538,14 +538,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"id": "6syz0fkkEdgj"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Archive: ocdscardinal-0.0.5-linux-64-bit.zip\r\n",
+ " inflating: ocdscardinal \r\n"
+ ]
+ }
+ ],
"source": [
- "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n",
- "! ls"
+ "! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip\n",
+ "! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n",
+ "\n",
+ "import os"
]
},
{
@@ -572,7 +583,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {
"id": "qauxqGNfb0-w"
},
@@ -583,7 +594,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"id": "RLox7K6WEuuf"
},
@@ -606,46 +617,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
"id": "E5DV4-cKfhr8"
},
"outputs": [],
"source": [
- "file_gz = input(\"Name of .jsonl.gz file\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "kGXWGevbfk0m"
- },
- "outputs": [],
- "source": [
- "file_jsonl = file.replace(\".gz\", \"\")"
+ "for file in sorted(os.listdir(\".\")):  # sorted: os.listdir order is arbitrary\n",
+ "    if file.endswith(\".gz\"):\n",
+ "        file_name = file[: -len(\".gz\")]  # strip only the trailing suffix\n",
+ "        break"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {
"id": "AvFAxSCP0LSR"
},
"outputs": [],
"source": [
- "! gunzip -f \"$file_gz\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "19bdlU6bfo-3"
- },
- "outputs": [],
- "source": [
- "! ls -lh \"$file_jsonl\""
+ "! gunzip -f \"{file_name}.gz\""
]
},
{
@@ -668,13 +660,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
"id": "mEHY4LwZHMU4"
},
"outputs": [],
"source": [
- "! ./ocdscardinal coverage \"$file_jsonl\" >> result_fields.json"
+ "coverage = !./ocdscardinal coverage \"$file_name\"\n",
+ "fields = (\n",
+ "    pd.DataFrame.from_dict(json.loads(coverage[0]), orient=\"index\", columns=[\"count\"])\n",
+ "    .reset_index()\n",
+ "    .rename(columns={\"index\": \"path\"})\n",
+ ")"
]
},
{
@@ -688,19 +685,122 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"id": "u8jlJP9VH7Xi"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " path | \n",
+ " count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " id | \n",
+ " 277 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " tag | \n",
+ " 277 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " date | \n",
+ " 277 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ocid | \n",
+ " 277 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " buyer/id | \n",
+ " 277 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 278 | \n",
+ " workDetails | \n",
+ " 146 | \n",
+ "
\n",
+ " \n",
+ " 279 | \n",
+ " contracts/relatedProcesses/id | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 280 | \n",
+ " contracts/relatedProcesses/title | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 282 | \n",
+ " contracts/relatedProcesses/relationship | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " 285 | \n",
+ " contracts/relatedProcesses | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
207 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " path count\n",
+ "0 id 277\n",
+ "2 tag 277\n",
+ "3 date 277\n",
+ "4 ocid 277\n",
+ "5 buyer/id 277\n",
+ ".. ... ...\n",
+ "278 workDetails 146\n",
+ "279 contracts/relatedProcesses/id 7\n",
+ "280 contracts/relatedProcesses/title 7\n",
+ "282 contracts/relatedProcesses/relationship 7\n",
+ "285 contracts/relatedProcesses 7\n",
+ "\n",
+ "[207 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "fields = (\n",
- " pd.DataFrame(pd.read_json(\"result_fields.json\", typ=\"series\"), columns=[\"releases\"])\n",
- " .rename_axis(\"path\")\n",
- " .reset_index()\n",
- ")\n",
"# Leaves only object members\n",
- "fields_table = fields[fields.path.str.contains(\"[a-z]$\")]\n",
+ "fields_table = fields[fields.path.str.contains(\"[a-z]$\")].copy()\n",
"fields_table[\"path\"] = fields_table[\"path\"].str.replace(r\"[][]|^/\", \"\", regex=True)\n",
"fields_table"
]