Skip to content

Commit

Permalink
fix: fix issues in setup_registry
Browse files Browse the repository at this point in the history
  • Loading branch information
yolile committed Feb 10, 2024
1 parent 747f4de commit e46a9c8
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 85 deletions.
87 changes: 44 additions & 43 deletions component_setup_registry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"id": "6syz0fkkEdgj"
},
"outputs": [],
"source": [
"! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip",
"outputs": [

This comment has been minimized.

Copy link
@jpmckinney

jpmckinney Feb 12, 2024

Member

Please empty the outputs arrays, and set execution_count back to null.

This comment has been minimized.

Copy link
@jpmckinney

jpmckinney Feb 12, 2024

Member

This comment has been minimized.

Copy link
@yolile

yolile Feb 12, 2024

Author Member

Done, thanks

{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: ocdscardinal-0.0.5-linux-64-bit.zip\r\n",
" inflating: ocdscardinal \r\n"
]
}
],
"source": [
"! curl -sSOL https://github.com/open-contracting/cardinal-rs/releases/download/0.0.5/ocdscardinal-0.0.5-linux-64-bit.zip\n",
"! unzip -oj ocdscardinal-0.0.5-linux-64-bit.zip ocdscardinal-0.0.5-linux-64-bit/ocdscardinal\n",
"! ls"
"\n",
"import os"
]
},
{
Expand All @@ -64,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"id": "qauxqGNfb0-w"
},
Expand All @@ -75,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"id": "RLox7K6WEuuf"
},
Expand All @@ -98,46 +108,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {
"id": "E5DV4-cKfhr8"
},
"outputs": [],
"source": [
"file_gz = input(\"Name of .jsonl.gz file\")"
"for file in os.listdir(\".\"):\n",
" if file.endswith(\".gz\"):\n",
" file_name = file.replace(\".gz\", \"\")\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kGXWGevbfk0m"
},
"outputs": [],
"source": [
"file_jsonl = file.replace(\".gz\", \"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"id": "AvFAxSCP0LSR"
},
"outputs": [],
"source": [
"! gunzip -f \"$file_gz\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "19bdlU6bfo-3"
},
"outputs": [],
"source": [
"! ls -lh \"$file_jsonl\""
"! gunzip -f \"$file_name\""
]
},
{
Expand All @@ -160,13 +151,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {
"id": "mEHY4LwZHMU4"
},
"outputs": [],
"source": [
"! ./ocdscardinal coverage \"$file_jsonl\" >> result_fields.json"
"coverage = !./ocdscardinal coverage $file_name\n",
"fields = (\n",
" pd.DataFrame.from_dict(json.loads(coverage[0]), orient=\"index\", columns=[\"count\"])\n",
" .reset_index()\n",
" .rename(columns={\"index\": \"path\"})\n",
")"
]
},
{
Expand All @@ -180,19 +176,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {
"id": "u8jlJP9VH7Xi"
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": " path count\n0 id 277\n2 tag 277\n3 date 277\n4 ocid 277\n5 buyer/id 277\n.. ... ...\n278 workDetails 146\n279 contracts/relatedProcesses/id 7\n280 contracts/relatedProcesses/title 7\n282 contracts/relatedProcesses/relationship 7\n285 contracts/relatedProcesses 7\n\n[207 rows x 2 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>path</th>\n <th>count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>id</td>\n <td>277</td>\n </tr>\n <tr>\n <th>2</th>\n <td>tag</td>\n <td>277</td>\n </tr>\n <tr>\n <th>3</th>\n <td>date</td>\n <td>277</td>\n </tr>\n <tr>\n <th>4</th>\n <td>ocid</td>\n <td>277</td>\n </tr>\n <tr>\n <th>5</th>\n <td>buyer/id</td>\n <td>277</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>278</th>\n <td>workDetails</td>\n <td>146</td>\n </tr>\n <tr>\n <th>279</th>\n <td>contracts/relatedProcesses/id</td>\n <td>7</td>\n </tr>\n <tr>\n <th>280</th>\n <td>contracts/relatedProcesses/title</td>\n <td>7</td>\n </tr>\n <tr>\n <th>282</th>\n <td>contracts/relatedProcesses/relationship</td>\n <td>7</td>\n </tr>\n <tr>\n <th>285</th>\n <td>contracts/relatedProcesses</td>\n <td>7</td>\n </tr>\n </tbody>\n</table>\n<p>207 rows × 2 columns</p>\n</div>"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fields = (\n",
" pd.DataFrame(pd.read_json(\"result_fields.json\", typ=\"series\"), columns=[\"releases\"])\n",
" .rename_axis(\"path\")\n",
" .reset_index()\n",
")\n",
"# Leaves only object members\n",
"fields_table = fields[fields.path.str.contains(\"[a-z]$\")]\n",
"fields_table = fields[fields.path.str.contains(\"[a-z]$\")].copy()\n",
"fields_table[\"path\"] = fields_table[\"path\"].str.replace(r\"[][]|^/\", \"\", regex=True)\n",
"fields_table"
]
Expand Down
Loading

0 comments on commit e46a9c8

Please sign in to comment.