timothyjlaurent
diff --git a/‎.github/workflows/release.yaml
+2-2 b/‎.github/workflows/release.yaml
+2-2
diff --git a/‎.github/workflows/upload-assets.yaml
+8-8 b/‎.github/workflows/upload-assets.yaml
+8-8
diff --git a/‎Eval Board Syn.ipynb
+247 b/‎Eval Board Syn.ipynb
+247
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
     inputs:
       is_test:
-        description: 'Use Test Pypi'
+        description: "Use Test Pypi"
         required: true
         type: boolean
         default: true
@@ -14,7 +14,7 @@ jobs:
     name: Build and publish to pypi
     runs-on: ubuntu-8core
     timeout-minutes: 10
-    environment: 
+    environment:
       name: release
     permissions:
       id-token: write
 
@@ -11,11 +11,11 @@ jobs:
     name: Build frontend assets
     runs-on: ubuntu-8core
     timeout-minutes: 10
-    environment: 
+    environment:
       name: release
     permissions:
-      contents: 'write'
-      id-token: 'write'
+      contents: "write"
+      id-token: "write"
     steps:
       - uses: actions/checkout@v3
         with:
@@ -24,7 +24,7 @@ jobs:
       - uses: actions/setup-node@v1
         with:
           node-version: "16.x"
-      - id: 'build'
+      - id: "build"
         run: |
           ./weave/frontend/build.sh
           if [[ -z "$(git status weave/frontend/sha1.txt --porcelain)" ]]
@@ -36,16 +36,16 @@ jobs:
             git diff
             echo "UPLOAD_ASSETS=true" >> "$GITHUB_OUTPUT"
           fi
-      - id: 'auth'
-        name: 'Authenticate to Google Cloud'
-        uses: 'google-github-actions/auth@v1'
+      - id: "auth"
+        name: "Authenticate to Google Cloud"
+        uses: "google-github-actions/auth@v1"
         if: ${{ steps.build.outputs.UPLOAD_ASSETS == 'true' }}
         with:
           workload_identity_provider: ${{ secrets.WORKLOAD_IDENTITY_PROVIDER }}
           # the service account secret format is wrong, hard coding it until it is fixed in core
           #service_account: ${{ secrets.WORKLOAD_IDENTITY_SERVICE_ACCOUNT }}
           service_account: [email protected]
-      - id: 'upload-and-push'
+      - id: "upload-and-push"
         if: ${{ steps.build.outputs.UPLOAD_ASSETS == 'true' }}
         run: |
           ./weave/frontend/bundle.sh
 
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5214b543",
+   "metadata": {},
+   "source": [
+    "- Load two eval_results\n",
+    "\n",
+    "EvalResult\n",
+    "- example, label, result, item_summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "314bb6b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "import typing\n",
+    "import weave\n",
+    "import random\n",
+    "import string\n",
+    "from weave import weave_internal\n",
+    "weave.use_frontend_devmode()\n",
+    "from weave.panels import panel_board\n",
+    "from weave import ops_domain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "550daef6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def rand_string_n(n: int) -> str:\n",
+    "    return \"\".join(\n",
+    "        random.choice(string.ascii_uppercase + string.digits) for _ in range(n)\n",
+    "    )\n",
+    "\n",
+    "dataset_raw = [{\n",
+    "    'id': str(i),\n",
+    "    'example': rand_string_n(10),\n",
+    "    'label': random.choice(string.ascii_uppercase)} for i in range(50)]\n",
+    "dataset = weave.save(dataset_raw, 'dataset')\n",
+    "#dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0d930d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict(dataset_row, config):\n",
+    "    if random.random() < config['correct_chance']:\n",
+    "        return dataset_row['label']\n",
+    "    return random.choice(string.ascii_uppercase)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb86b95c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate(dataset, predict_config):\n",
+    "    eval_result = []\n",
+    "    correct_count = 0\n",
+    "    count = 0\n",
+    "    for dataset_row in dataset:\n",
+    "        start_time = time.time()\n",
+    "        result = predict(dataset_row, predict_config)\n",
+    "        latency = time.time() - start_time\n",
+    "        latency = random.gauss(predict_config['latency_mu'], predict_config['latency_sigma'])\n",
+    "        correct = dataset_row['label'] == result\n",
+    "        if correct:\n",
+    "            correct_count += 1\n",
+    "        count +=1 \n",
+    "        eval_result.append({\n",
+    "            'dataset_id': dataset_row['id'],\n",
+    "            'result': result,\n",
+    "            'summary': {\n",
+    "                'latency': latency,\n",
+    "                'correct': correct\n",
+    "            }\n",
+    "        })\n",
+    "    return {\n",
+    "        'config': predict_config,\n",
+    "        'eval_table': eval_result,\n",
+    "        'summary': {'accuracy': correct_count / len(dataset)}}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05d16a5e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eval_result_raw0 = evaluate(dataset_raw, {'correct_chance': 0.5, 'latency_mu': 0.3, 'latency_sigma': 0.1})\n",
+    "eval_result_raw1 = evaluate(dataset_raw, {'correct_chance': 0.5, 'latency_mu': 0.4, 'latency_sigma': 0.2})\n",
+    "eval_result0 = weave.save(eval_result_raw0, 'eval_result0')\n",
+    "eval_result1 = weave.save(eval_result_raw1, 'eval_result1')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8065ad6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "varbar = panel_board.varbar()\n",
+    "\n",
+    "dataset_var = varbar.add('dataset', dataset)\n",
+    "eval_result0_var = varbar.add('eval_result0', eval_result0)\n",
+    "eval_result1_var = varbar.add('eval_result1', eval_result1)\n",
+    "\n",
+    "summary = varbar.add('summary', weave.ops.make_list(\n",
+    "    a=weave.ops.TypedDict.merge(weave.ops.dict_(name='res0'), eval_result0_var['summary']),\n",
+    "    b=weave.ops.TypedDict.merge(weave.ops.dict_(name='res1'), eval_result1_var['summary']),\n",
+    "))\n",
+    "\n",
+    "weave.ops.make_list(a=eval_result0_var['eval_table'], b=eval_result0_var['eval_table'])\n",
+    "\n",
+    "concatted_evals = varbar.add('concatted_evals', weave.ops.List.concat(\n",
+    "    weave.ops.make_list(\n",
+    "        a=eval_result0_var['eval_table'].map(\n",
+    "            lambda row: weave.ops.TypedDict.merge(\n",
+    "                weave.ops.dict_(name='res0'), row)),\n",
+    "        b=eval_result1_var['eval_table'].map(\n",
+    "            lambda row: weave.ops.TypedDict.merge(\n",
+    "                weave.ops.dict_(name='res1'), row)))))\n",
+    "\n",
+    "# join evals together first\n",
+    "joined_evals = varbar.add('joined_evals', weave.ops.join_all(\n",
+    "    weave.ops.make_list(a=eval_result0_var['eval_table'], b=eval_result1_var['eval_table']),\n",
+    "    lambda row: row['dataset_id'],\n",
+    "    False))\n",
+    "\n",
+    "# then join dataset to evals\n",
+    "dataset_evals = varbar.add('dataset_evals', weave.ops.join_2(\n",
+    "    dataset_var,\n",
+    "    joined_evals,\n",
+    "    lambda row: row['id'],\n",
+    "    lambda row: row['dataset_id'][0],\n",
+    "    'dataset',\n",
+    "    'evals',\n",
+    "    False,\n",
+    "    False\n",
+    "))\n",
+    "\n",
+    "\n",
+    "main = weave.panels.Group(\n",
+    "        layoutMode=\"grid\",\n",
+    "        showExpressions=True,\n",
+    "        enableAddPanel=True,\n",
+    "    )\n",
+    "\n",
+    "#### Run/config info TODO\n",
+    "\n",
+    "#### Summary info\n",
+    "\n",
+    "main.add(\"accuracy\",\n",
+    "         weave.panels.Plot(summary,\n",
+    "                           x=lambda row: row['accuracy'],\n",
+    "                           y=lambda row: row['name'],\n",
+    "                           color=lambda row: row['name']\n",
+    "                          ),\n",
+    "         layout=weave.panels.GroupPanelLayout(x=0, y=0, w=12, h=4))\n",
+    "\n",
+    "\n",
+    "main.add(\"latency\",\n",
+    "         weave.panels.Plot(concatted_evals,\n",
+    "                           x=lambda row: row['summary']['latency'],\n",
+    "                           y=lambda row: row['name'],\n",
+    "                           color=lambda row: row['name'],\n",
+    "                           mark='boxplot'),\n",
+    "         layout=weave.panels.GroupPanelLayout(x=12, y=0, w=12, h=4))\n",
+    "\n",
+    "#ct = main.add('concat_t', concatted_evals, layout=weave.panels.GroupPanelLayout(x=0, y=4, w=24, h=12))\n",
+    "# main.add('dataset_table', dataset)\n",
+    "# main.add('joined_evals', joined_evals)\n",
+    "# main.add('dataset_evals', dataset_evals, layout=weave.panels.GroupPanelLayout(x=0, y=4, w=24, h=6))\n",
+    "\n",
+    "##### Example details\n",
+    "\n",
+    "# more ideas: show examples that all got wrong, or that are confusing\n",
+    "\n",
+    "faceted_view = weave.panels.Facet(dataset_evals,\n",
+    "                             x=lambda row: row['evals.summary'][0]['correct'],\n",
+    "                             y=lambda row: row['evals.summary'][1]['correct'],\n",
+    "                             select=lambda row: row.count())\n",
+    "\n",
+    "faceted = main.add('faceted', faceted_view, layout=weave.panels.GroupPanelLayout(x=0, y=4, w=12, h=6))\n",
+    "\n",
+    "main.add(\"example_latencies\",\n",
+    "         weave.panels.Plot(dataset_evals,\n",
+    "                           x=lambda row: row['evals.summary']['latency'][0],\n",
+    "                           y=lambda row: row['evals.summary']['latency'][1]),\n",
+    "         layout=weave.panels.GroupPanelLayout(x=12, y=4, w=12, h=6))\n",
+    "\n",
+    "faceted_sel = weave.panels.Table(faceted.selected())\n",
+    "faceted_sel.config.rowSize = 2\n",
+    "faceted_sel.add_column(lambda row: row['dataset.id'], 'id')\n",
+    "faceted_sel.add_column(lambda row: row['dataset.example'], 'example')\n",
+    "faceted_sel.add_column(lambda row: row['dataset.label'], 'label')\n",
+    "faceted_sel.add_column(lambda row: weave.ops.dict_(res0=row['evals.result'][0], res1=row['evals.result'][1]), 'result')\n",
+    "faceted_sel.add_column(lambda row: weave.ops.dict_(res0=row['evals.summary'][0]['correct'], res1=row['evals.summary'][1]['correct']), 'correct')\n",
+    "faceted_sel.add_column(lambda row: weave.ops.dict_(res0=row['evals.summary'][0]['latency'], res1=row['evals.summary'][1]['latency']), 'latency')\n",
+    "\n",
+    "main.add('faceted_sel', faceted_sel, layout=weave.panels.GroupPanelLayout(x=0, y=10, w=24, h=12))\n",
+    "\n",
+    "weave.panels.Board(vars=varbar, panels=main)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}