From 20071e5a2696a2b87358b901ec5e176cafc53bb2 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta <deepyaman.datta@utexas.edu>
Date: Mon, 17 Jun 2024 12:34:52 +0100
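Subject: [PATCH 1/4] docs(website): perform casts for PyTorch as needed

The `int64` cast on the target and the `float32` cast on the numeric
features were documented as PyTorch defaults. Remove both casts (and
their PyTorch-specific comments) from the scikit-learn and XGBoost
tutorials.
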
---
 docs/tutorial/scikit-learn.qmd | 6 +-----
 docs/tutorial/xgboost.qmd      | 6 +-----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/docs/tutorial/scikit-learn.qmd b/docs/tutorial/scikit-learn.qmd
index 042f0e1..53a5637 100644
--- a/docs/tutorial/scikit-learn.qmd
+++ b/docs/tutorial/scikit-learn.qmd
@@ -81,8 +81,7 @@ weather
 flight_data = (
     flights.mutate(
         # Convert the arrival delay to a factor
-        # By default, PyTorch expects the target to have a Long datatype
-        arr_delay=ibis.ifelse(flights.arr_delay >= 30, 1, 0).cast("int64"),
+        arr_delay=ibis.ifelse(flights.arr_delay >= 30, 1, 0),
         # We will use the date (not date-time) in the recipe below
         date=flights.time_hour.date(),
     )
@@ -167,9 +166,6 @@ flights_rec = ml.Recipe(
     ml.DropZeroVariance(ml.everything()),
     ml.MutateAt("dep_time", ibis._.hour() * 60 + ibis._.minute()),
     ml.MutateAt(ml.timestamp(), ibis._.epoch_seconds()),
-    # By default, PyTorch requires that the type of `X` is `np.float32`.
-    # https://discuss.pytorch.org/t/mat1-and-mat2-must-have-the-same-dtype-but-got-double-and-float/197555/2
-    ml.Cast(ml.numeric(), "float32"),
 )
 ```
 
diff --git a/docs/tutorial/xgboost.qmd b/docs/tutorial/xgboost.qmd
index 8f37f48..9ef7a27 100644
--- a/docs/tutorial/xgboost.qmd
+++ b/docs/tutorial/xgboost.qmd
@@ -81,8 +81,7 @@ weather
 flight_data = (
     flights.mutate(
         # Convert the arrival delay to a factor
-        # By default, PyTorch expects the target to have a Long datatype
-        arr_delay=ibis.ifelse(flights.arr_delay >= 30, 1, 0).cast("int64"),
+        arr_delay=ibis.ifelse(flights.arr_delay >= 30, 1, 0),
         # We will use the date (not date-time) in the recipe below
         date=flights.time_hour.date(),
     )
@@ -167,9 +166,6 @@ flights_rec = ml.Recipe(
     ml.DropZeroVariance(ml.everything()),
     ml.MutateAt("dep_time", ibis._.hour() * 60 + ibis._.minute()),
     ml.MutateAt(ml.timestamp(), ibis._.epoch_seconds()),
-    # By default, PyTorch requires that the type of `X` is `np.float32`.
-    # https://discuss.pytorch.org/t/mat1-and-mat2-must-have-the-same-dtype-but-got-double-and-float/197555/2
-    ml.Cast(ml.numeric(), "float32"),
 )
 ```
 

From a4919fb973e5a725dc9678d78f67697d393183f8 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta <deepyaman.datta@utexas.edu>
Date: Mon, 17 Jun 2024 12:57:29 +0100
Subject: [PATCH 2/4] docs: acknowledge original source for the tutorial

---
 docs/tutorial/_acknowledgments.md                |  3 +++
 docs/tutorial/pytorch.qmd                        |  2 ++
 docs/tutorial/scikit-learn.qmd                   |  2 ++
 docs/tutorial/xgboost.qmd                        |  2 ++
 examples/Preprocess your data with recipes.ipynb | 10 ++++++++++
 5 files changed, 19 insertions(+)
 create mode 100644 docs/tutorial/_acknowledgments.md

diff --git a/docs/tutorial/_acknowledgments.md b/docs/tutorial/_acknowledgments.md
new file mode 100644
index 0000000..af0fdb7
--- /dev/null
+++ b/docs/tutorial/_acknowledgments.md
@@ -0,0 +1,3 @@
+## Acknowledgments
+
+This tutorial is derived from the [tidymodels article of the same name](https://www.tidymodels.org/start/recipes/). The transformation logic is very similar, and much of the text is copied verbatim.
diff --git a/docs/tutorial/pytorch.qmd b/docs/tutorial/pytorch.qmd
index b2c8c97..b616210 100644
--- a/docs/tutorial/pytorch.qmd
+++ b/docs/tutorial/pytorch.qmd
@@ -240,3 +240,5 @@ X_test = test_data.drop("arr_delay")
 y_test = test_data.arr_delay
 pipe.score(X_test, y_test)
 ```
+
+{{< include _acknowledgments.md >}}
diff --git a/docs/tutorial/scikit-learn.qmd b/docs/tutorial/scikit-learn.qmd
index 53a5637..5131659 100644
--- a/docs/tutorial/scikit-learn.qmd
+++ b/docs/tutorial/scikit-learn.qmd
@@ -207,3 +207,5 @@ X_test = test_data.drop("arr_delay")
 y_test = test_data.arr_delay
 pipe.score(X_test, y_test)
 ```
+
+{{< include _acknowledgments.md >}}
diff --git a/docs/tutorial/xgboost.qmd b/docs/tutorial/xgboost.qmd
index 9ef7a27..70b231b 100644
--- a/docs/tutorial/xgboost.qmd
+++ b/docs/tutorial/xgboost.qmd
@@ -207,3 +207,5 @@ X_test = test_data.drop("arr_delay")
 y_test = test_data.arr_delay
 pipe.score(X_test, y_test)
 ```
+
+{{< include _acknowledgments.md >}}
diff --git a/examples/Preprocess your data with recipes.ipynb b/examples/Preprocess your data with recipes.ipynb
index 14d1139..0263a68 100644
--- a/examples/Preprocess your data with recipes.ipynb	
+++ b/examples/Preprocess your data with recipes.ipynb	
@@ -1221,6 +1221,16 @@
     "y_test = test_data.arr_delay\n",
     "pipe.score(X_test, y_test)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cc21b842-b85c-4ed9-af03-1feace909172",
+   "metadata": {},
+   "source": [
+    "## Acknowledgments\n",
+    "\n",
+    "This tutorial is derived from the [tidymodels article of the same name](https://www.tidymodels.org/start/recipes/). The transformation logic is very similar, and much of the text is copied verbatim."
+   ]
   }
  ],
  "metadata": {

From 5337593cdbcdf3881b4f23464c47c35d817d8195 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta <deepyaman.datta@utexas.edu>
Date: Mon, 17 Jun 2024 13:01:07 +0100
Subject: [PATCH 3/4] docs: replace "data set" with "dataset" everywhere

---
 docs/tutorial/pytorch.qmd                        | 4 ++--
 docs/tutorial/scikit-learn.qmd                   | 4 ++--
 docs/tutorial/xgboost.qmd                        | 4 ++--
 examples/Preprocess your data with recipes.ipynb | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/tutorial/pytorch.qmd b/docs/tutorial/pytorch.qmd
index b616210..94a09dd 100644
--- a/docs/tutorial/pytorch.qmd
+++ b/docs/tutorial/pytorch.qmd
@@ -28,7 +28,7 @@ pip install 'ibis-framework[duckdb,examples]' ibis-ml skorch torch
 
 ## The New York City flight data
 
-Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:
+Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This dataset contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:
 
 ```{python}
 #| output: false
@@ -107,7 +107,7 @@ flight_data = (
 flight_data
 ```
 
-We can see that about 16% of the flights in this data set arrived more than 30 minutes late.
+We can see that about 16% of the flights in this dataset arrived more than 30 minutes late.
 
 ```{python}
 flight_data.arr_delay.value_counts().rename(n="arr_delay_count").mutate(
diff --git a/docs/tutorial/scikit-learn.qmd b/docs/tutorial/scikit-learn.qmd
index 5131659..cefdf96 100644
--- a/docs/tutorial/scikit-learn.qmd
+++ b/docs/tutorial/scikit-learn.qmd
@@ -28,7 +28,7 @@ pip install 'ibis-framework[duckdb,examples]' ibis-ml scikit-learn
 
 ## The New York City flight data
 
-Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:
+Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This dataset contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:
 
 ```{python}
 #| output: false
@@ -106,7 +106,7 @@ flight_data = (
 flight_data
 ```
 
-We can see that about 16% of the flights in this data set arrived more than 30 minutes late.
+We can see that about 16% of the flights in this dataset arrived more than 30 minutes late.
 
 ```{python}
 flight_data.arr_delay.value_counts().rename(n="arr_delay_count").mutate(
diff --git a/docs/tutorial/xgboost.qmd b/docs/tutorial/xgboost.qmd
index 70b231b..8f53aff 100644
--- a/docs/tutorial/xgboost.qmd
+++ b/docs/tutorial/xgboost.qmd
@@ -28,7 +28,7 @@ pip install 'ibis-framework[duckdb,examples]' ibis-ml 'xgboost[scikit-learn]'
 
 ## The New York City flight data
 
-Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:
+Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This dataset contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:
 
 ```{python}
 #| output: false
@@ -106,7 +106,7 @@ flight_data = (
 flight_data
 ```
 
-We can see that about 16% of the flights in this data set arrived more than 30 minutes late.
+We can see that about 16% of the flights in this dataset arrived more than 30 minutes late.
 
 ```{python}
 flight_data.arr_delay.value_counts().rename(n="arr_delay_count").mutate(
diff --git a/examples/Preprocess your data with recipes.ipynb b/examples/Preprocess your data with recipes.ipynb
index 0263a68..6c4a4b7 100644
--- a/examples/Preprocess your data with recipes.ipynb	
+++ b/examples/Preprocess your data with recipes.ipynb	
@@ -33,7 +33,7 @@
    "source": [
     "## The New York City flight data\n",
     "\n",
-    "Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:"
+    "Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This dataset contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:"
    ]
   },
   {
@@ -317,7 +317,7 @@
    "id": "722b2213-3b84-4f03-9006-59bf72591613",
    "metadata": {},
    "source": [
-    "We can see that about 16% of the flights in this data set arrived more than 30 minutes late."
+    "We can see that about 16% of the flights in this dataset arrived more than 30 minutes late."
    ]
   },
   {

From d9fca48ba3d96ee8e264366ccd2545524b7ffa12 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta <deepyaman.datta@utexas.edu>
Date: Mon, 17 Jun 2024 14:59:09 +0100
Subject: [PATCH 4/4] docs(website): move support matrix under reference

---
 docs/_quarto.yml                                          | 6 ++++--
 .../support-matrix/index.qmd}                             | 8 ++++----
 docs/{ => reference/support-matrix}/step_config.yml       | 0
 docs/{ => reference/support-matrix}/support_matrix.py     | 0
 4 files changed, 8 insertions(+), 6 deletions(-)
 rename docs/{support_matrix.qmd => reference/support-matrix/index.qmd} (94%)
 rename docs/{ => reference/support-matrix}/step_config.yml (100%)
 rename docs/{ => reference/support-matrix}/support_matrix.py (100%)

diff --git a/docs/_quarto.yml b/docs/_quarto.yml
index f00b0e4..48b0785 100644
--- a/docs/_quarto.yml
+++ b/docs/_quarto.yml
@@ -48,8 +48,6 @@ website:
       - text: "Tutorial"
         href: tutorial/index.qmd
       - sidebar:reference
-      - text: "Support Matrix"
-        href: support_matrix.qmd
     tools:
       - icon: github
         menu:
@@ -82,6 +80,10 @@ website:
             - reference/steps-temporal.qmd
             - reference/steps-other.qmd
 
+        - section: Support
+          contents:
+            - reference/support-matrix/index.qmd
+
 format:
   html:
     theme:
diff --git a/docs/support_matrix.qmd b/docs/reference/support-matrix/index.qmd
similarity index 94%
rename from docs/support_matrix.qmd
rename to docs/reference/support-matrix/index.qmd
index efc5438..77ba107 100644
--- a/docs/support_matrix.qmd
+++ b/docs/reference/support-matrix/index.qmd
@@ -20,7 +20,7 @@ varies:
 1. ✅ Fully supported
 2. 🚫 Not supported
 3. 🟡 Partial support (hover over for more information)
-4. 🔍 Support varies by operation or data type across different backends (check [operation support matrix](https://ibis-project.org/support_matrix) for details)
+4. 🔍 Support varies by operation or data type across different backends (check the [Ibis operation support matrix](https://ibis-project.org/backends/support/matrix) for details)
 
 :::
 :::
@@ -65,11 +65,10 @@ dict(value=len(ibis.util.backend_entry_points()) - 3, color="green", icon="datab
 ```{python}
 from itables import show
 from support_matrix import make_support_matrix
-import re
-
 
 matrix = make_support_matrix()
 
+
 def custom_replace(value):
     if value is True:
         return "✅"
@@ -80,10 +79,11 @@ def custom_replace(value):
     else:
         return f"<span title='Support: {value}'>🟡</span>"
 
+
 show(
     matrix.applymap(custom_replace),
     ordering=False,
     paging=False,
     buttons=["copy", "excel", "csv"],
 )
-```
\ No newline at end of file
+```
diff --git a/docs/step_config.yml b/docs/reference/support-matrix/step_config.yml
similarity index 100%
rename from docs/step_config.yml
rename to docs/reference/support-matrix/step_config.yml
diff --git a/docs/support_matrix.py b/docs/reference/support-matrix/support_matrix.py
similarity index 100%
rename from docs/support_matrix.py
rename to docs/reference/support-matrix/support_matrix.py