From 4ebcf11e21a2d749911e4f3a9c57df1404abbeaf Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 14 Jan 2024 07:48:46 +0000 Subject: [PATCH] fixups --- docs/abs.md | 61 +++++++++++++++++++++++-------------------- docs/index.md | 6 ++--- docs/noop.md | 1 - docs/prerequisites.md | 22 ++++++++++++++++ run.py | 5 +++- 5 files changed, 61 insertions(+), 34 deletions(-) diff --git a/docs/abs.md b/docs/abs.md index ed89117..0d37e68 100644 --- a/docs/abs.md +++ b/docs/abs.md @@ -25,9 +25,9 @@ Next, let's define `abs_i64` on the Rust side. The general idea will be: - A Series is backed by a ChunkedArray. Each chunk is an Arrow Array - which is continuous in memory. So we're going to start by iterating - over chunks. -- For each chunk, we iterate over the elements in that array. + which is contiguous in memory. So we're going to start by iterating + over chunks by calling `downcast_iter`. +- For each chunk, we iterate over the elements in that array (`into_iter`) - Each element can either be `Some(i64)`, or `None`. If it's `None`, we return `None`, whereas if it's `Some(i64)`, then we take its absolute value. @@ -50,7 +50,34 @@ fn abs_i64(inputs: &[Series]) -> PolarsResult { } ``` -NOTE: there are faster ways of implementing this particular operation. If you +Let's try this out. Make a Python file `run.py` with the following: +```python +import polars as pl +import minimal_plugin # noqa: F401 + +df = pl.DataFrame({ + 'a': [1, -1, None], + 'b': [4.1, 5.2, -6.3], + 'c': ['hello', 'everybody!', '!'] +}) +print(df.with_columns(pl.col('a').mp.abs_i64().name.suffix('_abs'))) +``` +If this outputs +``` +shape: (3, 4) +┌──────┬──────┬────────────┬───────┐ +│ a ┆ b ┆ c ┆ a_abs │ +│ --- ┆ --- ┆ --- ┆ --- │ +│ i64 ┆ f64 ┆ str ┆ i64 │ +╞══════╪══════╪════════════╪═══════╡ +│ 1 ┆ 4.1 ┆ hello ┆ 1 │ +│ -1 ┆ 5.2 ┆ everybody! ┆ 1 │ +│ null ┆ -6.3 ┆ ! ┆ null │ +└──────┴──────┴────────────┴───────┘ +``` +then you did everything correctly! 
+ +> NOTE: there are faster ways of implementing this particular operation. If you look at the Polars source code, you'll see that it's a bit different there. The purpose of this exercise is to show you an implementation which is explicit and generalisable enough that you can customise it according to your @@ -130,29 +157,7 @@ fn abs_numeric(inputs: &[Series]) -> PolarsResult { } ``` -Let's try this out: +Now, if you return to `run.py`, you should be able to run ```python -import polars as pl -import minimal_plugin # noqa: F401 - -df = pl.DataFrame({ - 'a': [1, -1, None], - 'b': [4.1, 5.2, -6.3], - 'c': ['hello', 'everybody!', '!'] -}) print(df.with_columns(pl.col('a', 'b').mp.abs_numeric().name.suffix('_abs'))) -``` -If this outputs -``` -shape: (3, 5) -┌──────┬──────┬────────────┬───────┬───────┐ -│ a ┆ b ┆ c ┆ a_abs ┆ b_abs │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ i64 ┆ f64 ┆ str ┆ i64 ┆ f64 │ -╞══════╪══════╪════════════╪═══════╪═══════╡ -│ 1 ┆ 4.1 ┆ hello ┆ 1 ┆ 4.1 │ -│ -1 ┆ 5.2 ┆ everybody! ┆ 1 ┆ 5.2 │ -│ null ┆ -6.3 ┆ ! ┆ null ┆ 6.3 │ -└──────┴──────┴────────────┴───────┴───────┘ -``` -then you did everything correctly! \ No newline at end of file +``` \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 7a9a3b4..cfe068c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,9 +6,7 @@ ![](assets/image.png){: style="width:400px"} -This tutorial is just meant to get you started. We'll make a Polars Plugin with -some basic functionality - you can then customise it according to your own needs! +This tutorial is just meant to get you started. Once you've understood the +basics, you can customise the examples according to your needs. Code to follow along is here: https://github.com/MarcoGorelli/polars-plugins-tutorial. - -Let's get this party started! 
diff --git a/docs/noop.md b/docs/noop.md index b3cc084..b3e6d4f 100644 --- a/docs/noop.md +++ b/docs/noop.md @@ -10,7 +10,6 @@ Here are the files we'll need to create: - `Cargo.toml`: file with Rust dependencies. - `pyproject.toml`: file with Python build info. -- `requirements.txt`: Python build dependencies. Start by copying the `Cargo.toml` and `pyproject.toml` files from this repository - they contain the diff --git a/docs/prerequisites.md b/docs/prerequisites.md index 2ea8eca..7d4637a 100644 --- a/docs/prerequisites.md +++ b/docs/prerequisites.md @@ -1,5 +1,27 @@ # 0. Prerequisites +## Knowledge + +> "But you know what I like more than materialistic things? Knowledge." Tai Lopez + +How much Rust do you need to know to write your own Polars plugin? Less than +you think. + +If you pick up [The Rust Programming Language](https://doc.rust-lang.org/book/) +and can make it through the first 9 chapters, then I postulate +that you'll have enough knowledge to replace at least 99% of inefficient `map_elements` +calls. +If you want to make a plugin which is generic enough that you can share +it with others, then you may need chapter 10 as well. + +You'll also need basic Python knowledge: classes, decorators, and functions. + +Alternatively, you could just clone this repo and then hack away +at the examples trial-and-error style until you get what you're looking +for - the compiler will probably help you more than you're expecting. + +## Software + First, you should probably make new directory for this project. Either clone https://github.com/MarcoGorelli/polars-plugins-tutorial, or make a new directory. 
diff --git a/run.py b/run.py index 6308396..701089f 100644 --- a/run.py +++ b/run.py @@ -1,9 +1,12 @@ import polars as pl import minimal_plugin # noqa: F401 +import polars as pl +import minimal_plugin # noqa: F401 + df = pl.DataFrame({ 'a': [1, -1, None], 'b': [4.1, 5.2, -6.3], 'c': ['hello', 'everybody!', '!'] }) -print(df.with_columns(pl.col('a', 'b').mp.abs_numeric().name.suffix('_abs'))) +print(df.with_columns(pl.col('a').mp.abs_numeric().name.suffix('_abs'))) \ No newline at end of file