Skip to content

Commit c84afb2

Browse files
Tmonster and amoeba authored
Rename R to R-arrow (#68)
* should change arrow to show R-arrow
* new arrow benchmarks report solution as R-arrow
* update arrow to R-arrow in a few more places
* Fix remaining issues in Tmonster#10 (#13)
  * Fix remaining issues in arrow -> R-arrow rename
  * Fix bug in rename code in report.R

    The previous code was causing something wild to happen. The changed code is idiomatic code for replacing values in a data.frame based on a condition.

---------

Co-authored-by: Bryce Mecum <[email protected]>
1 parent 4901623 commit c84afb2

File tree

15 files changed

+42
-35
lines changed

15 files changed

+42
-35
lines changed

.github/workflows/regression.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
strategy:
1818
fail-fast: false
1919
matrix:
20-
solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, arrow, duckdb, duckdb-latest, datafusion]
20+
solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, R-arrow, duckdb, duckdb-latest, datafusion]
2121
name: Regression Tests solo solutions
2222
runs-on: ubuntu-20.04
2323
env:

arrow/groupby-arrow.R renamed to R-arrow/groupby-R-arrow.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ source("./_helpers/helpers.R")
77
stopifnot(requireNamespace("bit64", quietly=TRUE)) # used in chk to sum numeric columns
88
.libPaths("./arrow/r-arrow") # tidyverse/dplyr#4641 ## leave it like here in case if this affects arrow pkg as well
99
suppressPackageStartupMessages({
10-
library("arrow", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE)
11-
library("dplyr", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE)
10+
library("arrow", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE)
11+
library("dplyr", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE)
1212
})
1313
ver = packageVersion("arrow")
1414
git = ""
1515
task = "groupby"
16-
solution = "arrow"
16+
solution = "R-arrow"
1717
fun = "group_by"
1818
cache = TRUE
1919
on_disk = FALSE

arrow/join-arrow.R renamed to R-arrow/join-R-arrow.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ source("./_helpers/helpers.R")
66

77
.libPaths("./arrow/r-arrow") # tidyverse/dplyr#4641 ## leave it like here in case if this affects arrow pkg as well
88
suppressPackageStartupMessages({
9-
library("arrow", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE)
10-
library("dplyr", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE)
9+
library("arrow", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE)
10+
library("dplyr", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE)
1111
})
1212
ver = packageVersion("arrow")
1313
git = ""
1414
task = "join"
15-
solution = "arrow"
15+
solution = "R-arrow"
1616
cache = TRUE
1717
on_disk = FALSE
1818

R-arrow/setup-R-arrow.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
set -e
3+
4+
# install stable arrow
5+
mkdir -p ./R-arrow/r-arrow
6+
Rscript -e 'install.packages(c("arrow","dplyr"), lib="./R-arrow/r-arrow")'

arrow/upg-arrow.sh renamed to R-arrow/upg-R-arrow.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ set -e
33

44
# upgrade all packages in arrow library only if new arrow is out
55
echo 'upgrading arrow...'
6-
Rscript -e 'ap=available.packages(); if (ap["arrow","Version"]!=packageVersion("arrow", lib.loc="./arrow/r-arrow")) update.packages(lib.loc="./arrow/r-arrow", ask=FALSE, checkBuilt=TRUE, quiet=TRUE)'
6+
Rscript -e 'ap=available.packages(); if (ap["arrow","Version"]!=packageVersion("arrow", lib.loc="./R-arrow/r-arrow")) update.packages(lib.loc="./R-arrow/r-arrow", ask=FALSE, checkBuilt=TRUE, quiet=TRUE)'

R-arrow/ver-R-arrow.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
2+
set -e
3+
4+
Rscript -e 'v=read.dcf(system.file(package="arrow", lib.loc="./R-arrow/r-arrow", "DESCRIPTION"), fields=c("Version","RemoteSha")); colnames(v)[colnames(v)=="RemoteSha"]="Revision"; cnafill=function(x) {x=c(x); x[is.na(x)]=""; x}; fw=function(f, v) writeLines(v, file.path("R-arrow", f)); invisible(mapply(fw, toupper(colnames(v)), cnafill(v)))'

_benchplot/benchplot-dict.R

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ solution.dict = {list(
4242
"juliads" = list(name=c(short="IMD.jl", long="InMemoryDatasets.jl"), color=c(strong="#b80000", light="#ff1f1f")),
4343
"clickhouse" = list(name=c(short="clickhouse", long="ClickHouse"), color=c(strong="hotpink4", light="hotpink1")),
4444
"polars" = list(name=c(short="polars", long="Polars"), color=c(strong="deepskyblue4", light="deepskyblue3")),
45-
"arrow" = list(name=c(short="arrow", long="Arrow"), color=c(strong="aquamarine3", light="aquamarine1")),
45+
"R-arrow" = list(name=c(short="R-arrow", long="R-arrow"), color=c(strong="aquamarine3", light="aquamarine1")),
4646
"duckdb" = list(name=c(short="duckdb", long="DuckDB"), color=c(strong="#ddcd07", light="#fff100")),
4747
"duckdb-latest" = list(name=c(short="duckdb-latest", long="duckdb-latest"), color=c(strong="#ddcd07", light="#fff100")),
4848
"datafusion" = list(name=c(short="datafusion", long="Datafusion"), color=c(strong="deepskyblue4", light="deepskyblue3"))
@@ -199,7 +199,7 @@ groupby.syntax.dict = {list(
199199
"regression v1 v2 by id2 id4" = "DF.groupby(['id2','id4']).agg((pl.pearson_corr('v1','v2')**2).alias('r2')).collect()",
200200
"sum v3 count by id1:id6" = "DF.groupby(['id1','id2','id3','id4','id5','id6']).agg([pl.sum('v3').alias('v3'), pl.count('v1').alias('count')]).collect()"
201201
)},
202-
"arrow" = {c(
202+
"R-arrow" = {c(
203203
"sum v1 by id1" = "AT %>% group_by(id1) %>% summarise(v1=sum(v1, na.rm=TRUE))",
204204
"sum v1 by id1:id2" = "AT %>% group_by(id1, id2) %>% summarise(v1=sum(v1, na.rm=TRUE))",
205205
"sum v1 mean v3 by id3" = "AT %>% group_by(id3) %>% summarise(v1=sum(v1, na.rm=TRUE), v3=mean(v3, na.rm=TRUE))",
@@ -260,7 +260,7 @@ groupby.syntax.dict = {list(
260260
"juliads" = list(),
261261
"clickhouse" = list(),
262262
"polars" = list(),
263-
"arrow" = list("Expression row_number() <= 2L not supported in Arrow; pulling data into R" = "max v1 - min v2 by id3", "Expression cor(v1, v2, ... is not supported in arrow; pulling data into R" = "regression v1 v2 by id2 id4"),
263+
"R-arrow" = list("Expression row_number() <= 2L not supported in R-arrow; pulling data into R" = "max v1 - min v2 by id3", "Expression cor(v1, v2, ... is not supported in R-arrow; pulling data into R" = "regression v1 v2 by id2 id4"),
264264
"duckdb" = list(),
265265
"duckdb-latest" = list(),
266266
"datafusion" = list()
@@ -309,7 +309,7 @@ groupby.data.exceptions = {list(
309309
"polars" = {list(
310310
# "out of memory" = c("G1_1e9_1e2_0_0","G1_1e9_1e1_0_0","G1_1e9_2e0_0_0","G1_1e9_1e2_0_1","G1_1e9_1e2_5_0") # q10
311311
)},
312-
"arrow" = {list(
312+
"R-arrow" = {list(
313313
# "timeout" = c(), # q10
314314
"internal error" = c("G1_1e8_2e0_0_0", "G1_1e8_1e2_0_1", "G1_1e8_1e2_5_0", "G1_1e9_1e2_0_0","G1_1e9_1e2_0_1","G1_1e9_1e2_5_0","G1_1e9_1e1_0_0", # inherits from dplyr
315315
"G1_1e9_2e0_0_0"), # #190
@@ -413,7 +413,7 @@ join.syntax.dict = {list(
413413
"medium inner on factor" = "DF.merge(medium, on='id5')",
414414
"big inner on int" = "DF.merge(big, on='id3')"
415415
)},
416-
"arrow" = {c(
416+
"R-arrow" = {c(
417417
"small inner on int" = "inner_join(DF, small, by='id1')",
418418
"medium inner on int" = "inner_join(DF, medium, by='id2')",
419419
"medium outer on int" = "left_join(DF, medium, by='id2')",
@@ -454,7 +454,7 @@ join.query.exceptions = {list(
454454
"juliads" = list(),
455455
"clickhouse" = list(),
456456
"polars" = list(),
457-
"arrow" = list(),
457+
"R-arrow" = list(),
458458
"duckdb" = list(),
459459
"duckdb-latest" = list(),
460460
"datafusion" = list()
@@ -496,7 +496,7 @@ join.data.exceptions = {list(
496496
"polars" = {list(
497497
"out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1")
498498
)},
499-
"arrow" = {list(
499+
"R-arrow" = {list(
500500
"out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1", "J1_1e8_NA_0_0", "J1_1e8_NA_5_0", "J1_1e8_NA_0_1" )#,
501501
# "not yet implemented: #189" = c("J1_1e7_NA_0_0","J1_1e7_NA_5_0","J1_1e7_NA_0_1","J1_1e8_NA_0_0","J1_1e8_NA_5_0","J1_1e8_NA_0_1","J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1")
502502
)},

_control/solutions.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ clickhouse,groupby
2525
clickhouse,join
2626
polars,groupby
2727
polars,join
28-
arrow,groupby
29-
arrow,join
28+
R-arrow,groupby
29+
R-arrow,join
3030
duckdb,groupby
3131
duckdb,join
3232
duckdb-latest,groupby

_launcher/launcher.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ readret = function(x) {
1414
file.ext = function(x) {
1515
ans = switch(
1616
x,
17-
"collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R",
17+
"collapse"=, "data.table"=, "dplyr"=, "h2o"=, "R-arrow"=, "duckdb"="R", "duckdb-latest"="R",
1818
"pandas"=, "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py",
1919
"clickhouse"="sql",
2020
"juliadf"="jl", "juliads"="jl",

_launcher/solution.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ if ("quiet" %in% names(args)) {
110110
file.ext = function(x) {
111111
ans = switch(
112112
x,
113-
"collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R",
113+
"collapse"=, "data.table"=, "dplyr"=, "h2o"=, "R-arrow"=, "duckdb"="R", "duckdb-latest"="R",
114114
"pandas"="py", "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py",
115115
"clickhouse"="sql",
116116
"juliadf"="jl", "juliads"="jl"

0 commit comments

Comments
 (0)