ivelasq
diff --git a/‎_freeze/site_libs/quarto-listing/list.min.js
Lines changed: 1 addition & 1 deletion b/‎_freeze/site_libs/quarto-listing/list.min.js
Lines changed: 1 addition & 1 deletion
diff --git a/‎_freeze/site_libs/quarto-listing/quarto-listing.js
Lines changed: 14 additions & 4 deletions b/‎_freeze/site_libs/quarto-listing/quarto-listing.js
Lines changed: 14 additions & 4 deletions
diff --git a/‎_freeze/til-r/r-composition/index/execute-results/html.json
Lines changed: 17 additions & 0 deletions b/‎_freeze/til-r/r-composition/index/execute-results/html.json
Lines changed: 17 additions & 0 deletions
diff --git a/‎_freeze/til-r/r-composition/index/figure-html/unnamed-chunk-2-1.png
55.2 KB b/‎_freeze/til-r/r-composition/index/figure-html/unnamed-chunk-2-1.png
55.2 KB
diff --git a/‎_freeze/til-r/r-composition/index/figure-html/unnamed-chunk-3-1.png
51.6 KB b/‎_freeze/til-r/r-composition/index/figure-html/unnamed-chunk-3-1.png
51.6 KB
diff --git a/‎pipedream.Rproj
Lines changed: 1 addition & 0 deletions b/‎pipedream.Rproj
Lines changed: 1 addition & 0 deletions
diff --git a/‎til-r/r-composition/index.knit.md
Lines changed: 167 additions & 0 deletions b/‎til-r/r-composition/index.knit.md
Lines changed: 167 additions & 0 deletions
diff --git a/‎til-r/r-composition/index.qmd
Lines changed: 120 additions & 0 deletions b/‎til-r/r-composition/index.qmd
Lines changed: 120 additions & 0 deletions
@@ -2,6 +2,7 @@ const kProgressiveAttr = "data-src";
 let categoriesLoaded = false;
 
 window.quartoListingCategory = (category) => {
+  category = atob(category);
   if (categoriesLoaded) {
     activateCategory(category);
     setCategoryHash(category);
@@ -15,7 +16,9 @@ window["quarto-listing-loaded"] = () => {
   if (hash) {
     // If there is a category, switch to that
     if (hash.category) {
-      activateCategory(hash.category);
+      // category hash are URI encoded so we need to decode it before processing
+      // so that we can match it with the category element processed in JS
+      activateCategory(decodeURIComponent(hash.category));
     }
     // Paginate a specific listing
     const listingIds = Object.keys(window["quarto-listings"]);
@@ -58,7 +61,10 @@ window.document.addEventListener("DOMContentLoaded", function (_event) {
   );
 
   for (const categoryEl of categoryEls) {
-    const category = categoryEl.getAttribute("data-category");
+    // category needs to support non ASCII characters
+    const category = decodeURIComponent(
+      atob(categoryEl.getAttribute("data-category"))
+    );
     categoryEl.onclick = () => {
       activateCategory(category);
       setCategoryHash(category);
@@ -208,7 +214,9 @@ function activateCategory(category) {
 
   // Activate this category
   const categoryEl = window.document.querySelector(
-    `.quarto-listing-category .category[data-category='${category}'`
+    `.quarto-listing-category .category[data-category='${btoa(
+      encodeURIComponent(category)
+    )}']`
   );
   if (categoryEl) {
     categoryEl.classList.add("active");
@@ -231,7 +239,9 @@ function filterListingCategory(category) {
         list.filter(function (item) {
           const itemValues = item.values();
           if (itemValues.categories !== null) {
-            const categories = itemValues.categories.split(",");
+            const categories = decodeURIComponent(
+              atob(itemValues.categories)
+            ).split(",");
             return categories.includes(category);
           } else {
             return false;
 
@@ -0,0 +1,17 @@
+{
+  "hash": "add4d05eb93703b182620718d2b16338",
+  "result": {
+    "engine": "knitr",
+    "markdown": "---\ntitle: \"How to find out how much of R Core is R\"\ndate: \"2024-12-25\"\ncategory: R\noutput: html_document\n---\n\n\n\nCleaning out my computer as I get ready to switch to a new one has me running into old gems. So, when I say \"Today I learned,\" I really mean \"I learned this back in December 2021.\" 😅\n\nBack then, I gave a talk at Why R? called \n[Packages for Using R With Python, Tableau, and Other Tools](https://www.youtube.com/watch?v=vyA2EiIz4pI&feature=youtu.be). One part of the talk was about how R itself isn't just made up of R. \n\nI adapted [this classic blog post](https://librestats.wordpress.com/2011/08/27/how-much-of-r-is-written-in-r/) by wrathematics to explore the composition of the [R 4.1.2 source package](https://cran.r-project.org/src/base/R-4/). The post features a script that scans the `.R`, `.c`, and `.f` files in the source, then records the language (R, C, or Fortran) and the number of lines of code in each language to a CSV file. Keep in mind, I have almost no knowledge of Shell (and this was pre-ChatGPT days!), so it took me a bit to adapt the original script from 2011.\n\n```{.bash filename=\"shell.sh\"}\noutdir=\"./\"\n\nrdir=\"./R-4.1.2\" #eg, ~/R-2.13.1/\ncd $rdir/src\n\nfor rfile in `find . -type f -name *.R`\ndo\nloc=`wc -l $rfile | sed -e 's/ ./,/' -e 's/\\/[^/]*\\//\\//g' -e 's/\\/[^/]*\\//\\//g' -e 's/\\/[^/]*\\///g' -e 's/\\///'`\necho \"R,$loc\"  >> $outdir/r_source_loc.csv\ndone\n\nfor cfile in `find . -type f -name *.c`\ndo\nloc=`wc -l $cfile | sed -e 's/ ./,/' -e 's/\\/[^/]*\\//\\//g' -e 's/\\/[^/]*\\//\\//g' -e 's/\\/[^/]*\\///g' -e 's/\\///'`\necho \"C,$loc\"  >> $outdir/r_source_loc.csv\ndone\n\nfor ffile in `find . -type f -name *.f`\ndo\nloc=`wc -l $ffile | sed -e 's/ ./,/' -e 's/\\/[^/]*\\//\\//g' -e 's/\\/[^/]*\\//\\//g' -e 's/\\/[^/]*\\///g' -e 's/\\///'`\necho \"Fortran,$loc\"  >> $outdir/r_source_loc.csv\ndone\n```\n\nThe script creates a file called `r_source_loc.csv`. 
It shows the number of lines by programming language by script in R 4.1.2. We can read it into R:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(dplyr)\nlibrary(stringr)\n\nr_loc <-\n  readr::read_table(here::here(\"til-r\", \"r-composition\", \"r_source_loc.csv\"),\n             col_names = c(\"language\", \"lines\", \"script\")) |> \n  mutate(language = case_when(str_detect(language, \"R,,\") ~ \"R\",\n                              str_detect(language, \"C,,\") ~ \"C\",\n                              str_detect(language, \"Fortran,,\") ~ \"Fortran\"),\n         lines = as.numeric(lines)) |> \n  distinct()\n\nhead(r_loc)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n# A tibble: 6 × 3\n  language lines script        \n  <chr>    <dbl> <chr>         \n1 R           20 .snow2.RR     \n2 R            9 .multicore3.RR\n3 R           15 .multicore2.RR\n4 R           10 .multicore1.RR\n5 R           25 .RSeed.R      \n6 R           36 .Master.R     \n```\n\n\n:::\n:::\n\n\n\nNow, we can visualize the percentage of R Core sourcecode files by language using ggplot2:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(ggplot2)\nlibrary(forcats)\n\nr_loc |> \n  filter(!is.na(language)) |> \n  group_by(language) |> \n  summarise (n = n()) |> \n  mutate(rel.freq =  n / sum(n), accuracy = 0.1) |> \n  ggplot(aes(x = fct_reorder(language, desc(rel.freq)), y = rel.freq, fill = language)) +\n  geom_bar(stat = \"identity\") +\n  geom_text(\n    aes(label = scales::percent(rel.freq)),\n    position = position_dodge(width = 0.9),\n    vjust = -0.25,\n    size = 4\n  ) +\n  theme_minimal() +\n  labs(title = \"Percentage of R Core Sourcecode Files by Language\") +\n  theme(plot.title = element_text(size = 14),\n        axis.title.x = element_blank(),\n        axis.title.y = element_blank(),\n        axis.text.x = element_text(size = 12),\n        axis.text.y = element_blank()) +\n  scale_fill_manual(values = c(\"R\" = \"#332288\", \n                               \"C\" = 
\"#882255\", \n                               \"Fortran\" = \"#44AA99\"))\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-2-1.png){width=672}\n:::\n:::\n\n\n\nOr, we can visualize the percentage of R Core lines of code by language:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nr_loc |> \n  filter(!is.na(language)) |> \n  group_by(language) %>% \n  summarise(sum_lines = sum(lines, na.rm = TRUE)) |> \n  ungroup() |> \n  mutate(percent = sum_lines/sum(sum_lines)) |> \n  ggplot(aes(x = fct_reorder(language, desc(percent)), y = percent, fill = language)) +\n  geom_bar(stat = \"identity\") +\n  geom_text(\n    aes(label = scales::percent(percent)),\n    position = position_dodge(width = 0.9),\n    vjust = -0.25,\n    size = 4\n  )+\n  theme_minimal() +\n  labs(title = \"Percentage of R Core Lines of Code by Language\") +\n  theme(plot.title = element_text(size = 14),\n        axis.title.x = element_blank(),\n        axis.title.y = element_blank(),\n        axis.text.x = element_text(size = 12),\n        axis.text.y = element_blank(),\n        legend.position = \"none\") +\n  scale_fill_manual(values = c(\"R\" = \"#332288\", \n                               \"C\" = \"#882255\", \n                               \"Fortran\" = \"#44AA99\"))\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-3-1.png){width=672}\n:::\n:::\n\n\n\nIt’s interesting to see how much goes into making R what it is: an ecosystem built on collaboration across languages and tools (which was the takeaway from the talk!). If you’re curious about R's source code, give the script a shot!",
+    "supporting": [
+      "index_files"
+    ],
+    "filters": [
+      "rmarkdown/pagebreak.lua"
+    ],
+    "includes": {},
+    "engineDependencies": {},
+    "preserve": {},
+    "postProcess": true
+  }
+}
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: ebc81a81-96a3-4c8e-a728-27afe2266e1a
 
 RestoreWorkspace: Default
 SaveWorkspace: Default
 
@@ -0,0 +1,167 @@
+---
+title: "How to find out how much of R Core is R"
+date: "2024-12-25"
+category: R
+output: html_document
+---
+
+
+
+Cleaning out my computer as I get ready to switch to a new one has me running into old gems. So, when I say "Today I learned," I really mean "I learned this back in December 2021." 😅
+
+Back then, I gave a talk at Why R? called 
+[Packages for Using R With Python, Tableau, and Other Tools](https://www.youtube.com/watch?v=vyA2EiIz4pI&feature=youtu.be). One part of the talk was about how R itself isn't just made up of R. 
+
+I adapted [this classic blog post](https://librestats.wordpress.com/2011/08/27/how-much-of-r-is-written-in-r/) by wrathematics to explore the composition of the [R 4.1.2 source package](https://cran.r-project.org/src/base/R-4/). The post features a script that scans the `.R`, `.c`, and `.f` files in the source, then records the language (R, C, or Fortran) and the number of lines of code in each language to a CSV file. Keep in mind, I have almost no knowledge of Shell (and this was pre-ChatGPT days!), so it took me a bit to adapt the original script from 2011.
+
+```{.bash filename="shell.sh"}
+outdir="./"
+
+rdir="./R-4.1.2" #eg, ~/R-2.13.1/
+cd $rdir/src
+
+for rfile in `find . -type f -name *.R`
+do
+loc=`wc -l $rfile | sed -e 's/ ./,/' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\///g' -e 's/\///'`
+echo "R,$loc"  >> $outdir/r_source_loc.csv
+done
+
+for cfile in `find . -type f -name *.c`
+do
+loc=`wc -l $cfile | sed -e 's/ ./,/' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\///g' -e 's/\///'`
+echo "C,$loc"  >> $outdir/r_source_loc.csv
+done
+
+for ffile in `find . -type f -name *.f`
+do
+loc=`wc -l $ffile | sed -e 's/ ./,/' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\///g' -e 's/\///'`
+echo "Fortran,$loc"  >> $outdir/r_source_loc.csv
+done
+```
+
+The script creates a file called `r_source_loc.csv`. It lists the number of lines in each source file of R 4.1.2, along with the file's programming language. We can read it into R:
+
+
+
+::: {.cell}
+
+```{.r .cell-code}
+library(dplyr)
+library(stringr)
+
+r_loc <-
+  readr::read_table(here::here("til-r", "r-composition", "r_source_loc.csv"),
+             col_names = c("language", "lines", "script")) |> 
+  mutate(language = case_when(str_detect(language, "R,,") ~ "R",
+                              str_detect(language, "C,,") ~ "C",
+                              str_detect(language, "Fortran,,") ~ "Fortran"),
+         lines = as.numeric(lines)) |> 
+  distinct()
+
+head(r_loc)
+```
+
+::: {.cell-output .cell-output-stdout}
+
+```
+# A tibble: 6 × 3
+  language lines script        
+  <chr>    <dbl> <chr>         
+1 R           20 .snow2.RR     
+2 R            9 .multicore3.RR
+3 R           15 .multicore2.RR
+4 R           10 .multicore1.RR
+5 R           25 .RSeed.R      
+6 R           36 .Master.R     
+```
+
+
+:::
+:::
+
+
+
+Now, we can visualize the percentage of R Core source code files by language using ggplot2:
+
+
+
+::: {.cell}
+
+```{.r .cell-code}
+library(ggplot2)
+library(forcats)
+
+r_loc |> 
+  filter(!is.na(language)) |> 
+  group_by(language) |> 
+  summarise (n = n()) |> 
+  mutate(rel.freq =  n / sum(n), accuracy = 0.1) |> 
+  ggplot(aes(x = fct_reorder(language, desc(rel.freq)), y = rel.freq, fill = language)) +
+  geom_bar(stat = "identity") +
+  geom_text(
+    aes(label = scales::percent(rel.freq)),
+    position = position_dodge(width = 0.9),
+    vjust = -0.25,
+    size = 4
+  ) +
+  theme_minimal() +
+  labs(title = "Percentage of R Core Sourcecode Files by Language") +
+  theme(plot.title = element_text(size = 14),
+        axis.title.x = element_blank(),
+        axis.title.y = element_blank(),
+        axis.text.x = element_text(size = 12),
+        axis.text.y = element_blank()) +
+  scale_fill_manual(values = c("R" = "#332288", 
+                               "C" = "#882255", 
+                               "Fortran" = "#44AA99"))
+```
+
+::: {.cell-output-display}
+![](index_files/figure-html/unnamed-chunk-2-1.png){width=672}
+:::
+:::
+
+
+
+Or, we can visualize the percentage of R Core lines of code by language:
+
+
+
+::: {.cell}
+
+```{.r .cell-code}
+r_loc |> 
+  filter(!is.na(language)) |> 
+  group_by(language) %>% 
+  summarise(sum_lines = sum(lines, na.rm = TRUE)) |> 
+  ungroup() |> 
+  mutate(percent = sum_lines/sum(sum_lines)) |> 
+  ggplot(aes(x = fct_reorder(language, desc(percent)), y = percent, fill = language)) +
+  geom_bar(stat = "identity") +
+  geom_text(
+    aes(label = scales::percent(percent)),
+    position = position_dodge(width = 0.9),
+    vjust = -0.25,
+    size = 4
+  )+
+  theme_minimal() +
+  labs(title = "Percentage of R Core Lines of Code by Language") +
+  theme(plot.title = element_text(size = 14),
+        axis.title.x = element_blank(),
+        axis.title.y = element_blank(),
+        axis.text.x = element_text(size = 12),
+        axis.text.y = element_blank(),
+        legend.position = "none") +
+  scale_fill_manual(values = c("R" = "#332288", 
+                               "C" = "#882255", 
+                               "Fortran" = "#44AA99"))
+```
+
+::: {.cell-output-display}
+![](index_files/figure-html/unnamed-chunk-3-1.png){width=672}
+:::
+:::
+
+
+
+It’s interesting to see how much goes into making R what it is: an ecosystem built on collaboration across languages and tools (which was the takeaway from the talk!). If you’re curious about R's source code, give the script a shot!
@@ -0,0 +1,120 @@
+---
+title: "How to find out how much of R Core is R"
+date: "2024-12-25"
+category: R
+output: html_document
+---
+
+Cleaning out my computer as I get ready to switch to a new one has me running into old gems. So, when I say "Today I learned," I really mean "I learned this back in December 2021." 😅
+
+Back then, I gave a talk at Why R? called 
+[Packages for Using R With Python, Tableau, and Other Tools](https://www.youtube.com/watch?v=vyA2EiIz4pI&feature=youtu.be). One part of the talk was about how R itself isn't just made up of R. 
+
+I adapted [this classic blog post](https://librestats.wordpress.com/2011/08/27/how-much-of-r-is-written-in-r/) by wrathematics to explore the composition of the [R 4.1.2 source package](https://cran.r-project.org/src/base/R-4/). The post features a script that scans the `.R`, `.c`, and `.f` files in the source, then records the language (R, C, or Fortran) and the number of lines of code in each language to a CSV file. Keep in mind, I have almost no knowledge of Shell (and this was pre-ChatGPT days!), so it took me a bit to adapt the original script from 2011.
+
+```{.bash filename="shell.sh"}
+outdir="./"
+
+rdir="./R-4.1.2" #eg, ~/R-2.13.1/
+cd $rdir/src
+
+for rfile in `find . -type f -name *.R`
+do
+loc=`wc -l $rfile | sed -e 's/ ./,/' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\///g' -e 's/\///'`
+echo "R,$loc"  >> $outdir/r_source_loc.csv
+done
+
+for cfile in `find . -type f -name *.c`
+do
+loc=`wc -l $cfile | sed -e 's/ ./,/' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\///g' -e 's/\///'`
+echo "C,$loc"  >> $outdir/r_source_loc.csv
+done
+
+for ffile in `find . -type f -name *.f`
+do
+loc=`wc -l $ffile | sed -e 's/ ./,/' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\//\//g' -e 's/\/[^/]*\///g' -e 's/\///'`
+echo "Fortran,$loc"  >> $outdir/r_source_loc.csv
+done
+```
+
+The script creates a file called `r_source_loc.csv`. It lists the number of lines in each source file of R 4.1.2, along with the file's programming language. We can read it into R:
+
+```{r}
+#| warning: false
+library(dplyr)
+library(stringr)
+
+r_loc <-
+  readr::read_table(here::here("til-r", "r-composition", "r_source_loc.csv"),
+             col_names = c("language", "lines", "script")) |> 
+  mutate(language = case_when(str_detect(language, "R,,") ~ "R",
+                              str_detect(language, "C,,") ~ "C",
+                              str_detect(language, "Fortran,,") ~ "Fortran"),
+         lines = as.numeric(lines)) |> 
+  distinct()
+
+head(r_loc)
+```
+
+Now, we can visualize the percentage of R Core source code files by language using ggplot2:
+
+```{r}
+library(ggplot2)
+library(forcats)
+
+r_loc |> 
+  filter(!is.na(language)) |> 
+  group_by(language) |> 
+  summarise (n = n()) |> 
+  mutate(rel.freq =  n / sum(n), accuracy = 0.1) |> 
+  ggplot(aes(x = fct_reorder(language, desc(rel.freq)), y = rel.freq, fill = language)) +
+  geom_bar(stat = "identity") +
+  geom_text(
+    aes(label = scales::percent(rel.freq)),
+    position = position_dodge(width = 0.9),
+    vjust = -0.25,
+    size = 4
+  ) +
+  theme_minimal() +
+  labs(title = "Percentage of R Core Sourcecode Files by Language") +
+  theme(plot.title = element_text(size = 14),
+        axis.title.x = element_blank(),
+        axis.title.y = element_blank(),
+        axis.text.x = element_text(size = 12),
+        axis.text.y = element_blank()) +
+  scale_fill_manual(values = c("R" = "#332288", 
+                               "C" = "#882255", 
+                               "Fortran" = "#44AA99"))
+```
+
+Or, we can visualize the percentage of R Core lines of code by language:
+
+```{r}
+r_loc |> 
+  filter(!is.na(language)) |> 
+  group_by(language) %>% 
+  summarise(sum_lines = sum(lines, na.rm = TRUE)) |> 
+  ungroup() |> 
+  mutate(percent = sum_lines/sum(sum_lines)) |> 
+  ggplot(aes(x = fct_reorder(language, desc(percent)), y = percent, fill = language)) +
+  geom_bar(stat = "identity") +
+  geom_text(
+    aes(label = scales::percent(percent)),
+    position = position_dodge(width = 0.9),
+    vjust = -0.25,
+    size = 4
+  )+
+  theme_minimal() +
+  labs(title = "Percentage of R Core Lines of Code by Language") +
+  theme(plot.title = element_text(size = 14),
+        axis.title.x = element_blank(),
+        axis.title.y = element_blank(),
+        axis.text.x = element_text(size = 12),
+        axis.text.y = element_blank(),
+        legend.position = "none") +
+  scale_fill_manual(values = c("R" = "#332288", 
+                               "C" = "#882255", 
+                               "Fortran" = "#44AA99"))
+```
+
+It’s interesting to see how much goes into making R what it is: an ecosystem built on collaboration across languages and tools (which was the takeaway from the talk!). If you’re curious about R's source code, give the script a shot!
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`Version: 1.0`
	`2`	`+ProjectId: ebc81a81-96a3-4c8e-a728-27afe2266e1a`
`2`	`3`
`3`	`4`	`RestoreWorkspace: Default`
`4`	`5`	`SaveWorkspace: Default`