diff --git a/DESCRIPTION b/DESCRIPTION
index 1fc9fc23..141a9d1d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: piar
 Title: Price Index Aggregation
-Version: 0.8.3.9003
+Version: 0.8.3.9004
 Authors@R: c(
     person("Steve", "Martin", role = c("aut", "cre", "cph"),
            email = "marberts@protonmail.com",
diff --git a/NAMESPACE b/NAMESPACE
index 8e2ba940..22a0154e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -93,6 +93,7 @@ export(as_index)
 export(carry_backward)
 export(carry_forward)
 export(chain)
+export(combine_classifications)
 export(contrib)
 export(contrib2DF)
 export(elemental_index)
diff --git a/NEWS.md b/NEWS.md
index e143d0d4..c5e2ba64 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -26,6 +26,11 @@ in `elementary_index()`.
 
 - The `margin` argument in `split(index)` has been renamed to `along`.
 
+## Improvements
+
+- Added a new function `combine_classifications()` to more easily build an
+aggregation structure that spans multiple hierarchical classifications.
+
 ## Bug fixes
 
 - Setting `contrib = FALSE` in `aggregate(index)` always returns an index with
diff --git a/R/combine_classifications.R b/R/combine_classifications.R
new file mode 100644
index 00000000..3cbf785d
--- /dev/null
+++ b/R/combine_classifications.R
@@ -0,0 +1,45 @@
+#' Combine hierarchical classifications
+#'
+#' Combine hierarchical classifications by stacking one classification after
+#' another.
+#'
+#' @param x,y A list with an entry for each level in the classification giving
+#' the "digits" that represent each level in the hierarchy, as made
+#' by [expand_classification()] or [split_classification()].
+#' @param sep A character used to separate `x` and `y`. The
+#' default separates levels across classifications by `"."`.
+#' @returns
+#' A list with an entry for each level in the combined classification.
+#'
+#' @examples
+#' # Combine an unbalanced industry classification with a balanced
+#' # geographic classification
+#'
+#' industry <- c("111", "112", "12")
+#' region <- c("11", "21", "22")
+#'
+#' combine_classifications(
+#'   expand_classification(industry, pad = "0"),
+#'   expand_classification(region)
+#' )
+#' @export
+combine_classifications <- function(x, y, sep = ".") {
+  # Coerce and validate both inputs before any early return, so an empty 'x'
+  # or 'y' still yields character vectors and ragged input always errors.
+  x <- lapply(x, as.character)
+  y <- lapply(y, as.character)
+  n <- lengths(c(x, y))
+  if (any(n != n[1L])) {
+    stop("each component of 'x' and 'y' must have the same number of elements")
+  }
+  # Nothing to stack when either classification has no levels.
+  if (length(x) == 0L) {
+    return(y)
+  }
+  if (length(y) == 0L) {
+    return(x)
+  }
+
+  # Prefix every level of 'y' with the last level of 'x'.
+  c(x, lapply(y, \(y) paste(last(x), y, sep = sep)))
+}
diff --git a/R/expand_classification.R b/R/expand_classification.R
index c3d28437..df53569f 100644
--- a/R/expand_classification.R
+++ b/R/expand_classification.R
@@ -15,7 +15,7 @@
 #' level of the classification, ordered so that moving down the list goes down
 #' the hierarchy (as made by `expand_classification()`).
 #' @param sep A character used to combine codes/labels across elements of `...`.
-#' The default uses ':'.
+#' The default uses `":"`.
 #' @param pad A string used to pad the shorter labels for an unbalanced
 #' classification. The default pads with NA.
 #'
@@ -32,6 +32,9 @@
 #' [split_classification()] to expand a classification by splitting along
 #' a delimiter.
 #'
+#' [combine_classifications()] for combining multiple hierarchical
+#' classifications.
+#'
 #' `csh_from_digits()` in the \pkg{accumulate} package for different handling
 #' of unbalanced classifications.
 #'
diff --git a/R/extract.piar_index.R b/R/extract.piar_index.R
index 925a2ffc..9932fe54 100644
--- a/R/extract.piar_index.R
+++ b/R/extract.piar_index.R
@@ -1,4 +1,4 @@
-#' Extract and replace index values
+#' Extract index values
 #'
 #' Methods to extract and replace index values like a matrix.
#' diff --git a/R/split_classification.R b/R/split_classification.R index c40dcc2c..45233596 100644 --- a/R/split_classification.R +++ b/R/split_classification.R @@ -10,7 +10,7 @@ #' See [strsplit()]. #' @param ... Additional argument to pass to [strsplit()]. #' @param sep A character used to delineate levels in `x` in the result. The -#' default separates levels by '.'. +#' default separates levels by `"."`. #' @param pad A string used to pad the shorter labels for an unbalanced #' classification. The default pads with NA. #' @@ -24,6 +24,9 @@ #' [expand_classification()] to expand a classification by the width of the #' levels. #' +#' [combine_classifications()] for combining multiple hierarchical +#' classifications. +#' #' @examples #' #' # A simple classification structure #' # 1 diff --git a/R/window.piar_index.R b/R/window.piar_index.R index bec6639d..d6c3f2fe 100644 --- a/R/window.piar_index.R +++ b/R/window.piar_index.R @@ -1,4 +1,4 @@ -#' Index window +#' Window a price index #' #' Extract and replace index values over a window of time periods. #' diff --git a/README.Rmd b/README.Rmd index 3f66fc5a..acf5cd6c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -116,8 +116,6 @@ guidelines for this project for more information. Balk, B. M. (2008). *Price and Quantity Index Numbers*. Cambridge University Press. -Chiru, R., Huang, N., Lequain, M. Smith, P., and Wright, A. (2015). *The Canadian Consumer Price Index Reference Paper*, Statistics Canada catalogue 62-553-X. Statistics Canada. - ILO, IMF, UNECE, OECD, and World Bank. (2004). *Producer Price Index Manual: Theory and Practice*. International Monetary Fund. IMF, ILO, Eurostat, UNECE, OECD, and World Bank. (2020). *Consumer Price Index Manual: Concepts and Methods*. International Monetary Fund. diff --git a/README.md b/README.md index 919c841b..94c48f22 100644 --- a/README.md +++ b/README.md @@ -106,12 +106,12 @@ And an aggregation structure. 
# industrial classifications ms_weights -#> business classification weight level1 level2 -#> 1 B1 11 553 1 11 -#> 2 B2 11 646 1 11 -#> 3 B3 11 312 1 11 -#> 4 B4 12 622 1 12 -#> 5 B5 12 330 1 12 +#> business classification weight +#> 1 B1 11 553 +#> 2 B2 11 646 +#> 3 B3 11 312 +#> 4 B4 12 622 +#> 5 B5 12 330 ms_weights[c("level1", "level2")] <- expand_classification(ms_weights$classification) @@ -165,10 +165,6 @@ the contribution guidelines for this project for more information. Balk, B. M. (2008). *Price and Quantity Index Numbers*. Cambridge University Press. -Chiru, R., Huang, N., Lequain, M. Smith, P., and Wright, A. (2015). *The -Canadian Consumer Price Index Reference Paper*, Statistics Canada -catalogue 62-553-X. Statistics Canada. - ILO, IMF, UNECE, OECD, and World Bank. (2004). *Producer Price Index Manual: Theory and Practice*. International Monetary Fund. diff --git a/_pkgdown.yml b/_pkgdown.yml index e4d3dcf9..ea695c77 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -67,6 +67,7 @@ reference: - aggregation_structure - expand_classification - split_classification + - combine_classifications - as_aggregation_structure - as.matrix.piar_aggregation_structure - weights.piar_aggregation_structure diff --git a/man/combine_classifications.Rd b/man/combine_classifications.Rd new file mode 100644 index 00000000..630fc620 --- /dev/null +++ b/man/combine_classifications.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_classifications.R +\name{combine_classifications} +\alias{combine_classifications} +\title{Combine hierarchical classifications} +\usage{ +combine_classifications(x, y, sep = ".") +} +\arguments{ +\item{x, y}{A list with an entry for each level in the classification giving +the "digits" that represent each level in the hierarchy, as made +by \code{\link[=expand_classification]{expand_classification()}} or \code{\link[=split_classification]{split_classification()}}.} + +\item{sep}{A character used to 
separate \code{x} and \code{y}. The +default separates levels across classifications by \code{"."}.} +} +\value{ +A list with an entry for each level in the combined classification. +} +\description{ +Combine hierarchical classifications by stacking one classification after +another. +} +\examples{ +# Combine an unbalanced industry classification with a balanced +# geographic classification + +industry <- c("111", "112", "12") +region <- c("11", "21", "22") + +combine_classifications( + expand_classification(industry, pad = "0"), + expand_classification(region) +) +} diff --git a/man/expand_classification.Rd b/man/expand_classification.Rd index 47bb8f19..3c10aae3 100644 --- a/man/expand_classification.Rd +++ b/man/expand_classification.Rd @@ -27,7 +27,7 @@ level of the classification, ordered so that moving down the list goes down the hierarchy (as made by \code{expand_classification()}).} \item{sep}{A character used to combine codes/labels across elements of \code{...}. -The default uses ':'.} +The default uses \code{":"}.} } \value{ \code{expand_classification()} returns a list with a entry for each level @@ -74,6 +74,9 @@ expand_classification(c("01.1.1", "01.1.2", "01.2.1"), width = 2) \code{\link[=split_classification]{split_classification()}} to expand a classification by splitting along a delimiter. +\code{\link[=combine_classifications]{combine_classifications()}} for combining multiple hierarchical +classifications. + \code{csh_from_digits()} in the \pkg{accumulate} package for different handling of unbalanced classifications. } diff --git a/man/split_classification.Rd b/man/split_classification.Rd index 9a4d0487..d0c26cb0 100644 --- a/man/split_classification.Rd +++ b/man/split_classification.Rd @@ -17,7 +17,7 @@ See \code{\link[=strsplit]{strsplit()}}.} \item{...}{Additional argument to pass to \code{\link[=strsplit]{strsplit()}}.} \item{sep}{A character used to delineate levels in \code{x} in the result. 
The -default separates levels by '.'.} +default separates levels by \code{"."}.} \item{pad}{A string used to pad the shorter labels for an unbalanced classification. The default pads with NA.} @@ -50,4 +50,7 @@ split_classification(c("01.1.1", "01.1.2", "01.2.1"), ".", fixed = TRUE) \code{\link[=expand_classification]{expand_classification()}} to expand a classification by the width of the levels. + +\code{\link[=combine_classifications]{combine_classifications()}} for combining multiple hierarchical +classifications. } diff --git a/man/sub-.piar_index.Rd b/man/sub-.piar_index.Rd index db8d78df..5d141f47 100644 --- a/man/sub-.piar_index.Rd +++ b/man/sub-.piar_index.Rd @@ -3,7 +3,7 @@ \name{[.piar_index} \alias{[.piar_index} \alias{[<-.piar_index} -\title{Extract and replace index values} +\title{Extract index values} \usage{ \method{[}{piar_index}(x, i, j, ...) diff --git a/man/window.piar_index.Rd b/man/window.piar_index.Rd index 4f86b981..569b48e0 100644 --- a/man/window.piar_index.Rd +++ b/man/window.piar_index.Rd @@ -3,7 +3,7 @@ \name{window.piar_index} \alias{window.piar_index} \alias{window<-.piar_index} -\title{Index window} +\title{Window a price index} \usage{ \method{window}{piar_index}(x, start = NULL, end = NULL, ...) 
diff --git a/tests/Examples/piar-Ex.Rout.save b/tests/Examples/piar-Ex.Rout.save index 0b226955..a4354e3c 100644 --- a/tests/Examples/piar-Ex.Rout.save +++ b/tests/Examples/piar-Ex.Rout.save @@ -1,5 +1,5 @@ -R version 4.5.0 (2025-04-11) -- "How About a Twenty-Six" +R version 4.5.2 (2025-10-31) -- "[Not] Part in a Rumble" Copyright (C) 2025 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu @@ -370,6 +370,46 @@ levels 1 2 3 2 0.2000000 1 8 3 0.1666667 1 9 > +> +> +> +> cleanEx() +> nameEx("combine_classifications") +> ### * combine_classifications +> +> flush(stderr()); flush(stdout()) +> +> ### Name: combine_classifications +> ### Title: Combine hierarchical classifications +> ### Aliases: combine_classifications +> +> ### ** Examples +> +> # Combine an unbalanced industry classification with a balanced +> # geographic classification +> +> industry <- c("111", "112", "12") +> region <- c("11", "21", "22") +> +> combine_classifications( ++ expand_classification(industry, pad = "0"), ++ expand_classification(region) ++ ) +[[1]] +[1] "1" "1" "1" + +[[2]] +[1] "11" "11" "12" + +[[3]] +[1] "111" "112" "120" + +[[4]] +[1] "111.1" "112.2" "120.2" + +[[5]] +[1] "111.11" "112.21" "120.22" + > > > @@ -405,7 +445,7 @@ levels 1 2 3 > > contrib(index) time -product 1 2 +product 1 2 a.1 0.0000000 0.5081686 a.2 0.2440169 0.6442213 b.1 0.3905243 2.0513858 @@ -945,7 +985,7 @@ levels 1 2 3 4 5 6 > flush(stderr()); flush(stdout()) > > ### Name: [.piar_index -> ### Title: Extract and replace index values +> ### Title: Extract index values > ### Aliases: [.piar_index [<-.piar_index > > ### ** Examples @@ -1094,7 +1134,7 @@ $ea > flush(stderr()); flush(stdout()) > > ### Name: window.piar_index -> ### Title: Index window +> ### Title: Window a price index > ### Aliases: window.piar_index window<-.piar_index > > ### ** Examples @@ -1126,7 +1166,7 @@ levels 1 2 3 > cleanEx() > options(digits = 7L) > base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 
'CheckExEnv'),"\n")
-Time elapsed: 0.126 0.008 0.134 0 0
+Time elapsed: 0.277 0.027 0.316 0 0
 > grDevices::dev.off()
 null device
 1
diff --git a/tests/testthat/test-combine_classifications.R b/tests/testthat/test-combine_classifications.R
new file mode 100644
index 00000000..79b22ace
--- /dev/null
+++ b/tests/testthat/test-combine_classifications.R
@@ -0,0 +1,30 @@
+# Stacking `y` after `x` should equal expanding the pasted codes directly.
+test_that("combining classifications works", {
+  x <- c("111", "121", "122")
+  y <- c("01", "02", "01")
+  expect_identical(
+    combine_classifications(
+      expand_classification(x),
+      expand_classification(y)
+    ),
+    expand_classification(paste(x, y, sep = "."), c(1, 1, 1, 2, 1))
+  )
+})
+
+# An empty classification on either side leaves the other one unchanged.
+test_that("corner cases work", {
+  x <- list(c("11", "12"), c("111", "121"))
+  expect_identical(combine_classifications(x, list()), x)
+  expect_identical(combine_classifications(list(), x), x)
+})
+
+test_that("combining classifications errors with unequal length", {
+  # Match on the message so an unrelated failure cannot satisfy the test.
+  expect_error(
+    combine_classifications(
+      list(c("1", "1"), c("11", "12")),
+      list(c("01", "02"), c("011"))
+    ),
+    "must have the same number of elements"
+  )
+})