From 58f9ceed4fb0d0839d5fe53f7149c1d03c13da00 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 17:18:45 +0100 Subject: [PATCH 01/25] net_diversity() no longer offers a clusters option, which can be obtained more generally using migraph::over_membership() --- R/measure_heterogeneity.R | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 23ddf072..7f723cf9 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -9,8 +9,7 @@ #' in a network attribute. #' - `node_richness()` measures the number of unique categories #' of an attribute to which each node is connected. -#' - `net_diversity()` measures the heterogeneity of ties across a network -#' or within clusters by node attributes. +#' - `net_diversity()` measures the heterogeneity of ties across a network. #' - `node_diversity()` measures the heterogeneity of each node's #' local neighbourhood. #' - `net_heterophily()` measures how embedded nodes in the network @@ -86,22 +85,7 @@ net_diversity <- function(.data, attribute, clusters = NULL){ if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov blau <- function(features) { 1 - sum((table(features)/length(features))^2) } attr <- manynet::node_attribute(.data, attribute) - if (is.null(clusters)) { - blauout <- blau(attr) - } else if (is.numeric(clusters) && is.vector(clusters)) { - blauout <- vapply(unique(clusters), - function(i) blau(attr[clusters == i]), - numeric(1)) - names(blauout) <- paste0("Cluster ", unique(clusters)) - } else if (is.character(clusters)) { - clu <- manynet::node_attribute(.data, clusters) - blauout <- vapply(unique(clu), - function(i) blau(attr[clu == i]), - numeric(1)) - names(blauout) <- paste0("Cluster ", unique(clu)) - blauout <- blauout[order(names(blauout))] - } else snet_abort("`clusters` must be the name of a nodal variable in the object.") - make_network_measure(blauout, .data, call = deparse(sys.call())) + make_network_measure(out, .data, call = deparse(sys.call())) } #' @rdname measure_heterogeneity From d922111c9ab3b30b43d05efb25dfcc7068d9bfd3 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 17:20:09 +0100 Subject: [PATCH 02/25] Improved net_diversity() to allow using the Teachman's index instead of Blau index --- R/measure_heterogeneity.R | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 7f723cf9..3a6dd9a1 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -56,7 +56,7 @@ node_richness <- function(.data, attribute){ #' by other names #' (Gini-Simpson Index, Gini impurity, Gini's diversity index, #' Gibbs-Martin index, and probability of interspecific encounter (PIE)): -#' \deqn{1 - \sum\limits_{i = 1}^k {p_i^2 }}, +#' \deqn{1 - \sum\limits_{i = 1}^k {p_i^2 }} #' where \eqn{p_i} is the proportion of group members in \eqn{i}th category #' and \eqn{k} is the number of categories for an attribute of interest. #' This index can be interpreted as the probability that two members @@ -64,13 +64,31 @@ node_richness <- function(.data, attribute){ #' This index finds its minimum value (0) when there is no variety, #' i.e. when all individuals are classified in the same category. #' The maximum value depends on the number of categories and -#' whether nodes can be evenly distributed across categories. +#' whether nodes can be evenly distributed across categories. +#' +#' Teachman's index (1980) is based on information theory +#' and is calculated as: +#' \deqn{- \sum\limits_{i = 1}^k {p_i \log(p_i)}} +#' where \eqn{p_i} is the proportion of group members in \eqn{i}th category +#' and \eqn{k} is the number of categories for an attribute of interest. +#' This index finds its minimum value (0) when there is no variety, +#' i.e. when all individuals are classified in the same category. +#' The maximum value depends on the number of categories and +#' whether nodes can be evenly distributed across categories. +#' It thus shares similar properties to Blau's index, +#' but includes also a notion of richness that tends to give more weight to +#' rare categories and thus tends to highlight imbalances more. #' @references #' ## On diversity #' Blau, Peter M. 1977. #' _Inequality and heterogeneity_. #' New York: Free Press. #' +#' Teachman, Jay D. 1980. +#' Analysis of population diversity: Measures of qualitative variation. +#' _Sociological Methods & Research_, 8:341-362. +#' \doi{10.1177/004912418000800305} +#' #' Page, Scott E. 2010. #' _Diversity and Complexity_. #' Princeton: Princeton University Press. @@ -81,10 +99,19 @@ node_richness <- function(.data, attribute){ #' net_diversity(marvel_friends, "Attractive") #' net_diversity(marvel_friends, "Gender", "Rich") #' @export -net_diversity <- function(.data, attribute, clusters = NULL){ +net_diversity <- function(.data, attribute, + method = c("blau","teachman","cv","gini")){ if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov blau <- function(features) { 1 - sum((table(features)/length(features))^2) } + teachman <- function(features) { + p <- table(features)/length(features) + -sum(p * log(p)) + } attr <- manynet::node_attribute(.data, attribute) + + out <- switch(method, + blau = blau(attr), + teachman = teachman(attr), make_network_measure(out, .data, call = deparse(sys.call())) } @@ -93,12 +120,13 @@ net_diversity <- function(.data, attribute, clusters = NULL){ #' node_diversity(marvel_friends, "Gender") #' node_diversity(marvel_friends, "Attractive") #' @export -node_diversity <- function(.data, attribute){ +node_diversity <- function(.data, attribute, + method = c("blau","teachman","cv","gini")){ if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov out <- vapply(igraph::ego(manynet::as_igraph(.data)), function(x) net_diversity( igraph::induced_subgraph(manynet::as_igraph(.data), x), - attribute), + attribute, method = method), FUN.VALUE = numeric(1)) make_node_measure(out, .data) } From 70de9a3e34726ecf6c4ba42adbe53229a8135112 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 17:23:55 +0100 Subject: [PATCH 03/25] Improved net_diversity() and node_diversity() to allow the use of variation or gini coefficients for numeric attributes --- R/measure_heterogeneity.R | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 3a6dd9a1..eed883d0 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -23,7 +23,13 @@ #' @inheritParams mark_is #' @param attribute Name of a nodal attribute or membership vector #' to use as categories for the diversity measure. -#' @param clusters A nodal cluster membership vector or name of a vertex attribute. +#' @param method Which method to use for `net_diversity()`. +#' Either "blau" (Blau's index) or "teachman" (Teachman's index) for +#' categorical attributes, or "variation" (coefficient of variation) +#' or "gini" (Gini coefficient) for numeric attributes. +#' Default is "blau". +#' If an incompatible method is chosen for the attribute type, +#' a suitable alternative will be used instead with a message. #' @name measure_heterogeneity #' @family measures NULL @@ -97,21 +103,30 @@ node_richness <- function(.data, attribute){ #' marvel_friends <- to_unsigned(ison_marvel_relationships, "positive") #' net_diversity(marvel_friends, "Gender") #' net_diversity(marvel_friends, "Attractive") -#' net_diversity(marvel_friends, "Gender", "Rich") #' @export net_diversity <- function(.data, attribute, - method = c("blau","teachman","cv","gini")){ + method = c("blau","teachman","variation","gini")){ if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov blau <- function(features) { 1 - sum((table(features)/length(features))^2) } teachman <- function(features) { p <- table(features)/length(features) -sum(p * log(p)) } + cv <- function(values) { + sd(values, na.rm = TRUE) / mean(values, na.rm = TRUE) } + gini <- function(values) { + x <- sort(values) + n <- length(x) + G <- sum(x * (1:n)) + return((2 * G) / (n * sum(x)) - (n + 1) / n) + } attr <- manynet::node_attribute(.data, attribute) out <- switch(method, blau = blau(attr), teachman = teachman(attr), + variation = cv(attr), + gini = gini(attr)) make_network_measure(out, .data, call = deparse(sys.call())) } @@ -121,7 +136,7 @@ net_diversity <- function(.data, attribute, #' node_diversity(marvel_friends, "Attractive") #' @export node_diversity <- function(.data, attribute, - method = c("blau","teachman","cv","gini")){ + method = c("blau","teachman","variation","gini")){ if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov out <- vapply(igraph::ego(manynet::as_igraph(.data)), function(x) net_diversity( From 538bfd4b8ccaad7f7edd15c87dae7f6aa0419025 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 17:25:07 +0100 Subject: [PATCH 04/25] Improved net_diversity() and node_diversity() to correct the method where incompatible methods are used --- R/measure_heterogeneity.R | 31 ++++++++++++- man/measure_heterogeneity.Rd | 48 +++++++++++++++++---- tests/testthat/test-measure_heterogeneity.R | 11 ++--- 3 files changed, 71 insertions(+), 19 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index eed883d0..7cf972c1 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -121,6 +121,19 @@ net_diversity <- function(.data, attribute, return((2 * G) / (n * sum(x)) - (n + 1) / n) } attr <- manynet::node_attribute(.data, attribute) + method <- match.arg(method) + if(is.numeric(attr) && method %in% c("blau","teachman")){ + snet_info("{.val {method}} index is not appropriate for numeric attributes.") + snet_info("Using {.val variation} coefficient instead", + "({.val gini} coefficient also available).") + method <- "cv" + } + if(is.character(attr) && method %in% c("variation","gini")){ + snet_info("{.val {method}} coefficient is not appropriate for categorical attributes.") + snet_info("Using {.val blau} index instead", + "({.val teachman} index also available).") + method <- "blau" + } out <- switch(method, blau = blau(attr), @@ -138,6 +151,20 @@ net_diversity <- function(.data, attribute, node_diversity <- function(.data, attribute, method = c("blau","teachman","variation","gini")){ if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov + attr <- manynet::node_attribute(.data, attribute) + method <- match.arg(method) + if(is.numeric(attr) && method %in% c("blau","teachman")){ + snet_info("{.val {method}} index is not appropriate for numeric attributes.") + snet_info("Using {.val variation} coefficient instead", + "({.val gini} coefficient also available).") + method <- "cv" + } + if(is.character(attr) && method %in% c("variation","gini")){ + snet_info("{.val {method}} coefficient is not appropriate for categorical attributes.") + snet_info("Using {.val blau} index instead", + "({.val teachman} index also available).") + method <- "blau" + } out <- vapply(igraph::ego(manynet::as_igraph(.data)), function(x) net_diversity( igraph::induced_subgraph(manynet::as_igraph(.data), x), @@ -217,7 +244,7 @@ node_heterophily <- function(.data, attribute){ #' @references #' ## On assortativity #' Newman, Mark E.J. 2002. -#' "Assortative Mxing in Networks". +#' "Assortative mixing in networks". #' _Physical Review Letters_, 89(20): 208701. #' \doi{10.1103/physrevlett.89.208701} #' @examples @@ -234,7 +261,7 @@ net_assortativity <- function(.data){ #' @references #' ## On spatial autocorrelation #' Moran, Patrick Alfred Pierce. 1950. -#' "Notes on Continuous Stochastic Phenomena". +#' "Notes on continuous stochastic phenomena". #' _Biometrika_ 37(1): 17-23. #' \doi{10.2307/2332142} #' @examples diff --git a/man/measure_heterogeneity.Rd b/man/measure_heterogeneity.Rd index ac64226b..88d64855 100644 --- a/man/measure_heterogeneity.Rd +++ b/man/measure_heterogeneity.Rd @@ -8,6 +8,7 @@ \alias{node_diversity} \alias{net_heterophily} \alias{node_heterophily} +\alias{node_homophily} \alias{net_assortativity} \alias{net_spatial} \title{Measures of network diversity} @@ -16,14 +17,20 @@ net_richness(.data, attribute) node_richness(.data, attribute) -net_diversity(.data, attribute, clusters = NULL) +net_diversity( + .data, + attribute, + method = c("blau", "teachman", "variation", "gini") +) -node_diversity(.data, attribute) +node_diversity(.data, attribute, method = c("blau", "teachman", "cv", "gini")) net_heterophily(.data, attribute) node_heterophily(.data, attribute) +node_homophily(.data, attribute) + net_assortativity(.data) net_spatial(.data, attribute) @@ -41,7 +48,13 @@ net_spatial(.data, attribute) \item{attribute}{Name of a nodal attribute or membership vector to use as categories for the diversity measure.} -\item{clusters}{A nodal cluster membership vector or name of a vertex attribute.} +\item{method}{Which method to use for \code{net_diversity()}. +Either "blau" (Blau's index) or "teachman" (Teachman's index) for +categorical attributes, or "variation" (coefficient of variation) +or "gini" (Gini coefficient) for numeric attributes. +Default is "blau". +If an incompatible method is chosen for the attribute type, +a suitable alternative will be used instead with a message.} } \description{ These functions offer ways to measure the heterogeneity of an attribute @@ -52,8 +65,7 @@ across this attribute: in a network attribute. \item \code{node_richness()} measures the number of unique categories of an attribute to which each node is connected. -\item \code{net_diversity()} measures the heterogeneity of ties across a network -or within clusters by node attributes. +\item \code{net_diversity()} measures the heterogeneity of ties across a network. \item \code{node_diversity()} measures the heterogeneity of each node's local neighbourhood. \item \code{net_heterophily()} measures how embedded nodes in the network @@ -71,7 +83,7 @@ Blau's index (1977) uses a formula known also in other disciplines by other names (Gini-Simpson Index, Gini impurity, Gini's diversity index, Gibbs-Martin index, and probability of interspecific encounter (PIE)): -\deqn{1 - \sum\limits_{i = 1}^k {p_i^2 }}, +\deqn{1 - \sum\limits_{i = 1}^k {p_i^2 }} where \eqn{p_i} is the proportion of group members in \eqn{i}th category and \eqn{k} is the number of categories for an attribute of interest. This index can be interpreted as the probability that two members @@ -80,6 +92,19 @@ This index finds its minimum value (0) when there is no variety, i.e. when all individuals are classified in the same category. The maximum value depends on the number of categories and whether nodes can be evenly distributed across categories. + +Teachman's index (1980) is based on information theory +and is calculated as: +\deqn{- \sum\limits_{i = 1}^k {p_i \log(p_i)}} +where \eqn{p_i} is the proportion of group members in \eqn{i}th category +and \eqn{k} is the number of categories for an attribute of interest. +This index finds its minimum value (0) when there is no variety, +i.e. when all individuals are classified in the same category. +The maximum value depends on the number of categories and +whether nodes can be evenly distributed across categories. +It thus shares similar properties to Blau's index, +but includes also a notion of richness that tends to give more weight to +rare categories and thus tends to highlight imbalances more. } \section{net_heterophily}{ @@ -99,13 +124,13 @@ node_richness(ison_networkers, "Discipline") marvel_friends <- to_unsigned(ison_marvel_relationships, "positive") net_diversity(marvel_friends, "Gender") net_diversity(marvel_friends, "Attractive") -net_diversity(marvel_friends, "Gender", "Rich") node_diversity(marvel_friends, "Gender") node_diversity(marvel_friends, "Attractive") net_heterophily(marvel_friends, "Gender") net_heterophily(marvel_friends, "Attractive") node_heterophily(marvel_friends, "Gender") node_heterophily(marvel_friends, "Attractive") +node_homophily(marvel_friends, "Gender") net_assortativity(ison_networkers) net_spatial(ison_lawfirm, "age") } @@ -116,6 +141,11 @@ Blau, Peter M. 1977. \emph{Inequality and heterogeneity}. New York: Free Press. +Teachman, Jay D. 1980. +Analysis of population diversity: Measures of qualitative variation. +\emph{Sociological Methods & Research}, 8:341-362. +\doi{10.1177/004912418000800305} + Page, Scott E. 2010. \emph{Diversity and Complexity}. Princeton: Princeton University Press. @@ -138,7 +168,7 @@ McPherson, Miller, Lynn Smith-Lovin, and James M. Cook. 2001. \subsection{On assortativity}{ Newman, Mark E.J. 2002. -"Assortative Mxing in Networks". +"Assortative mixing in networks". \emph{Physical Review Letters}, 89(20): 208701. \doi{10.1103/physrevlett.89.208701} } @@ -146,7 +176,7 @@ Newman, Mark E.J. 2002. \subsection{On spatial autocorrelation}{ Moran, Patrick Alfred Pierce. 1950. -"Notes on Continuous Stochastic Phenomena". +"Notes on continuous stochastic phenomena". \emph{Biometrika} 37(1): 17-23. \doi{10.2307/2332142} } diff --git a/tests/testthat/test-measure_heterogeneity.R b/tests/testthat/test-measure_heterogeneity.R index 0c121fd3..e55ddd5c 100644 --- a/tests/testthat/test-measure_heterogeneity.R +++ b/tests/testthat/test-measure_heterogeneity.R @@ -1,9 +1,9 @@ #*************** Test the heterogeneity family of functions ******************# -test_that("diversity function works", { +test_that("variety functions works", { expect_equal(as.numeric(net_diversity(ison_marvel_relationships, "Gender")), 0.306, tolerance = 0.001) - expect_equal(as.numeric(net_diversity(ison_marvel_relationships, "Gender", "Rich")), - c(0.3367,0.1653), tolerance = 0.001) + expect_equal(top3(node_diversity(ison_lawfirm, "gender")), + c(0.285, 0.375,0), tolerance = 0.01) }) test_that("heterophily function works", { @@ -26,11 +26,6 @@ test_that("richness function works", { expect_s3_class(node_richness(ison_networkers, "type"), "node_measure") }) -test_that("node_diversity works", { - expect_equal(top3(node_diversity(ison_lawfirm, "gender")), - c(0.285, 0.375,0), tolerance = 0.01) -}) - test_that("net_spatial works", { expect_values(net_spatial(ison_lawfirm, "age"), 0.126) }) \ No newline at end of file From 79c35aefb252b91f637f81d7aaf08a4c3bc84c0e Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 21:06:29 +0100 Subject: [PATCH 05/25] Improved vector printing to be more succinct and suggest `print_all()` --- NEWS.md | 6 ++++++ R/class_marks.R | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7d42993e..84868d19 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# manynet 1.6.8 + +## Making + +- Improved vector printing to be more succinct and suggest `print_all()` + # manynet 1.6.7 ## Making diff --git a/R/class_marks.R b/R/class_marks.R index 3dda4091..a690066f 100644 --- a/R/class_marks.R +++ b/R/class_marks.R @@ -55,9 +55,9 @@ print_tblvec <- function(y, names, n){ body <- pillar::tbl_format_body(tibs, setup)[c(TRUE, FALSE, TRUE)] if(setup$extra_cols_total > 0){ print(body) - cat(pillar::style_subtle(paste("# ... with", + cat(pillar::style_subtle(paste("# ... and", setup$extra_cols_total, - "more values from this nodeset unprinted.", - "Use `print(..., n = Inf)` to print all values."))) + "more values from this nodeset.", + "Use `print_all(...)` to print all values."))) } else print(body) } From 7c90f206e85701620f68d6607a5943aa8119f83a Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 21:09:57 +0100 Subject: [PATCH 06/25] Added references for richness --- R/measure_heterogeneity.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 7cf972c1..f55d3675 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -34,7 +34,16 @@ #' @family measures NULL -#' @rdname measure_heterogeneity +#' @rdname measure_heterogeneity +#' @section Richness: +#' Richness is a simple count of the number of different categories +#' present for a given attribute. +#' @references +#' ## On richness +#' Magurran, Anne E. 1988. +#' _Ecological Diversity and Its Measurement_. +#' Princeton: Princeton University Press. +#' \doi{10.1007/978-94-015-7358-0} #' @examples #' net_richness(ison_networkers) #' @export From 7482ac65c38e496f1252384b2d4a310a2d5c7932 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 21:10:24 +0100 Subject: [PATCH 07/25] Updated documentation --- R/measure_heterogeneity.R | 31 +++++++++++++++++-- man/measure_heterogeneity.Rd | 58 ++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 7 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index f55d3675..3c22aa73 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -66,7 +66,7 @@ node_richness <- function(.data, attribute){ } #' @rdname measure_heterogeneity -#' @section net_diversity: +#' @section Diversity: #' Blau's index (1977) uses a formula known also in other disciplines #' by other names #' (Gini-Simpson Index, Gini impurity, Gini's diversity index, @@ -93,6 +93,33 @@ node_richness <- function(.data, attribute){ #' It thus shares similar properties to Blau's index, #' but includes also a notion of richness that tends to give more weight to #' rare categories and thus tends to highlight imbalances more. +#' +#' The coefficient of variation (CV) is a standardised measure of dispersion +#' of a probability distribution or frequency distribution. +#' It is defined as the ratio of the standard deviation \eqn{\sigma} +#' to the mean \eqn{\mu}: +#' \deqn{CV = \frac{\sigma}{\mu}} +#' It is often expressed as a percentage. +#' The CV is useful because the standard deviation of data must always be understood +#' in the context of the mean of the data. +#' The CV is particularly useful when comparing the degree of variation +#' from one data series to another, +#' even if the means are drastically different from each other. +#' +#' The Gini coefficient is a measure of statistical dispersion +#' that is intended to represent the income or wealth distribution +#' of a nation's residents, +#' and is commonly used as a measure of inequality. +#' It is defined as a ratio with values between 0 and 1, +#' where 0 corresponds with perfect equality +#' (everyone has the same income) and 1 corresponds with perfect inequality +#' (one person has all the income, and everyone else has zero income). +#' The Gini coefficient can be calculated from the Lorenz curve, +#' which plots the proportion of the total income of the population +#' that is cumulatively earned by the bottom x% of the population. +#' The Gini coefficient is defined as the area between the line of equality +#' and the Lorenz curve, +#' divided by the total area under the line of equality. #' @references #' ## On diversity #' Blau, Peter M. 1977. @@ -183,7 +210,7 @@ node_diversity <- function(.data, attribute, } #' @rdname measure_heterogeneity -#' @section net_heterophily: +#' @section Homophily: #' Given a partition of a network into a number of mutually exclusive groups then #' The E-I index is the number of ties between (or _external_) nodes #' grouped in some mutually exclusive categories diff --git a/man/measure_heterogeneity.Rd b/man/measure_heterogeneity.Rd index 88d64855..af68a0e5 100644 --- a/man/measure_heterogeneity.Rd +++ b/man/measure_heterogeneity.Rd @@ -8,6 +8,7 @@ \alias{node_diversity} \alias{net_heterophily} \alias{node_heterophily} +\alias{net_homophily} \alias{node_homophily} \alias{net_assortativity} \alias{net_spatial} @@ -23,13 +24,19 @@ net_diversity( method = c("blau", "teachman", "variation", "gini") ) -node_diversity(.data, attribute, method = c("blau", "teachman", "cv", "gini")) +node_diversity( + .data, + attribute, + method = c("blau", "teachman", "variation", "gini") +) net_heterophily(.data, attribute) node_heterophily(.data, attribute) -node_homophily(.data, attribute) +net_homophily(.data, attribute, method = c("ie", "ei", "yule", "geary")) + +node_homophily(.data, attribute, method = c("ie", "ei", "yule", "geary")) net_assortativity(.data) @@ -77,7 +84,13 @@ of nodes with the same attribute. (global Moran's I) in a network. } } -\section{net_diversity}{ +\section{Richness}{ + +Richness is a simple count of the number of different categories +present for a given attribute. +} + +\section{Diversity}{ Blau's index (1977) uses a formula known also in other disciplines by other names @@ -105,9 +118,36 @@ whether nodes can be evenly distributed across categories. It thus shares similar properties to Blau's index, but includes also a notion of richness that tends to give more weight to rare categories and thus tends to highlight imbalances more. + +The coefficient of variation (CV) is a standardised measure of dispersion +of a probability distribution or frequency distribution. +It is defined as the ratio of the standard deviation \eqn{\sigma} +to the mean \eqn{\mu}: +\deqn{CV = \frac{\sigma}{\mu}} +It is often expressed as a percentage. +The CV is useful because the standard deviation of data must always be understood +in the context of the mean of the data. +The CV is particularly useful when comparing the degree of variation +from one data series to another, +even if the means are drastically different from each other. + +The Gini coefficient is a measure of statistical dispersion +that is intended to represent the income or wealth distribution +of a nation's residents, +and is commonly used as a measure of inequality. +It is defined as a ratio with values between 0 and 1, +where 0 corresponds with perfect equality +(everyone has the same income) and 1 corresponds with perfect inequality +(one person has all the income, and everyone else has zero income). +The Gini coefficient can be calculated from the Lorenz curve, +which plots the proportion of the total income of the population +that is cumulatively earned by the bottom x\% of the population. +The Gini coefficient is defined as the area between the line of equality +and the Lorenz curve, +divided by the total area under the line of equality. } -\section{net_heterophily}{ +\section{Homophily}{ Given a partition of a network into a number of mutually exclusive groups then The E-I index is the number of ties between (or \emph{external}) nodes @@ -130,11 +170,19 @@ net_heterophily(marvel_friends, "Gender") net_heterophily(marvel_friends, "Attractive") node_heterophily(marvel_friends, "Gender") node_heterophily(marvel_friends, "Attractive") -node_homophily(marvel_friends, "Gender") +net_homophily(marvel_friends, "Gender") net_assortativity(ison_networkers) net_spatial(ison_lawfirm, "age") } \references{ +\subsection{On richness}{ + +Magurran, Anne E. 1988. +\emph{Ecological Diversity and Its Measurement}. +Princeton: Princeton University Press. +\doi{10.1007/978-94-015-7358-0} +} + \subsection{On diversity}{ Blau, Peter M. 1977. From 58eb04a28355b30060ca4ad08e13c0e091ffe2ac Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Nov 2025 21:15:06 +0100 Subject: [PATCH 08/25] Added `net_homophily()` and `node_homophily()` for measuring homophily according to different methods (closes #115) --- DESCRIPTION | 4 +- NAMESPACE | 2 + NEWS.md | 12 ++++ R/measure_heterogeneity.R | 125 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 406791b3..c035f015 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: manynet Title: Many Ways to Make, Modify, Mark, and Measure Myriad Networks -Version: 1.6.7 -Date: 2025-11-05 +Version: 1.6.8 +Date: 2025-11-07 Description: Many tools for making, modifying, marking, measuring, and motifs and memberships of many different types of networks. All functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, diff --git a/NAMESPACE b/NAMESPACE index 07bd5079..d5d6004c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -446,6 +446,7 @@ export(net_factions) export(net_harmonic) export(net_hazard) export(net_heterophily) +export(net_homophily) export(net_immunity) export(net_indegree) export(net_independence) @@ -571,6 +572,7 @@ export(node_fluid) export(node_harmonic) export(node_heterophily) export(node_hierarchy) +export(node_homophily) export(node_hub) export(node_in_adopter) export(node_in_automorphic) diff --git a/NEWS.md b/NEWS.md index 84868d19..82b0d1a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,18 @@ - Improved vector printing to be more succinct and suggest `print_all()` +## Measuring + +- Improved `net_diversity()` + - No longer offers a cluster option, which can be obtained using `over_membership()` + - Added more methods for calculating diversity, including Teachman's, coefficient of variation, and the Gini coefficient +- Improved `net_diversity()` and `node_diversity()` to use and declare methods appropriate for the vector of attributes +- Updated documentation for richness, diversity, and homophily measures +- Added `net_homophily()` and `node_homophily()` for measuring homophily according to different methods including + - Krackhardt's EI index of heterophily as well as its inverse as a measure of homophily + - Yule's Q as a further measure of homophily + - Geary's C as a measure of homophily for numeric attributes + # manynet 1.6.7 ## Making diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 3c22aa73..bb7adc88 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -275,6 +275,131 @@ node_heterophily <- function(.data, attribute){ make_node_measure(ei, .data) } +#' @rdname measure_heterogeneity +#' @examples +#' net_homophily(marvel_friends, "Gender") +#' @export +net_homophily <- function(.data, attribute, + method = c("ie","ei","yule","geary")){ + if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov + # mode <- attr_mode(.data, attribute) + # if(is_twomode(.data) && !is.null(mode)){ + # if(mode){ + # snet_info("Attribute only present on second mode of two-mode network.") + # snet_info("Calculating homophily on first mode instead.") + # attribute <- manynet::node_attribute(.data, attribute)[ + # !manynet::node_is_mode(.data)] + # } else { + # snet_info("Attribute only present on first mode of two-mode network.") + # snet_info("Calculating homophily on second mode instead.") + # attribute <- manynet::node_attribute(.data, attribute)[ + # manynet::node_is_mode(.data)] + # } + # .data <- manynet::to_mode(.data, mode = mode) + # } + if (length(attribute) == 1 && is.character(attribute)) { + attribute <- manynet::node_attribute(.data, attribute) + } + method <- match.arg(method) + if(is.numeric(attribute) && method %in% c("ie","ei","yule")){ + snet_info("{.val {method}} index is not appropriate for numeric attributes.") + snet_info("Using {.val geary}'s C instead.") + method <- "geary" + } + if(!is.numeric(attribute) && method %in% c("geary")){ + snet_info("{.val {method}} index is not appropriate for categorical attributes.") + snet_info("Using {.val ie} index instead.") + method <- "ie" + } + + m <- manynet::as_matrix(to_unweighted(.data)) + + ei <- function(m, attribute){ + same <- outer(attribute, attribute, "==") + nInternal <- sum(m * same, na.rm = TRUE) + nExternal <- sum(m, na.rm = TRUE) - nInternal + (nExternal - nInternal) / sum(m, na.rm = TRUE) + } + + yule <- function(m, attribute){ + same <- outer(attribute, attribute, "==") + a <- sum(m * same, na.rm = TRUE) + b <- sum(m * (!same), na.rm = TRUE) + c <- sum((1 - m) * same, na.rm = TRUE) + d <- sum((1 - m) * (!same), na.rm = TRUE) + (a*d - b*c)/(a*d + b*c) + } + + geary <- function(m, attribute){ + # identify valid nodes + valid <- !is.na(attribute) + attr_valid <- attribute[valid] + m_valid <- m[valid, valid, drop = FALSE] + + # recompute n and mean on valid nodes + n <- length(attr_valid) + xbar <- mean(attr_valid, na.rm = TRUE) + + # pairwise squared differences + diffsq <- (outer(attr_valid, attr_valid, "-"))^2 + + # weight sum + W <- sum(m_valid, na.rm = TRUE) + + # numerator and denominator + num <- (n - 1) * sum(m_valid * diffsq, na.rm = TRUE) + den <- 2 * W * sum((attr_valid - xbar)^2, na.rm = TRUE) + + if (den == 0) return(NA_real_) + num / den + } + + res <- switch(match.arg(method), + ie = -ei(m, attribute), + ei = ei(m, attribute), + yule = yule(m, attribute), + geary = geary(m, attribute)) + + make_network_measure(res, .data, call = deparse(sys.call())) +} + + +attr_mode <- function(.data, attribute){ + if(is_twomode(.data)){ + miss <- is.na(node_attribute(.data, attribute)) + mode <- node_is_mode(.data) + if(all(miss[mode])) return(FALSE) # attribute only on first (FALSE) mode + if(all(miss[!mode])) return(TRUE) # attribute only on second (TRUE) mode + } else NULL +} + +#' @rdname measure_heterogeneity +#' @export +node_homophily <- function(.data, attribute, + method = c("ie","ei","yule","geary")){ + if(missing(.data)) {expect_nodes(); .data <- .G()} # nocov + if (length(attribute) == 1 && is.character(attribute)) { + attribute <- manynet::node_attribute(.data, attribute) + } + method <- match.arg(method) + if(is.numeric(attribute) && method %in% c("ie","ei","yule")){ + snet_info("{.val {method}} index is not appropriate for numeric attributes.") + snet_info("Using {.val geary}'s C instead.") + method <- "geary" + } + if(!is.numeric(attribute) && method %in% c("geary")){ + snet_info("{.val {method}} index is not appropriate for categorical attributes.") + snet_info("Using {.val ie} index instead.") + method <- "ie" + } + out <- vapply(igraph::ego(manynet::as_igraph(.data)), + function(x) net_homophily( + igraph::induced_subgraph(manynet::as_igraph(.data), x), + attribute, method = method), + FUN.VALUE = numeric(1)) + make_node_measure(out, .data) +} + #' @rdname measure_heterogeneity #' @importFrom igraph assortativity_degree #' @references From 78940ac7509423ff64fe26fcd67076c1a6cdab66 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Nov 2025 17:03:52 +0100 Subject: [PATCH 09/25] Fixed sd namespace issue --- R/measure_heterogeneity.R | 2 +- tests/testthat/test-measure_heterogeneity.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index bb7adc88..6f867eb0 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -149,7 +149,7 @@ net_diversity <- function(.data, attribute, -sum(p * log(p)) } cv <- function(values) { - sd(values, na.rm = TRUE) / mean(values, na.rm = TRUE) } + stats::sd(values, na.rm = TRUE) / mean(values, na.rm = TRUE) } gini <- function(values) { x <- sort(values) n <- length(x) diff --git a/tests/testthat/test-measure_heterogeneity.R b/tests/testthat/test-measure_heterogeneity.R index e55ddd5c..52478d55 100644 --- a/tests/testthat/test-measure_heterogeneity.R +++ b/tests/testthat/test-measure_heterogeneity.R @@ -1,6 +1,6 @@ #*************** Test the heterogeneity family of functions ******************# -test_that("variety functions works", { +test_that("diversity functions works", { expect_equal(as.numeric(net_diversity(ison_marvel_relationships, "Gender")), 0.306, tolerance = 0.001) expect_equal(top3(node_diversity(ison_lawfirm, "gender")), c(0.285, 0.375,0), tolerance = 0.01) From 6cfbff2c15e74a1fe7cea39a35f0d548e0c7859e Mon Sep 17 00:00:00 2001 From: James Hollway Date: Wed, 12 Nov 2025 11:17:48 +0100 Subject: [PATCH 10/25] Fixed description of to_directed() and to_redirected() in data tutorial --- inst/tutorials/tutorial1/data.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tutorials/tutorial1/data.Rmd b/inst/tutorials/tutorial1/data.Rmd index 2ce2c3fa..9cc8f8f4 100644 --- a/inst/tutorials/tutorial1/data.Rmd +++ b/inst/tutorials/tutorial1/data.Rmd @@ -592,7 +592,8 @@ For example: - `to_unnamed()` removes/anonymises all vertex/node labels - `to_named()` adds some random (U.S.) childrens' names, which can be useful for identifying particular nodes - `to_undirected()` replaces directed ties with an undirected tie (if an arc in either direction is present) -- `to_redirected()` replaces undirected ties with directed ties (arcs) or, if already directed, swaps arcs' direction +- `to_directed()` replaces undirected ties with directed ties (arcs) or, if already directed, does nothing +- `to_redirected()` swaps the direction of directed ties (arcs) or, if undirected, does nothing - `to_unweighted()` binarises or dichotomises a network around a particular threshold (by default `1`) - `to_unsigned()` returns just the "positive" or "negative" ties from a signed network, respectively - `to_uniplex()` reduces a multigraph or multiplex network to one with a single set of edges or ties From ec146001a621444ffee91df89307f13af4f88363 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Mon, 17 Nov 2025 21:03:09 +0100 Subject: [PATCH 11/25] Fixed node_is_latent(), node_is_infected(), and node_is_recovered() to work with new play_diffusion() networks --- R/manynet-utils.R | 3 ++- R/mark_nodes.R | 51 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/R/manynet-utils.R b/R/manynet-utils.R index 8252b1ac..8e1cd337 100644 --- a/R/manynet-utils.R +++ b/R/manynet-utils.R @@ -6,7 +6,8 @@ utils::globalVariables(c(".data", "obs", "nodes","event","exposure", "student","students","colleges", "node","value","var","active","time", - "A","B","C","D")) + "A","B","C","D", + "n")) # Helper function for declaring available methods available_methods <- function(fun_vctr) { diff --git a/R/mark_nodes.R b/R/mark_nodes.R index 0f3d6f58..7173e606 100644 --- a/R/mark_nodes.R +++ b/R/mark_nodes.R @@ -202,8 +202,21 @@ NULL #' node_is_latent(play_diffusion(create_tree(6), latency = 1), time = 1) #' @export node_is_latent <- function(.data, time = 0){ - if(inherits(.data, "diff_model")){ - event <- nodes <- n <- NULL + if(is_changing(.data)){ + t <- time + latent <- as_changelist(.data) %>% + dplyr::filter(time <= t & value %in% c("E", "I")) %>% + dplyr::group_by(node) %>% + dplyr::mutate(n = dplyr::n()) %>% + dplyr::filter(n == 1 & value == "E") + if (is_labelled(.data)) { + out <- seq_len(net_nodes(.data)) %in% latent$node + names(out) <- node_names(.data) + } else { + out <- seq_len(net_nodes(.data)) %in% latent$node + } + make_node_mark(out, .data) + } else if(inherits(.data, "diff_model")){ latent <- summary(.data) %>% dplyr::filter(t <= time & event %in% c("E", "I")) %>% group_by(nodes) %>% @@ -231,8 +244,21 @@ node_is_latent <- function(.data, time = 0){ #' node_is_infected(play_diffusion(create_tree(6)), time = 1) #' @export node_is_infected <- function(.data, time = 0) { - if(inherits(.data, "diff_model")){ - event <- nodes <- n <- NULL + if(is_changing(.data)){ + t <- time + infected <- as_changelist(.data) %>% + dplyr::filter(time <= t & value %in% c("I", "R")) %>% + dplyr::group_by(node) %>% + dplyr::mutate(n = dplyr::n()) %>% + dplyr::filter(n == 1 & value == "I") + if (is_labelled(.data)) { + out <- seq_len(net_nodes(.data)) %in% infected$node + names(out) <- node_names(.data) + } else { + out <- seq_len(net_nodes(.data)) %in% infected$node + } + make_node_mark(out, .data) + } else if(inherits(.data, "diff_model")){ infected <- summary(.data) %>% dplyr::filter(t <= time & event %in% c("I", "R")) %>% group_by(nodes) %>% @@ -259,8 +285,21 @@ node_is_infected <- function(.data, time = 0) { #' node_is_recovered(play_diffusion(create_tree(6), recovery = 0.5), time = 3) #' @export node_is_recovered <- function(.data, time = 0){ - if(inherits(.data, "diff_model")){ - event <- nodes <- n <- NULL + if(is_changing(.data)){ + t <- time + recovered <- as_changelist(.data) %>% + dplyr::filter(time <= t & value %in% c("R")) %>% + dplyr::group_by(node) %>% + dplyr::mutate(n = dplyr::n()) %>% + dplyr::filter(n == 1 & value == "R") + if (is_labelled(.data)) { + out <- seq_len(net_nodes(.data)) %in% recovered$node + names(out) <- node_names(.data) + } else { + out <- seq_len(net_nodes(.data)) %in% recovered$node + } + make_node_mark(out, .data) + } else if(inherits(.data, "diff_model")){ recovered <- summary(.data) %>% dplyr::filter(t <= time & event == "R") %>% group_by(nodes) %>% From 1bf0de0bd6fea9b40f34b9c75883c64d4d10f651 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Mon, 17 Nov 2025 21:03:56 +0100 Subject: [PATCH 12/25] Updated node_is_exposed() to work with changing data, now also accepts a time argument --- R/mark_nodes.R | 18 +++++++++++------- man/mark_diff.Rd | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/R/mark_nodes.R b/R/mark_nodes.R index 7173e606..3fd359f4 100644 --- a/R/mark_nodes.R +++ b/R/mark_nodes.R @@ -339,13 +339,17 @@ node_is_recovered <- function(.data, time = 0){ #' (expos <- node_is_exposed(manynet::create_tree(14), mark = c(1,3))) #' which(expos) #' @export -node_is_exposed <- function(.data, mark){ - event <- nodes <- NULL - if (missing(mark) && inherits(.data, "diff_model")){ - mark <- summary(.data) %>% - dplyr::filter(t == 0 & event == "I") %>% - dplyr::select(nodes) %>% unlist() - .data <- attr(.data, "network") +node_is_exposed <- function(.data, mark, time = 0){ + if (missing(mark)){ + if(is_changing(.data)){ + t <- time + return(make_node_mark(node_exposure(.data, time = t)>0, .data)) + } else if(inherits(.data, "diff_model")){ + mark <- summary(.data) %>% + dplyr::filter(t == 0 & event == "I") %>% + dplyr::select(nodes) %>% unlist() + .data <- attr(.data, "network") + } } if(is.logical(mark)) mark <- which(mark) out <- rep(F, manynet::net_nodes(.data)) diff --git a/man/mark_diff.Rd b/man/mark_diff.Rd index c52dd719..64e46f80 100644 --- a/man/mark_diff.Rd +++ b/man/mark_diff.Rd @@ -14,7 +14,7 @@ node_is_infected(.data, time = 0) node_is_recovered(.data, time = 0) -node_is_exposed(.data, mark) +node_is_exposed(.data, mark, time = 0) } \arguments{ \item{.data}{An object of a manynet-consistent class: From eebc7d4b3dc1bd95d7d2d72c63ced433947c4cbd Mon Sep 17 00:00:00 2001 From: James Hollway Date: Mon, 17 Nov 2025 21:04:50 +0100 Subject: [PATCH 13/25] Fixed net_diversity() referencing "cv" internally instead of "variation" --- R/measure_heterogeneity.R | 4 ++-- man/measure_heterogeneity.Rd | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 6f867eb0..17fbcc96 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -138,7 +138,7 @@ node_richness <- function(.data, attribute){ #' @examples #' marvel_friends <- to_unsigned(ison_marvel_relationships, "positive") #' net_diversity(marvel_friends, "Gender") -#' net_diversity(marvel_friends, "Attractive") +#' net_diversity(marvel_friends, "Appearances") #' @export net_diversity <- function(.data, attribute, method = c("blau","teachman","variation","gini")){ @@ -162,7 +162,7 @@ net_diversity <- function(.data, attribute, snet_info("{.val {method}} index is not appropriate for numeric attributes.") snet_info("Using {.val variation} coefficient instead", "({.val gini} coefficient also available).") - method <- "cv" + method <- "variation" } if(is.character(attr) && method %in% c("variation","gini")){ snet_info("{.val {method}} coefficient is not appropriate for categorical attributes.") diff --git a/man/measure_heterogeneity.Rd b/man/measure_heterogeneity.Rd index af68a0e5..bd694bd9 100644 --- a/man/measure_heterogeneity.Rd +++ b/man/measure_heterogeneity.Rd @@ -163,7 +163,9 @@ net_richness(ison_networkers) node_richness(ison_networkers, "Discipline") marvel_friends <- to_unsigned(ison_marvel_relationships, "positive") net_diversity(marvel_friends, "Gender") -net_diversity(marvel_friends, "Attractive") +marvel_friends <- mutate_nodes(marvel_friends, + Attractive = ifelse(Attractive==1, "Attractive", "Not Attractive")) +net_diversity(marvel_friends, "Appearances") node_diversity(marvel_friends, "Gender") node_diversity(marvel_friends, "Attractive") net_heterophily(marvel_friends, "Gender") From 942540c1f7ae13e51570bb5195b316e47850dfa8 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 08:40:02 +0100 Subject: [PATCH 14/25] Improved mnet printing to describe changing networks correctly --- DESCRIPTION | 2 +- R/class_networks.R | 5 +++-- man/measure_heterogeneity.Rd | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c035f015..61ef452d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: manynet Title: Many Ways to Make, Modify, Mark, and Measure Myriad Networks Version: 1.6.8 -Date: 2025-11-07 +Date: 2025-11-18 Description: Many tools for making, modifying, marking, measuring, and motifs and memberships of many different types of networks. All functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, diff --git a/R/class_networks.R b/R/class_networks.R index a82f11bb..2d4448e2 100644 --- a/R/class_networks.R +++ b/R/class_networks.R @@ -180,9 +180,10 @@ describe_ties <- function(x){ #' @export describe_changes <- function(x){ if(is_longitudinal(x)){ - paste(" over", max(tie_attribute(x, "wave")), "waves") + waves <- tie_attribute(x, "wave") + if(is.null(waves)) waves <- as_changelist(x)$time + paste(" over", max(waves), "waves") } else if (is_dynamic(x)){ - if("time" %in% net_tie_attributes(x)){ paste(" from", min(tie_attribute(x, "time"), na.rm = TRUE), "to", max(tie_attribute(x, "time"), na.rm = TRUE)) diff --git a/man/measure_heterogeneity.Rd b/man/measure_heterogeneity.Rd index bd694bd9..c575dc3c 100644 --- a/man/measure_heterogeneity.Rd +++ b/man/measure_heterogeneity.Rd @@ -163,8 +163,6 @@ net_richness(ison_networkers) node_richness(ison_networkers, "Discipline") marvel_friends <- to_unsigned(ison_marvel_relationships, "positive") net_diversity(marvel_friends, "Gender") -marvel_friends <- mutate_nodes(marvel_friends, - Attractive = ifelse(Attractive==1, "Attractive", "Not Attractive")) net_diversity(marvel_friends, "Appearances") node_diversity(marvel_friends, "Gender") node_diversity(marvel_friends, "Attractive") From d06c0f159e98fa301f4de71ab31664a6bde09402 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 14:13:14 +0100 Subject: [PATCH 15/25] Added delete_changes() for removing all changes from an mnet object --- NAMESPACE | 1 + R/manip_nodes.R | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index d5d6004c..8bc24682 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -353,6 +353,7 @@ export(create_star) export(create_tree) export(create_wheel) export(create_windmill) +export(delete_changes) export(delete_nodes) export(delete_ties) export(describe_changes) diff --git a/R/manip_nodes.R b/R/manip_nodes.R index 3dafb6f6..d36df743 100644 --- a/R/manip_nodes.R +++ b/R/manip_nodes.R @@ -327,6 +327,12 @@ add_changes <- function(.data, changes){ .data %>% mutate_nodes(diffusion = "S") } +#' @rdname manip_changes +#' @export +delete_changes <- function(.data){ + igraph::delete_graph_attr(.data, "changes") +} + #' @rdname manip_changes #' @export mutate_changes <- function(.data, ...) UseMethod("mutate_changes") From 61002343b7f7f26ba0c9f0b9f1858106130d0782 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 14:13:49 +0100 Subject: [PATCH 16/25] to_waves() now splits changing networks --- R/manip_split.R | 52 ++++++++++++++++++++++++++++++++++---------- man/manip_changes.Rd | 3 +++ 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/R/manip_split.R b/R/manip_split.R index b0c0fe67..e5f4ba0e 100644 --- a/R/manip_split.R +++ b/R/manip_split.R @@ -182,19 +182,49 @@ to_waves <- function(.data, attribute = "wave", panels = NULL, #' @export to_waves.tbl_graph <- function(.data, attribute = "wave", panels = NULL, cumulative = FALSE) { - wp <- unique(tie_attribute(.data, attribute)) - if(!is.null(panels)) - wp <- intersect(panels, wp) - if(length(wp) > 1) { - out <- lapply(wp, function(l){ - filter_ties(.data, !!as.name(attribute) == l) + if(is_changing(.data)){ + cl <- as_changelist(.data) + if(!attribute %in% names(cl) && "time" %in% names(cl)){ + attribute <- "time" + } + # Get all unique times in order + times <- sort(unique(cl$time)) + + # Iterate over times + waves <- lapply(times, function(t) { + # Latest changes by time t + changes <- cl %>% + dplyr::filter(time <= t) %>% + dplyr::group_by(node) %>% + dplyr::summarise(var = var, + latest_value = value[which.max(time)], + .groups = "drop") + for(v in unique(changes$var)){ + upd <- rep(NA, net_nodes(.data)) + upd[changes[var = v,]$node] <- changes[var = v,]$latest_value + old <- node_attribute(.data, v) + out <- .data %>% mutate_nodes(!!v := dplyr::coalesce(upd, old)) + } + out <- delete_changes(out) + out }) - names(out) <- wp + names(waves) <- paste("Wave", times) + return(waves) } else { - out <- filter_ties(.data, !!as.name(attribute) == wp) - } - if (isTRUE(cumulative)) { - out <- cumulative_ties(out, attribute) + wp <- unique(tie_attribute(.data, attribute)) + if(!is.null(panels)) + wp <- intersect(panels, wp) + if(length(wp) > 1) { + out <- lapply(wp, function(l){ + filter_ties(.data, !!as.name(attribute) == l) + }) + names(out) <- wp + } else { + out <- filter_ties(.data, !!as.name(attribute) == wp) + } + if (isTRUE(cumulative)) { + out <- cumulative_ties(out, attribute) + } } out[order(names(out))] } diff --git a/man/manip_changes.Rd b/man/manip_changes.Rd index 4d99324a..a8bae34c 100644 --- a/man/manip_changes.Rd +++ b/man/manip_changes.Rd @@ -3,6 +3,7 @@ \name{manip_changes} \alias{manip_changes} \alias{add_changes} +\alias{delete_changes} \alias{mutate_changes} \alias{filter_changes} \alias{select_changes} @@ -12,6 +13,8 @@ \usage{ add_changes(.data, changes) +delete_changes(.data) + mutate_changes(.data, ...) filter_changes(.data, ..., .by = NULL) From 1524147459be63114ecd456808f84d21cf491a98 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 22:15:47 +0100 Subject: [PATCH 17/25] Fixed as_diffusion() to not trim off final wave --- R/manip_as.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/manip_as.R b/R/manip_as.R index 8c463f01..70ebac18 100644 --- a/R/manip_as.R +++ b/R/manip_as.R @@ -1006,7 +1006,7 @@ as_diffusion.mnet <- function(.data, twomode = FALSE, events) { dplyr::reframe(I_new = sum(value == "I"), E_new = sum(value == "E"), R_new = sum(value == "R")) - report <- dplyr::tibble(time = seq_len(max(events$time)) - 1, + report <- dplyr::tibble(time = 0:max(events$time), n = nodes) %>% dplyr::left_join(sumchanges, by = dplyr::join_by(time)) report[is.na(report)] <- 0 From 9571cfbf95188deb5fdeabe69afcb4addb9b7eac Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 22:16:22 +0100 Subject: [PATCH 18/25] Fixed is_longitudinal() to only test whether edges are in waves --- R/mark_net.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/mark_net.R b/R/mark_net.R index c1343561..0a9941d8 100644 --- a/R/mark_net.R +++ b/R/mark_net.R @@ -105,9 +105,10 @@ is_list <- function(.data) { is_longitudinal <- function(.data) { if(is_manynet(.data)) { ig <- as_igraph(.data) - catts <- names(igraph::graph_attr(ig, "changes")) + # catts <- names(igraph::graph_attr(ig, "changes")) tatts <- igraph::edge_attr_names(ig) - return("time" %in% catts | "wave" %in% tatts | "panel" %in% tatts) + return(#"time" %in% catts | + "wave" %in% tatts | "panel" %in% tatts) } else if(is_list(.data)){ all(lapply(.data, net_nodes)==net_nodes(.data[[1]])) } From 8ef599c3981fdc9b286c88888ef9a0f5cb5c8329 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 22:16:51 +0100 Subject: [PATCH 19/25] net_infection_complete() example doesn't need recovery --- R/measure_diffusion.R | 2 +- man/measure_diffusion_infection.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/measure_diffusion.R b/R/measure_diffusion.R index 4e508810..7c2ffebe 100644 --- a/R/measure_diffusion.R +++ b/R/measure_diffusion.R @@ -215,7 +215,7 @@ net_immunity <- function(.data, normalized = TRUE){ #' @rdname measure_diffusion_infection #' @examples #' smeg <- generate_smallworld(15, 0.025) -#' smeg_diff <- play_diffusion(smeg, recovery = 0.2) +#' smeg_diff <- play_diffusion(smeg) #' net_infection_complete(smeg_diff) #' @export net_infection_complete <- function(.data){ diff --git a/man/measure_diffusion_infection.Rd b/man/measure_diffusion_infection.Rd index 387295ca..5998b0f7 100644 --- a/man/measure_diffusion_infection.Rd +++ b/man/measure_diffusion_infection.Rd @@ -42,7 +42,7 @@ highest infection rate is observed. } \examples{ smeg <- generate_smallworld(15, 0.025) - smeg_diff <- play_diffusion(smeg, recovery = 0.2) + smeg_diff <- play_diffusion(smeg) net_infection_complete(smeg_diff) net_infection_total(smeg_diff) net_infection_peak(smeg_diff) From dcda95412f80a3865d21689de7eaba0b8b98c4e5 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 22:17:04 +0100 Subject: [PATCH 20/25] Fixed node_diversity() method assignment --- R/measure_heterogeneity.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 17fbcc96..7dce2973 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -193,7 +193,7 @@ node_diversity <- function(.data, attribute, snet_info("{.val {method}} index is not appropriate for numeric attributes.") snet_info("Using {.val variation} coefficient instead", "({.val gini} coefficient also available).") - method <- "cv" + method <- "variation" } if(is.character(attr) && method %in% c("variation","gini")){ snet_info("{.val {method}} coefficient is not appropriate for categorical attributes.") From 1c0ef51f9f45cc8fb2900a46b21d3a7b491ccf71 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Tue, 18 Nov 2025 22:17:44 +0100 Subject: [PATCH 21/25] Improved to_waves() to handle both changing and longitudinal data --- R/manip_split.R | 55 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/R/manip_split.R b/R/manip_split.R index e5f4ba0e..d560f49f 100644 --- a/R/manip_split.R +++ b/R/manip_split.R @@ -182,13 +182,46 @@ to_waves <- function(.data, attribute = "wave", panels = NULL, #' @export to_waves.tbl_graph <- function(.data, attribute = "wave", panels = NULL, cumulative = FALSE) { - if(is_changing(.data)){ + out <- NULL + if(is_changing(.data) && is_longitudinal(.data)){ + cl <- as_changelist(.data) + el <- as_edgelist(.data) + + # Get all unique times in order + times <- sort(unique(cl$time)) + if(!is.null(panels)) + times <- intersect(panels, times) + + waves <- lapply(times, function(t) { + # Latest changes by time t + changes <- cl %>% + dplyr::filter(time <= t) %>% + dplyr::group_by(node) %>% + dplyr::reframe(var = var, + latest_value = value[which.max(time)], + .groups = "drop") + for(v in unique(changes$var)){ + upd <- rep(NA, net_nodes(.data)) + upd[changes[var = v,]$node] <- changes[var = v,]$latest_value + old <- node_attribute(.data, v) + if(inherits(old, "logi")) old <- as.logical(old) + out <- add_node_attribute(.data, v, dplyr::coalesce(upd, old)) + } + out <- delete_changes(out) + out <- filter_ties(out, wave == t) + out + }) + names(waves) <- paste("Wave", times) + out <- waves + } else if(is_changing(.data)){ cl <- as_changelist(.data) if(!attribute %in% names(cl) && "time" %in% names(cl)){ attribute <- "time" } # Get all unique times in order times <- sort(unique(cl$time)) + if(!is.null(panels)) + times <- intersect(panels, times) # Iterate over times waves <- lapply(times, function(t) { @@ -196,21 +229,22 @@ to_waves.tbl_graph <- function(.data, attribute = "wave", panels = NULL, changes <- cl %>% dplyr::filter(time <= t) %>% dplyr::group_by(node) %>% - dplyr::summarise(var = var, + dplyr::reframe(var = var, latest_value = value[which.max(time)], .groups = "drop") for(v in unique(changes$var)){ upd <- rep(NA, net_nodes(.data)) upd[changes[var = v,]$node] <- changes[var = v,]$latest_value old <- node_attribute(.data, v) - out <- .data %>% mutate_nodes(!!v := dplyr::coalesce(upd, old)) + if(inherits(old, "logi")) old <- as.logical(old) + out <- add_node_attribute(.data, v, dplyr::coalesce(upd, old)) } out <- delete_changes(out) out }) names(waves) <- paste("Wave", times) - return(waves) - } else { + out <- waves + } else if(is_longitudinal(.data)){ wp <- unique(tie_attribute(.data, attribute)) if(!is.null(panels)) wp <- intersect(panels, wp) @@ -223,10 +257,11 @@ to_waves.tbl_graph <- function(.data, attribute = "wave", panels = NULL, out <- filter_ties(.data, !!as.name(attribute) == wp) } if (isTRUE(cumulative)) { - out <- cumulative_ties(out, attribute) + out <- .cumulative_ties(out, attribute) } + out <- out[order(names(out))] } - out[order(names(out))] + if(is.null(out)) .data else out } #' @export @@ -248,7 +283,7 @@ to_waves.data.frame <- function(.data, attribute = "wave", panels = NULL, out <- .data[,attribute == wp] } if (isTRUE(cumulative)) { - out <- cumulative_ties(out, attribute) + out <- .cumulative_ties(out, attribute) } out } @@ -270,12 +305,12 @@ to_waves.diff_model <- function(.data, attribute = "t", panels = NULL, Recovered = node_is_recovered(diff, time = k)) } if (isTRUE(cumulative)) { - out <- cumulative_ties(out, attribute) + out <- .cumulative_ties(out, attribute) } out } -cumulative_ties <- function(x, attribute) { +.cumulative_ties <- function(x, attribute) { edges <- to <- from <- NULL thisRequires("zoo") thisRequires("purrr") From bd05a079c7ce62480a8041dc87fdb909fbc9fd5e Mon Sep 17 00:00:00 2001 From: James Hollway Date: Wed, 19 Nov 2025 10:15:41 +0100 Subject: [PATCH 22/25] Fixed as_diffusion.mnet() to return correct E and R compartments --- R/class_models.R | 1 + R/manip_as.R | 28 +++++++++++++++------------- R/measure_diffusion.R | 4 ++-- man/measure_diffusion_node.Rd | 4 ++-- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/R/class_models.R b/R/class_models.R index 52c85980..35d0b2a7 100644 --- a/R/class_models.R +++ b/R/class_models.R @@ -17,6 +17,7 @@ print.diff_model <- function(x, ..., verbose = FALSE){ if(!verbose){ x$n <- NULL x$s <- NULL + x$S_new <- NULL x$I_new <- NULL x$E_new <- NULL x$R_new <- NULL diff --git a/R/manip_as.R b/R/manip_as.R index 70ebac18..e5e54a04 100644 --- a/R/manip_as.R +++ b/R/manip_as.R @@ -1001,24 +1001,26 @@ as_diffusion.diff_model <- function(.data, twomode = FALSE, events) { #' @export as_diffusion.mnet <- function(.data, twomode = FALSE, events) { events <- as_changelist(.data) - nodes <- net_nodes(.data) + nodes <- c(net_nodes(.data)) sumchanges <- events %>% dplyr::group_by(time) %>% - dplyr::reframe(I_new = sum(value == "I"), + dplyr::reframe(S_new = sum(value == "S"), E_new = sum(value == "E"), + I_new = sum(value == "I"), R_new = sum(value == "R")) report <- dplyr::tibble(time = 0:max(events$time), n = nodes) %>% dplyr::left_join(sumchanges, by = dplyr::join_by(time)) report[is.na(report)] <- 0 - report$R <- cumsum(report$R_new) - report$I <- cumsum(report$I_new) - report$R - report$E <- ifelse(report$E_new == 0 & - cumsum(report$E_new) == max(cumsum(report$E_new)), - report$E_new, cumsum(report$E_new)) - report$E <- ifelse(report$R + report$I + report$E > report$n, - report$n - (report$R + report$I), - report$E) - report$S <- report$n - report$R - report$I - report$E + + if(all(report$E_new == 0)){ + report$S = report$n + cumsum(report$S_new - report$I_new) + report$E = rep(0, nrow(report)) + } else { + report$S = report$n + cumsum(report$S_new - report$E_new) # susceptible decreases as they become exposed + report$E = cumsum(report$E_new) - cumsum(report$I_new) # exposed become infectious + } + report$I = cumsum(report$I_new) - cumsum(report$R_new) # infectious recover + report$R = cumsum(report$R_new) - cumsum(report$S_new) # recovered accumulate report$s <- vapply(report$time, function(t){ twin <- dplyr::filter(events, events$time <= t) infected <- dplyr::filter(twin, twin$value == "I")$node @@ -1028,11 +1030,11 @@ as_diffusion.mnet <- function(.data, twomode = FALSE, events) { expos[recovered] <- F sum(expos) }, numeric(1) ) - if (any(report$R + report$I + report$E + report$S != report$n)) { + if (any((report$R + report$I + report$E + report$S) != report$n)) { snet_abort("Oops, something is wrong") } report <- dplyr::select(report, - dplyr::any_of(c("time", "n", "S", "s", "E", "E_new", + dplyr::any_of(c("time", "n", "S", "s", "S_new", "E", "E_new", "I", "I_new", "R", "R_new"))) # make_diff_model(events, report, .data) class(report) <- c("diff_model", class(report)) diff --git a/R/measure_diffusion.R b/R/measure_diffusion.R index 7c2ffebe..a0558062 100644 --- a/R/measure_diffusion.R +++ b/R/measure_diffusion.R @@ -426,8 +426,8 @@ node_thresholds <- function(.data, normalized = TRUE, lag = 1){ } #' @rdname measure_diffusion_node -#' @section Infection length: -#' `node_infection_length()` measures the average length of time that nodes +#' @section Recovery: +#' `node_recovery()` measures the average length of time that nodes #' that become infected remain infected in a compartmental model with recovery. #' Infections that are not concluded by the end of the study period are #' calculated as infinite. diff --git a/man/measure_diffusion_node.Rd b/man/measure_diffusion_node.Rd index 4178db22..60755615 100644 --- a/man/measure_diffusion_node.Rd +++ b/man/measure_diffusion_node.Rd @@ -79,9 +79,9 @@ That is, \eqn{theta} is now a proportion, and works regardless of whether \eqn{w} is weighted or not. } -\section{Infection length}{ +\section{Recovery}{ -\code{node_infection_length()} measures the average length of time that nodes +\code{node_recovery()} measures the average length of time that nodes that become infected remain infected in a compartmental model with recovery. Infections that are not concluded by the end of the study period are calculated as infinite. From b0d86e3326a253435cbf7de573f42dc2f615a27e Mon Sep 17 00:00:00 2001 From: James Hollway Date: Wed, 19 Nov 2025 10:23:56 +0100 Subject: [PATCH 23/25] #minor bump --- DESCRIPTION | 4 ++-- NEWS.md | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 61ef452d..6e414dd7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: manynet Title: Many Ways to Make, Modify, Mark, and Measure Myriad Networks -Version: 1.6.8 -Date: 2025-11-18 +Version: 1.7.0 +Date: 2025-11-19 Description: Many tools for making, modifying, marking, measuring, and motifs and memberships of many different types of networks. All functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, diff --git a/NEWS.md b/NEWS.md index 82b0d1a1..bc839c98 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,20 +1,38 @@ -# manynet 1.6.8 +# manynet 1.7.0 ## Making - Improved vector printing to be more succinct and suggest `print_all()` +- Improved printing methods for changing mnet objects + +## Modifying + +- Added `delete_changes()` for deleting all changes from a changing network +- Improved `to_waves()` to work with networks that are changing, longitudinal, or both +- Fixed `as_diffusion()` to not trim off final wave in the report +- Fixed `as_diffusion()` to return correct E and R compartments for waning diffusion models + +## Marking + +- Improved `is_longitudinal()` to only check for tie waves and not nodal changes ## Measuring -- Improved `net_diversity()` +- Improved `net_diversity()` and `node_diversity()` - No longer offers a cluster option, which can be obtained using `over_membership()` - Added more methods for calculating diversity, including Teachman's, coefficient of variation, and the Gini coefficient -- Improved `net_diversity()` and `node_diversity()` to use and declare methods appropriate for the vector of attributes -- Updated documentation for richness, diversity, and homophily measures + - Improved `net_diversity()` and `node_diversity()` to use and declare methods appropriate for the vector of attributes - Added `net_homophily()` and `node_homophily()` for measuring homophily according to different methods including - Krackhardt's EI index of heterophily as well as its inverse as a measure of homophily - Yule's Q as a further measure of homophily - Geary's C as a measure of homophily for numeric attributes +- Updated documentation for richness, diversity, and homophily measures +- Fixed `node_is_latent()`, `node_is_infected()`, `node_is_recoverd()` to work with changing networks +- Improved `node_is_exposed()` to work with changing networks and now accepts a time argument + +## Tutorials + +- Fixed description of `to_directed()` and `to_redirected()` in the data tutorial # manynet 1.6.7 From 5cf653586d120fe4a7327cd6a20ef025b4fe4ee5 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Wed, 19 Nov 2025 10:27:26 +0100 Subject: [PATCH 24/25] Refactored geary --- R/measure_heterogeneity.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/measure_heterogeneity.R b/R/measure_heterogeneity.R index 7dce2973..7c57659c 100644 --- a/R/measure_heterogeneity.R +++ b/R/measure_heterogeneity.R @@ -306,7 +306,7 @@ net_homophily <- function(.data, attribute, snet_info("Using {.val geary}'s C instead.") method <- "geary" } - if(!is.numeric(attribute) && method %in% c("geary")){ + if(!is.numeric(attribute) && method == "geary"){ snet_info("{.val {method}} index is not appropriate for categorical attributes.") snet_info("Using {.val ie} index instead.") method <- "ie" @@ -387,7 +387,7 @@ node_homophily <- function(.data, attribute, snet_info("Using {.val geary}'s C instead.") method <- "geary" } - if(!is.numeric(attribute) && method %in% c("geary")){ + if(!is.numeric(attribute) && method == "geary"){ snet_info("{.val {method}} index is not appropriate for categorical attributes.") snet_info("Using {.val ie} index instead.") method <- "ie" From 44508be44fbe89dfca3ee03d60cfd9601546f24f Mon Sep 17 00:00:00 2001 From: James Hollway Date: Wed, 19 Nov 2025 11:09:13 +0100 Subject: [PATCH 25/25] Update cran comments --- cran-comments.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cran-comments.md b/cran-comments.md index 7c5065eb..7166b318 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -7,4 +7,6 @@ ## R CMD check results -0 errors | 0 warnings | 0 notes \ No newline at end of file +0 errors | 0 warnings | 0 notes + +- Required for versions of autograph and migraph that fix errors \ No newline at end of file