diff --git a/DESCRIPTION b/DESCRIPTION index 5c76d875..ad08686a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: netdiffuseR Title: Analysis of Diffusion and Contagion Processes on Networks -Version: 1.25.0 +Version: 1.26.0 Authors@R: c( person("George", "Vega Yon", email="g.vegayon@gmail.com", role=c("aut", "cre"), comment=c(ORCID = "0000-0002-3171-0844", what="Rewrite functions with Rcpp, plus new features") @@ -57,11 +57,13 @@ URL: https://github.com/USCCANA/netdiffuseR, https://USCCANA.github.io/netdiffuseR/ BugReports: https://github.com/USCCANA/netdiffuseR/issues Classification/MSC: 90C35, 90B18, 91D30 -Collate: +Collate: 'RcppExports.R' 'imports.r' 'graph_data.r' 'adjmat.r' + 'adoption_mechanisms.R' + 'disadoption_mechanisms.R' 'bass.r' 'bootnet.r' 'citer_environment.R' @@ -70,8 +72,10 @@ Collate: 'degree_adoption_diagnostic.R' 'diffnet-c.R' 'diffnet-class.r' + 'diffnet-epi.R' 'diffnet-indexing.r' 'diffnet-methods.r' + 'epi_metrics.R' 'egonets.R' 'formula.r' 'igraph.r' @@ -89,8 +93,11 @@ Collate: 'rdiffnet.r' 'read_write_foreign.r' 'select_egoalter.R' + 'source_attribution.R' 'spatial.R' 'stats.R' + 'status_accessors.R' 'struct_equiv.R' 'struct_test.R' 'survey_to_diffnet.R' + 'transmission.R' diff --git a/NAMESPACE b/NAMESPACE index 4a6bdac3..f058ec29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,8 @@ S3method(dimnames,diffnet) S3method(fitbass,default) S3method(fitbass,diffnet) S3method(ftable,diffnet_adopters) +S3method(generation_time,default) +S3method(generation_time,diffnet_epi) S3method(hist,diffnet_bootnet) S3method(hist,diffnet_struct_test) S3method(image,diffnet_diffmap) @@ -51,6 +53,8 @@ S3method(is_undirected,default) S3method(is_undirected,diffnet) S3method(is_valued,default) S3method(is_valued,diffnet) +S3method(peak_prevalence,diffnet) +S3method(peak_time,diffnet) S3method(plot,diffnet) S3method(plot,diffnet_adopters) S3method(plot,diffnet_bass) @@ -59,6 +63,7 @@ S3method(plot,diffnet_degSeq) S3method(plot,diffnet_diffmap) S3method(plot,diffnet_hr) S3method(plot,diffnet_mentor) +S3method(plot,netdiffuseR_repr) S3method(plot_diffnet,default) S3method(plot_diffnet,diffnet) S3method(plot_diffnet2,default) @@ -70,14 +75,29 @@ S3method(print,degree_adoption_diagnostic) S3method(print,diffnet) S3method(print,diffnet_bootnet) S3method(print,diffnet_diffmap) +S3method(print,diffnet_epi) S3method(print,diffnet_se) S3method(print,diffnet_struct_test) +S3method(print,netdiffuseR_generation_time) +S3method(print,netdiffuseR_repr) +S3method(print,netdiffuseR_sar) +S3method(print,netdiffuseR_survival) S3method(recode,data.frame) S3method(recode,matrix) +S3method(repr_number,default) +S3method(repr_number,diffnet_epi) +S3method(secondary_attack_rate,default) +S3method(secondary_attack_rate,diffnet_epi) S3method(str,diffnet) S3method(summary,diffnet) S3method(summary,diffnet_adoptChanges) +S3method(summary,diffnet_epi) +S3method(survival_curve,diffnet) S3method(t,diffnet) +S3method(toa,diffnet) +S3method(toa_all,diffnet) +S3method(tod,diffnet) +S3method(tod_all,diffnet) S3method(transformGraphBy,dgCMatrix) S3method(transformGraphBy,diffnet) export("%*%") @@ -85,12 +105,17 @@ export("diffnet.attrs<-") export("diffnet.toa<-") export(adjmat_to_edgelist) export(adopt_changes) +export(adoptmech_logit) +export(adoptmech_probit) +export(adoptmech_threshold) export(approx_geodesic) export(approx_geodist) export(as.dgCMatrix) export(as_dgCMatrix) export(as_diffnet) +export(as_diffnet_epi) export(as_spmat) +export(as_transmission_tree) export(bass_F) export(bass_dF) export(bass_f) @@ -114,6 +139,10 @@ export(diffnet_to_network) export(diffnet_to_networkDynamic) export(diffreg) export(diffusionMap) +export(disadoptmech_bithreshold) +export(disadoptmech_logit) +export(disadoptmech_probit) +export(disadoptmech_random) export(drawColorKey) export(drop_isolated) export(edgelist_to_adjmat) @@ -123,12 +152,14 @@ export(ego_variance) export(egonet_attrs) export(exposure) export(fitbass) +export(generation_time) export(graph_power) export(grid_distribution) export(hazard_rate) export(igraph_to_diffnet) export(igraph_vertex_rescale) export(infection) +export(is.diffnet_epi) export(is_multiple) export(is_self) export(is_undirected) @@ -150,6 +181,8 @@ export(nnodes) export(nodes) export(nslices) export(nvertices) +export(peak_prevalence) +export(peak_time) export(permute_graph) export(plot_adopters) export(plot_diffnet) @@ -165,6 +198,7 @@ export(read_pajek) export(read_ucinet) export(read_ucinet_head) export(recode) +export(repr_number) export(resample_graph) export(rescale_vertex_igraph) export(rewire_graph) @@ -175,17 +209,28 @@ export(rgraph_er) export(rgraph_ws) export(ring_lattice) export(round_to_seq) +export(secondary_attack_rate) export(select_egoalter) +export(source_attribution_earliest) +export(source_attribution_uniform) +export(source_attribution_weighted) export(split_behaviors) export(struct_equiv) export(struct_test) export(struct_test_asymp) export(survey_to_diffnet) +export(survival_curve) export(susceptibility) export(threshold) +export(toa) +export(toa_all) export(toa_diff) export(toa_mat) +export(tod) +export(tod_all) export(transformGraphBy) +export(transmission_tree) +export(transmission_tree_from_events) export(vertex_covariate_compare) export(vertex_covariate_dist) export(vertex_mahalanobis_dist) diff --git a/NEWS.md b/NEWS.md index 05b028b5..f416c1fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,9 @@ * New dataset `epigames` and `epigamesDiffNet`: a simulated epidemic game network with 594 nodes and 15 time periods from the WKU Epi Games study. +* `exposure()` and `rdiffnet()` now support `mode = "stochastic"`, allowing + for probabilistic interpretation of edge weights in exposure calculations. + ## Internal changes * Fixed CRAN example error in `round_to_seq()`: `plot(w, x)` replaced with @@ -19,7 +22,6 @@ * Removed `configure` framework. R already provides paths and configuration for OpenMP. - # Changes in netdiffuseR version 1.24.0 (2025-12-09) * New function `degree_adoption_diagnostic()` analyzes the correlation between network diff --git a/R/adjmat.r b/R/adjmat.r index ca5bc83c..bf9eadb9 100644 --- a/R/adjmat.r +++ b/R/adjmat.r @@ -537,6 +537,109 @@ toa_mat.default <- function(per, t0, t1) { ) } +# Status-array helpers. +# +# A -status- array carries the multi-cycle state of a diffnet: 1 wherever +# node i is adopted at time t for behaviour q, 0 otherwise. It need not be +# monotone (adoption + recovery cycles, behavioural relapse, etc.). +# +# Single-behaviour layout: an n x T integer matrix. +# Multi-behaviour layout: a length-Q list of n x T integer matrices. +# +# These helpers mirror -toa_mat- (which builds the absorbing case from -toa-) +# but consume -status- directly. + +# Internal: build {adopt, cumadopt} matrices from a single-behaviour status. +# - cumadopt is exactly status (alias; no copy thanks to R's COW). +# - adopt[i, t] = 1 iff status[i, t] == 1 AND (t == 1 OR status[i, t-1] == 0). +# That is, every "fresh" entry into the adopted state. +status_mat_single <- function(status, t0, t1, labels = NULL) { + storage.mode(status) <- "integer" + n <- nrow(status) + T <- ncol(status) + + cumadopt <- status + adopt <- matrix(0L, n, T) + if (T >= 1L) adopt[, 1L] <- cumadopt[, 1L] + if (T >= 2L) { + prev <- cumadopt[, 1:(T - 1L), drop = FALSE] + curr <- cumadopt[, 2:T, drop = FALSE] + fresh <- (curr == 1L) & (prev == 0L) + sub <- adopt[, 2:T, drop = FALSE] + sub[fresh] <- 1L + adopt[, 2:T] <- sub + } + + rn <- if (length(labels)) labels else seq_len(n) + dimnames(adopt) <- list(rn, t0:t1) + dimnames(cumadopt) <- list(rn, t0:t1) + list(adopt = adopt, cumadopt = cumadopt) +} + +# Public-facing dispatcher: returns the same shape as -toa_mat-. +status_mat <- function(status, t0, t1, labels = NULL) { + if (is.list(status)) { + lapply(status, status_mat_single, t0 = t0, t1 = t1, labels = labels) + } else { + status_mat_single(status, t0 = t0, t1 = t1, labels = labels) + } +} + +# Derive -toa- from a status array: first time of 1 per node (per behaviour). +# Returns an integer vector for single-behaviour, an n x Q matrix otherwise. +# Names / rownames are intentionally NOT preserved — -new_diffnet- assigns +# them from -meta$ids- downstream so the diffnet's labelling stays canonical. +toa_from_status <- function(status, t0 = 1L) { + if (is.list(status)) { + Q <- length(status) + n <- nrow(status[[1L]]) + out <- matrix(NA_integer_, n, Q) + for (q in seq_len(Q)) out[, q] <- toa_from_status(status[[q]], t0 = t0) + return(out) + } + + n <- nrow(status) + vapply(seq_len(n), function(i) { + idx <- which(status[i, ] == 1L) + if (length(idx)) as.integer(idx[1L] + t0 - 1L) else NA_integer_ + }, integer(1L)) +} + +# Validation for -status- supplied to -new_diffnet-. +validate_status <- function(status, n, num_of_behaviors) { + if (num_of_behaviors == 1L) { + if (!is.matrix(status)) + stop("-status- for a single-behaviour diffnet must be an n x T matrix.") + if (nrow(status) != n) + stop("-status- has ", nrow(status), " rows but the graph has ", n, + " nodes.") + if (any(!status %in% c(0L, 1L, NA_integer_, 0, 1))) + stop("-status- entries must be 0 or 1.") + } else { + if (!is.list(status)) + stop("-status- for a multi-behaviour diffnet must be a length-Q list ", + "of n x T matrices.") + if (length(status) != num_of_behaviors) + stop("-status- has length ", length(status), + " but -toa- carries ", num_of_behaviors, " behaviours.") + for (q in seq_along(status)) { + if (!is.matrix(status[[q]])) + stop("-status[[", q, "]]- must be an n x T matrix.") + if (nrow(status[[q]]) != n) + stop("-status[[", q, "]]- has ", nrow(status[[q]]), + " rows but the graph has ", n, " nodes.") + if (any(!status[[q]] %in% c(0L, 1L, NA_integer_, 0, 1))) + stop("-status[[", q, "]]- entries must be 0 or 1.") + } + # All behaviours must share a common T. + Ts <- vapply(status, ncol, integer(1L)) + if (length(unique(Ts)) > 1L) + stop("All entries of -status- must share the same number of columns ", + "(time periods).") + } + invisible(TRUE) +} + # @rdname toa_mat # @export toa_mat.numeric <- function(times, labels=NULL, diff --git a/R/adoption_mechanisms.R b/R/adoption_mechanisms.R new file mode 100644 index 00000000..0758a129 --- /dev/null +++ b/R/adoption_mechanisms.R @@ -0,0 +1,80 @@ +#' Adoption mechanisms for \code{rdiffnet} +#' +#' A family of pluggable kernels that decide which nodes adopt at each +#' simulation step. Pass any of these as the \code{adoption_mechanism} +#' argument of \code{\link{rdiffnet}}, or write your own function that +#' follows the same contract. +#' +#' @param expo Numeric vector of length \eqn{n}. Per-node exposure at +#' the current time step (\code{expo[, , q]} for behaviour \eqn{q}). +#' @param thresholds Numeric vector of length \eqn{n}. Per-node adoption +#' threshold (\code{thr[, q]}). Used by the deterministic kernel; +#' passed but ignored by the stochastic kernels so user-defined +#' mechanisms can choose whether to use it. +#' @param not_adopted Logical vector of length \eqn{n}. \code{TRUE} for +#' nodes that have not yet adopted behaviour \eqn{q} +#' (\code{is.na(toa[, q])}). +#' @param time Integer scalar. Current simulation time step. +#' @param pars Named list of mechanism-specific parameters. Each +#' kernel documents which fields it expects. +#' +#' @return Integer vector of node indices that adopt at this step. +#' +#' @details +#' The contract is intentionally minimal so that any user can write a +#' mechanism without reading the package internals: receive the current +#' state, decide who adopts, return the indices. The three kernels +#' below cover the common cases. +#' +#' \describe{ +#' \item{\code{adoptmech_threshold}}{Tom Valente's deterministic +#' threshold rule. Adopt iff \code{expo[i] >= thresholds[i]}. +#' Ignores \code{pars}.} +#' \item{\code{adoptmech_logit}}{Bernoulli rule with logit link. +#' Adopt with probability \code{plogis(beta0 + beta_expo * expo[i])}. +#' Requires \code{pars$beta0} and \code{pars$beta_expo}.} +#' \item{\code{adoptmech_probit}}{Bernoulli rule with probit link. +#' Adopt with probability \code{pnorm(beta0 + beta_expo * expo[i])}. +#' Requires \code{pars$beta0} and \code{pars$beta_expo}.} +#' } +#' +#' @examples +#' set.seed(2026) +#' +#' # Default deterministic threshold +#' dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", +#' seed.p.adopt = 0.1, stop.no.diff = FALSE) +#' +#' # Stochastic logit mechanism +#' dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", +#' seed.p.adopt = 0.1, stop.no.diff = FALSE, +#' adoption_mechanism = adoptmech_logit, +#' adoption_pars = list(beta0 = -2, beta_expo = 5)) +#' +#' @author Aníbal Olivera M. +#' @name adoption_mechanisms +NULL + +#' @rdname adoption_mechanisms +#' @export +adoptmech_threshold <- function(expo, thresholds, not_adopted, time, pars) { + which((expo >= thresholds) & not_adopted) +} + +#' @rdname adoption_mechanisms +#' @export +adoptmech_logit <- function(expo, thresholds, not_adopted, time, pars) { + if (is.null(pars$beta0) || is.null(pars$beta_expo)) + stop("-adoptmech_logit- requires -adoption_pars- with both -beta0- and -beta_expo-.") + p <- stats::plogis(pars$beta0 + pars$beta_expo * expo) + which((stats::runif(length(p)) < p) & not_adopted) +} + +#' @rdname adoption_mechanisms +#' @export +adoptmech_probit <- function(expo, thresholds, not_adopted, time, pars) { + if (is.null(pars$beta0) || is.null(pars$beta_expo)) + stop("-adoptmech_probit- requires -adoption_pars- with both -beta0- and -beta_expo-.") + p <- stats::pnorm(pars$beta0 + pars$beta_expo * expo) + which((stats::runif(length(p)) < p) & not_adopted) +} diff --git a/R/data.r b/R/data.r index 4ed3b835..9fe59d19 100644 --- a/R/data.r +++ b/R/data.r @@ -980,6 +980,21 @@ NULL # "epigames" #' A directed dynamic graph with 594 vertices and 15 time periods. The attributes #' in the graph are described in \code{\link{epigames}}. #' +#' By default, this \code{diffnet} object is **non-cumulative** (each slice represents +#' ephemeral daily contacts) and **valued** (edge weights represent contact duration in seconds). +#' +#' To reconstruct the classic cumulative/binarized network, you can run: +#' +#' \preformatted{ +#' epigames_cumul <- epigamesDiffNet +#' +#' # 1. Accumulate the history across time periods +#' epigames_cumul$graph <- Reduce("+", epigames_cumul$graph, accumulate = TRUE) +#' +#' # 2. Apply a logical cut-off to binarize the network +#' epigames_cumul$graph <- lapply(epigames_cumul$graph, function(m) { m@x[] <- 1; m }) +#' } +#' #' Non-adopters have \code{toa = NA}. #' #' @format A \code{\link{diffnet}} class object. diff --git a/R/degree_adoption_diagnostic.R b/R/degree_adoption_diagnostic.R index 1fa9adae..7d1600d3 100644 --- a/R/degree_adoption_diagnostic.R +++ b/R/degree_adoption_diagnostic.R @@ -78,12 +78,14 @@ #' #' # Different degree aggregation strategies #' result_first <- degree_adoption_diagnostic(kfamilyDiffNet, degree_strategy = "first") -#' result_last <- degree_adoption_diagnostic(kfamilyDiffNet, degree_strategy = "last") +#' result_last <- degree_adoption_diagnostic(kfamilyDiffNet, degree_strategy = "last") #' #' # Multi-diffusion (toy) ---------------------------------------------------- #' set.seed(999) -#' n <- 40; t <- 5; q <- 2 -#' garr <- rgraph_ws(n, t, p=.3) +#' n <- 40 +#' t <- 5 +#' q <- 2 +#' garr <- rgraph_ws(n, t, p = .3) #' diffnet_multi <- rdiffnet(seed.graph = garr, t = t, seed.p.adopt = rep(list(0.1), q)) #' #' # pooled (one combined analysis) @@ -96,20 +98,19 @@ #' @family statistics #' @export degree_adoption_diagnostic <- function( - graph, - degree_strategy = c("mean", "first", "last"), - bootstrap = TRUE, - R = 1000, - conf.level = 0.95, - toa = NULL, - t0 = NULL, t1 = NULL, - name = NULL, - behavior = NULL, - combine = c("none", "pooled", "average", "earliest"), - min_adopters = 3, - valued = getOption("diffnet.valued", FALSE), - ... -) { + graph, + degree_strategy = c("mean", "first", "last"), + bootstrap = TRUE, + R = 1000, + conf.level = 0.95, + toa = NULL, + t0 = NULL, t1 = NULL, + name = NULL, + behavior = NULL, + combine = c("none", "pooled", "average", "earliest"), + min_adopters = 3, + valued = getOption("diffnet.valued", FALSE), + ...) { # Check that bootstrap is a logical scalar if (!is.logical(bootstrap) || length(bootstrap) != 1 || is.na(bootstrap)) { stop("'bootstrap' must be a logical scalar") @@ -154,8 +155,10 @@ degree_adoption_diagnostic <- function( } behavior_indices <- match(behavior, colnames(toa)) if (any(is.na(behavior_indices))) { - stop("Some behavior names not found in colnames(toa): ", - paste(behavior[is.na(behavior_indices)], collapse = ", ")) + stop( + "Some behavior names not found in colnames(toa): ", + paste(behavior[is.na(behavior_indices)], collapse = ", ") + ) } } else if (is.numeric(behavior)) { behavior_indices <- behavior @@ -186,8 +189,10 @@ degree_adoption_diagnostic <- function( combined_data <- prepare_combined_data(degrees, toa, combine, min_adopters, Q) if (nrow(combined_data) < min_adopters) { - stop("Insufficient adopters for correlation analysis. (n=", nrow(combined_data), - ", minimum = ", min_adopters, ").") + stop( + "Insufficient adopters for correlation analysis. (n=", nrow(combined_data), + ", minimum = ", min_adopters, ")." + ) } # Compute correlations @@ -200,8 +205,8 @@ degree_adoption_diagnostic <- function( NULL } - # Determine if undirected (graph is always a diffnet here) - undirected <- isTRUE(is_undirected(graph)) + # Determine if undirected by checking matrices + undirected <- check_undirected_graph(graph) # Return results structure(list( @@ -232,8 +237,12 @@ process_graph_input <- function(graph, toa, t0, t1, name, ...) { # If graph is a list, ensure all elements are dgCMatrix if (is.list(graph)) { graph <- lapply(graph, function(g) { - if (inherits(g, "dgCMatrix")) return(g) - if (is.matrix(g)) return(as(Matrix::Matrix(g, sparse = TRUE), "dgCMatrix")) + if (inherits(g, "dgCMatrix")) { + return(g) + } + if (is.matrix(g)) { + return(as(Matrix::Matrix(g, sparse = TRUE), "dgCMatrix")) + } stop("All elements of the graph list must be matrices or dgCMatrix.") }) } @@ -271,31 +280,16 @@ compute_degree_measures <- function(graph, degree_strategy, valued) { indegree <- rowMeans(dgr(graph, cmode = "indegree", valued = valued), na.rm = TRUE) outdegree <- rowMeans(dgr(graph, cmode = "outdegree", valued = valued), na.rm = TRUE) } else { - deg_matrix <- dgr(graph, valued = valued) - if (length(dim(deg_matrix)) == 3) { - # Dynamic case - if (degree_strategy == "first") { - indegree <- deg_matrix[, 1, "indegree"] - outdegree <- deg_matrix[, 1, "outdegree"] - } else if (degree_strategy == "last") { - last_time <- dim(deg_matrix)[2] - indegree <- deg_matrix[, last_time, "indegree"] - outdegree <- deg_matrix[, last_time, "outdegree"] - } - } else if (length(dim(deg_matrix)) == 2) { - # Static case: check for column names, else use position - cn <- colnames(deg_matrix) - if (!is.null(cn) && all(c("indegree", "outdegree") %in% cn)) { - indegree <- deg_matrix[, "indegree"] - outdegree <- deg_matrix[, "outdegree"] - } else if (ncol(deg_matrix) >= 2) { - indegree <- deg_matrix[, 1] - outdegree <- deg_matrix[, 2] - } else { - stop("Degree matrix does not have expected columns for static graph.") - } - } else { - stop("Unexpected degree matrix dimensions in compute_degree_measures.") + # Request in-degree and out-degree separately and explicitly + indeg_mat <- dgr(graph, cmode = "indegree", valued = valued) + outdeg_mat <- dgr(graph, cmode = "outdegree", valued = valued) + + if (degree_strategy == "first") { + indegree <- if (is.matrix(indeg_mat)) indeg_mat[, 1] else indeg_mat + outdegree <- if (is.matrix(outdeg_mat)) outdeg_mat[, 1] else outdeg_mat + } else { # last + indegree <- if (is.matrix(indeg_mat)) indeg_mat[, ncol(indeg_mat)] else indeg_mat + outdegree <- if (is.matrix(outdeg_mat)) outdeg_mat[, ncol(outdeg_mat)] else outdeg_mat } } @@ -328,8 +322,8 @@ analyze_multi_behaviors_separately <- function(degrees, toa, min_adopters, boots toa = toa_q[adopters_q] ) - correlations_matrix[1, q] <- cor_safe(data_q$indegree, data_q$toa ) - correlations_matrix[2, q] <- cor_safe(data_q$outdegree, data_q$toa ) + correlations_matrix[1, q] <- cor_safe(data_q$indegree, data_q$toa) + correlations_matrix[2, q] <- cor_safe(data_q$outdegree, data_q$toa) sample_sizes[q] <- nrow(data_q) if (bootstrap) { @@ -341,11 +335,7 @@ analyze_multi_behaviors_separately <- function(degrees, toa, min_adopters, boots } # Determine if undirected - undirected <- if (inherits(graph, "diffnet")) { - is_undirected(graph) - } else { - check_undirected_graph(graph) - } + undirected <- check_undirected_graph(graph) structure(list( correlations = correlations_matrix, @@ -391,7 +381,9 @@ prepare_combined_data <- function(degrees, toa, combine, min_adopters, Q) { } else if (combine == "earliest") { # Earliest TOA across behaviors per actor toa_min <- apply(toa, 1, function(row) { - if (all(is.na(row))) return(NA_real_) + if (all(is.na(row))) { + return(NA_real_) + } min(row, na.rm = TRUE) }) toa_min[is.infinite(toa_min)] <- NA @@ -414,12 +406,12 @@ compute_correlations <- function(data) { compute_bootstrap_results <- function(combined_data, R, conf.level) { # Compute baseline correlations - base_corr <- compute_correlations(combined_data) - indeg_corr <- base_corr[["indegree_toa"]] + base_corr <- compute_correlations(combined_data) + indeg_corr <- base_corr[["indegree_toa"]] outdeg_corr <- base_corr[["outdegree_toa"]] indeg_boot_list <- NULL - out_boot_list <- NULL + out_boot_list <- NULL # Out-degree if (!is.na(outdeg_corr)) { @@ -430,13 +422,16 @@ compute_bootstrap_results <- function(combined_data, R, conf.level) { } boot_obj_out <- boot::boot(combined_data, statistic = safe_bootstrap_out, R = R) bias_out <- mean(boot_obj_out$t, na.rm = TRUE) - outdeg_corr - se_out <- stats::sd(boot_obj_out$t, na.rm = TRUE) - - ci_out <- tryCatch({ - bci <- boot::boot.ci(boot_obj_out, conf = conf.level, type = "perc") - # Percentile CI vector (low, high) - if (!is.null(bci$percent)) bci$percent[4:5] else NULL - }, error = function(e) NULL) + se_out <- stats::sd(boot_obj_out$t, na.rm = TRUE) + + ci_out <- tryCatch( + { + bci <- boot::boot.ci(boot_obj_out, conf = conf.level, type = "perc") + # Percentile CI vector (low, high) + if (!is.null(bci$percent)) bci$percent[4:5] else NULL + }, + error = function(e) NULL + ) out_boot_list <- list( correlation = outdeg_corr, @@ -462,12 +457,15 @@ compute_bootstrap_results <- function(combined_data, R, conf.level) { } boot_obj_in <- boot::boot(combined_data, statistic = safe_bootstrap_in, R = R) bias_in <- mean(boot_obj_in$t, na.rm = TRUE) - indeg_corr - se_in <- stats::sd(boot_obj_in$t, na.rm = TRUE) - - ci_in <- tryCatch({ - bci <- boot::boot.ci(boot_obj_in, conf = conf.level, type = "perc") - if (!is.null(bci$percent)) bci$percent[4:5] else NULL - }, error = function(e) NULL) + se_in <- stats::sd(boot_obj_in$t, na.rm = TRUE) + + ci_in <- tryCatch( + { + bci <- boot::boot.ci(boot_obj_in, conf = conf.level, type = "perc") + if (!is.null(bci$percent)) bci$percent[4:5] else NULL + }, + error = function(e) NULL + ) indeg_boot_list <- list( correlation = indeg_corr, @@ -504,11 +502,15 @@ create_empty_result <- function(degree_strategy, original_call, combine, sample_ } check_undirected_graph <- function(graph) { + # If the input is a diffnet, we extract its raw list of matrices + if (inherits(graph, "diffnet")) { + graph <- graph$graph + } if (is.list(graph)) { return(all(sapply(graph, function(g) isSymmetric(as.matrix(g))))) } if (is.array(graph) && length(dim(graph)) == 3) { - return(all(sapply(seq_len(dim(graph)[3]), function(t) isSymmetric(as.matrix(graph[,,t]))))) + return(all(sapply(seq_len(dim(graph)[3]), function(t) isSymmetric(as.matrix(graph[, , t]))))) } if (is.matrix(graph)) { return(isSymmetric(as.matrix(graph))) @@ -568,7 +570,7 @@ print_single_behavior_results <- function(x, undirected) { # Print correlations cat("Correlations:\n") if (undirected) { - deg_r <- indeg_r # For undirected graphs, in-degree = out-degree = degree + deg_r <- indeg_r # For undirected graphs, in-degree = out-degree = degree cat(sprintf(" Degree - Time of Adoption: %.3f\n", deg_r)) } else { cat(sprintf(" In-degree - Time of Adoption: %.3f\n", indeg_r)) @@ -582,16 +584,24 @@ print_single_behavior_results <- function(x, undirected) { bootstrap_data <- x$bootstrap deg_ci <- if (undirected && !is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } indeg_ci <- if (!is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } outdeg_ci <- if (!is.null(bootstrap_data$outdegree$conf_int)) { bootstrap_data$outdegree$conf_int - } else NULL + } else { + NULL + } lvl <- if (!is.null(bootstrap_data$indegree$conf_level)) { bootstrap_data$indegree$conf_level * 100 - } else NA_real_ + } else { + NA_real_ + } if (undirected) { explain_degree_correlation("Degree", deg_r, deg_ci, lvl_arg = lvl) @@ -648,16 +658,24 @@ print_multi_behavior_results <- function(x, undirected) { bootstrap_data <- if (!is.null(x$bootstrap)) x$bootstrap[[j]] else NULL deg_ci <- if (undirected && !is.null(bootstrap_data) && !is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } indeg_ci <- if (!is.null(bootstrap_data) && !is.null(bootstrap_data$indegree$conf_int)) { bootstrap_data$indegree$conf_int - } else NULL + } else { + NULL + } outdeg_ci <- if (!is.null(bootstrap_data) && !is.null(bootstrap_data$outdegree$conf_int)) { bootstrap_data$outdegree$conf_int - } else NULL + } else { + NULL + } lvl <- if (!is.null(bootstrap_data) && !is.null(bootstrap_data$indegree$conf_level)) { bootstrap_data$indegree$conf_level * 100 - } else NA_real_ + } else { + NA_real_ + } cat(sprintf(" [%s]\n", bname)) if (undirected) { @@ -696,14 +714,20 @@ explain_degree_correlation <- function(label, r, ci, lvl_arg = NA_real_, thr = 0 format_interpretation_no_ci <- function(label, r, abs_big, degree_term, thr) { if (!abs_big) { - cat(sprintf(" %s: Weak relationship between %s and adoption timing:\n |r| \u2264 %.1f; no CI.\n", - label, degree_term, thr)) - } else if (r > 0) { - cat(sprintf(" %s: Central actors (high %s) tended to adopt early (supporters):\n |r| > %.1f; no CI.\n", - label, degree_term, thr)) + cat(sprintf( + " %s: Weak relationship between %s and adoption timing:\n |r| \u2264 %.1f; no CI.\n", + label, degree_term, thr + )) + } else if (r < 0) { + cat(sprintf( + " %s: Central actors (high %s) tended to adopt early (supporters):\n |r| > %.1f; no CI.\n", + label, degree_term, thr + )) } else { - cat(sprintf(" %s: Central actors (high %s) tended to adopt late (opposers):\n |r| > %.1f; no CI.\n", - label, degree_term, thr)) + cat(sprintf( + " %s: Central actors (high %s) tended to adopt late (opposers):\n |r| > %.1f; no CI.\n", + label, degree_term, thr + )) } } @@ -711,34 +735,51 @@ format_interpretation_with_ci <- function(label, r, ci, abs_big, degree_term, th lvl_local <- if (!is.na(lvl_arg)) lvl_arg else 95 ci_includes_zero <- (length(ci) >= 2) && is.finite(ci[1]) && is.finite(ci[2]) && (ci[1] <= 0 && ci[2] >= 0) + ci_low <- if (length(ci) >= 1) ci[1] else NA_real_ + ci_high <- if (length(ci) >= 2) ci[2] else NA_real_ + if (!abs_big) { - cat(sprintf(" %s: Weak relationship between %s and adoption timing; %s statistically supported:\n |r| \u2264 %.1f; CI (%.1f%%) %s 0.\n", - label, degree_term, - if (ci_includes_zero) "NOT" else "", - thr, lvl_local, - if (ci_includes_zero) "includes" else "excludes")) - } else if (r > 0) { - cat(sprintf(" %s: Central actors (high %s) tended to adopt early (supporters); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) %s 0.\n", - label, degree_term, - if (ci_includes_zero) "NOT" else "", - thr, lvl_local, - if (ci_includes_zero) "includes" else "excludes")) + cat(sprintf( + " %s: Weak relationship between %s and adoption timing; %s statistically supported:\n |r| \u2264 %.1f; CI (%.1f%%) = [%.3f, %.3f]\n", + label, degree_term, + if (ci_includes_zero) "NOT" else "", + thr, lvl_local, + ci_low, ci_high + )) + } else if (r < 0) { + cat(sprintf( + " %s: Central actors (high %s) tended to adopt early (supporters); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) = [%.3f, %.3f]\n", + label, degree_term, + if (ci_includes_zero) "NOT" else "", + thr, lvl_local, + ci_low, ci_high + )) } else { - cat(sprintf(" %s: Central actors (high %s) tended to adopt late (opposers); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) %s 0.\n", - label, degree_term, - if (ci_includes_zero) "NOT" else "", - thr, lvl_local, - if (ci_includes_zero) "includes" else "excludes")) + cat(sprintf( + " %s: Central actors (high %s) tended to adopt late (opposers); %s statistically supported:\n |r| > %.1f; CI (%.1f%%) = [%.3f, %.3f]\n", + label, degree_term, + if (ci_includes_zero) "NOT" else "", + thr, lvl_local, + ci_low, ci_high + )) } } # Safe correlation: returns NA (no warnings) if zero-variance or too few pairs cor_safe <- function(x, y) { - x <- as.numeric(x); y <- as.numeric(y) + x <- as.numeric(x) + y <- as.numeric(y) ok <- is.finite(x) & is.finite(y) - if (!any(ok)) return(NA_real_) - x <- x[ok]; y <- y[ok] - if (length(x) < 2L) return(NA_real_) - if (sd(x) == 0 || sd(y) == 0) return(NA_real_) + if (!any(ok)) { + return(NA_real_) + } + x <- x[ok] + y <- y[ok] + if (length(x) < 2L) { + return(NA_real_) + } + if (sd(x) == 0 || sd(y) == 0) { + return(NA_real_) + } stats::cor(x, y) } diff --git a/R/diffnet-class.r b/R/diffnet-class.r index d92ec8da..660762c8 100644 --- a/R/diffnet-class.r +++ b/R/diffnet-class.r @@ -356,6 +356,25 @@ check_as_diffnet_attrs <- function( #' @param as.df Logical scalar. When TRUE returns a data.frame. #' @param name Character scalar. Name of the diffusion network (descriptive). #' @param behavior Character vector. Name of the behavior(s) been analyzed (innovation). +#' @param status Optional state representation. Single-behavior: an +#' \eqn{n \times T} integer matrix with \code{1} on the cells where node +#' \eqn{i} is adopted at time \eqn{t} and \code{0} otherwise (need not be +#' monotone — multi-cycle adoption / disadoption is supported). Multi-behavior: +#' a length-\eqn{Q} list of such matrices. When \code{status} is supplied, +#' it becomes the canonical state of the diffnet and \code{toa} is derived +#' from it as the first time each node enters the adopted state. Passing +#' both \code{toa} and \code{status} emits a warning and uses \code{status}; +#' the warning reports whether the supplied \code{toa} is consistent with +#' the \code{toa} derived from \code{status}. +#' @param transmission Optional transmission tree (who-infected-whom). Either +#' a \code{data.frame} with the columns documented in +#' \code{\link{as_transmission_tree}}, or a pre-built transmission list with +#' components \code{tree} and \code{pars}. When supplied, the returned +#' object is promoted to the \code{\link{diffnet_epi}} subclass. \code{NULL} +#' (default) returns a plain \code{diffnet}. +#' @param transmission_pars Optional named list stored verbatim in +#' \code{x$transmission$pars}. Only consulted when \code{transmission} is a +#' data.frame. #' #' @seealso Default options are listed at \code{\link{netdiffuseR-options}} #' @details @@ -561,9 +580,9 @@ as_diffnet.networkDynamic <- function(graph, toavar, ...) { #' @export #' @rdname diffnet-class new_diffnet <- function( - graph, toa, - t0=min(toa, na.rm = TRUE), - t1=max(toa, na.rm = TRUE), + graph, toa = NULL, + t0 = NULL, + t1 = NULL, vertex.dyn.attrs = NULL, vertex.static.attrs = NULL, id.and.per.vars = NULL, @@ -572,7 +591,10 @@ new_diffnet <- function( self = getOption("diffnet.self"), multiple = getOption("diffnet.multiple"), name = "Diffusion Network", - behavior = NULL + behavior = NULL, + status = NULL, + transmission = NULL, + transmission_pars = list() ) { # Step 0.0: Check if its diffnet! -------------------------------------------- @@ -581,12 +603,37 @@ new_diffnet <- function( return(graph) } - # Step 0.1: Setting num_of_behavior ------------------------------------------ + # Step 0.1: Reconcile -toa- and -status- ------------------------------------ + # -status- is the canonical multi-cycle state; -toa- remains the simple + # absorbing entry point. We do not derive -toa- from -status- yet; that + # happens in Step 1.5 once we know meta$n (so we can validate status + # dimensions first and emit a clear error if they mismatch). + if (is.null(toa) && is.null(status)) + stop("-new_diffnet- requires either -toa- or -status-.") + user_supplied_toa <- toa + + # Step 0.2: Resolve t0 / t1 defaults ---------------------------------------- + if (is.null(t0)) + t0 <- if (!is.null(status)) 1L else min(toa, na.rm = TRUE) + if (is.null(t1)) { + if (!is.null(status)) { + T_ <- if (is.list(status)) ncol(status[[1L]]) else ncol(status) + t1 <- t0 + T_ - 1L + } else { + t1 <- max(toa, na.rm = TRUE) + } + } - if (inherits(toa, "matrix")) + # Step 0.3: Setting num_of_behavior ------------------------------------------ + # If -status- is provided, the number of behaviours comes from it (a list + # carries Q entries; a matrix means Q == 1). Otherwise we read it off -toa-. + if (!is.null(status)) { + num_of_behaviors <- if (is.list(status)) length(status) else 1L + } else if (inherits(toa, "matrix")) { num_of_behaviors <- dim(toa)[2] - else + } else { num_of_behaviors <- 1 + } if (length(behavior) == 0L) behavior <- rep("Unknown", num_of_behaviors) @@ -602,8 +649,31 @@ new_diffnet <- function( if (meta$type=="static") warning("-graph- is static and will be recycled (see ?new_diffnet).") + # Step 1.2: Validate -status- and derive -toa- from it ----------------- + # Once we know meta$n we can validate the status array shape; if validation + # passes we derive -toa- from -status- and (when both were supplied) emit a + # consistency warning. + if (!is.null(status)) { + validate_status(status, n = meta$n, num_of_behaviors = num_of_behaviors) + + derived_toa <- toa_from_status(status) + if (!is.null(user_supplied_toa)) { + consistent <- tryCatch( + identical(as.integer(user_supplied_toa), as.integer(derived_toa)), + error = function(e) FALSE + ) + if (consistent) { + warning("Both -toa- and -status- supplied; using -status-. ") + } else { + warning("Both -toa- and -status- supplied; using -status-. ", + "Note: supplied -toa- does NOT match the -toa- derived ", + "from -status-; the supplied -toa- is being ignored.") + } + } + toa <- derived_toa + } - # Step 1.2: Checking that lengths fit + # Step 1.3: Checking that lengths fit if ((num_of_behaviors == 1L) && (length(toa) != meta$n)) { stop( @@ -623,7 +693,7 @@ new_diffnet <- function( } # Step 2.1: Checking class of TOA and coercing if necessary - if (!inherits(toa, "integer")) { + if (!is.integer(toa)) { warning("Coercing -toa- into integer.") toa[] <- as.integer(toa) @@ -639,8 +709,14 @@ new_diffnet <- function( } } - # Step 3.1: Creating Time of adoption matrix --------------------------------- - mat <- toa_mat(toa, labels = meta$ids, t0=t0, t1=t1) + # Step 3.1: Creating the {adopt, cumadopt} matrices -------------------------- + if (!is.null(status)) { + # Build {adopt, cumadopt} directly from the -status- array. + mat <- status_mat(status, t0 = t0, t1 = t1, labels = meta$ids) + } else { + # Legacy path: build absorbing {adopt, cumadopt} from -toa-. + mat <- toa_mat(toa, labels = meta$ids, t0 = t0, t1 = t1) + } # Step 3.2: Verifying dimensions and fixing meta$pers @@ -770,23 +846,44 @@ new_diffnet <- function( } - return( - structure( - list( - graph = graph, - toa = toa, - adopt = adopt, - cumadopt = cumadopt, - # Attributes - vertex.static.attrs = vertex.static.attrs, - vertex.dyn.attrs = vertex.dyn.attrs, - graph.attrs = graph.attrs, - meta = meta - ), - class="diffnet" - ) + # -$status- is the canonical multi-cycle state; for absorbing histories + # it equals -$cumadopt- bit-for-bit (no copy thanks to R's COW). Internal + # functions across the package keep reading -$cumadopt- unchanged. + # -$transmission- is NOT a base-class slot; it only appears on -diffnet_epi- + # (the subclass created by -as_diffnet_epi()-/-as_transmission_tree()-). + out <- structure( + list( + graph = graph, + toa = toa, + adopt = adopt, + cumadopt = cumadopt, + status = cumadopt, + # Attributes + vertex.static.attrs = vertex.static.attrs, + vertex.dyn.attrs = vertex.dyn.attrs, + graph.attrs = graph.attrs, + meta = meta + ), + class="diffnet" ) + # Optional -transmission-: promote to diffnet_epi in a single call by + # delegating to -as_transmission_tree- (data.frame) or -as_diffnet_epi- + # (pre-built transmission list). + if (!is.null(transmission)) { + if (is.data.frame(transmission)) { + out <- as_transmission_tree(out, transmission, pars = transmission_pars) + } else if (is.list(transmission) && + all(c("tree", "pars") %in% names(transmission))) { + out <- as_diffnet_epi(out, transmission = transmission) + } else { + stop("-transmission- must be NULL, a data.frame, or a list with ", + "-tree- and -pars-. See ?as_transmission_tree and ?as_diffnet_epi.") + } + } + + return(out) + } #' @export @@ -888,6 +985,7 @@ diffnet.toa <- function(graph) { nper <- ncol(mat[[1]]) graph$adopt <- unname(mat$adopt) graph$cumadopt <- unname(mat$cumadopt) + graph$status <- graph$cumadopt # keep canonical alias in sync graph diff --git a/R/diffnet-epi.R b/R/diffnet-epi.R new file mode 100644 index 00000000..77358199 --- /dev/null +++ b/R/diffnet-epi.R @@ -0,0 +1,175 @@ +#' The \code{diffnet_epi} subclass +#' +#' \code{diffnet_epi} is an S3 subclass of \code{\link{diffnet}} carrying the +#' epidemiological extension: a \code{$transmission} slot with a +#' who-infected-whom tree and, eventually, the methods that operate on it +#' (offspring-distribution analysis, secondary attack rate, generation time, +#' survival, reproduction number, transmission-tree visualisation). A +#' \code{diffnet_epi} \emph{is a} \code{diffnet}: every method defined for the +#' base class dispatches transparently on the subclass thanks to S3 +#' inheritance. +#' +#' @param x A \code{diffnet} object. +#' @param transmission Either \code{NULL} (creates an empty epi diffnet), a +#' pre-built transmission list with the components \code{tree} and +#' \code{pars}, or — for the data.frame entry point — pass the data.frame +#' to \code{\link{as_transmission_tree}} instead. See examples. +#' @param pars Optional named list stored verbatim in +#' \code{x$transmission$pars}. Only consulted when \code{transmission} is +#' \code{NULL} or carries no \code{pars} of its own. +#' @param attribution Optional source-attribution rule. When non-\code{NULL}, +#' the tree is reconstructed from \code{x}'s graph slices and \code{toa} +#' via \code{\link{transmission_tree_from_events}} using this rule. Accepts +#' one of \code{"uniform"} / \code{"weighted"} / \code{"earliest"} or a +#' function with the \code{\link{source_attribution}} contract. Mutually +#' exclusive with \code{transmission}. +#' @param seed Optional integer forwarded to +#' \code{\link{transmission_tree_from_events}} so the stochastic +#' attribution rules (\code{"uniform"}, \code{"weighted"}) produce a +#' reproducible tree. Ignored when \code{attribution} is \code{NULL}. +#' @param ... Further arguments. Accepted for compatibility with the +#' \code{\link[base]{print}} generic; currently ignored by +#' \code{print.diffnet_epi}. +#' +#' @return +#' \describe{ +#' \item{\code{as_diffnet_epi(x, ...)}}{A \code{diffnet_epi} object — the +#' input \code{x} with \code{class(x) <- c("diffnet_epi", "diffnet")} and +#' a \code{$transmission} slot.} +#' \item{\code{is.diffnet_epi(x)}}{\code{TRUE} iff \code{x} inherits from +#' \code{diffnet_epi}.} +#' \item{\code{print(x)} for a \code{diffnet_epi}}{Same output as +#' \code{\link{print.diffnet}} plus a final line summarising the +#' transmission tree.} +#' } +#' +#' @details +#' The transmission tree is the canonical input to offspring-distribution +#' analyses (Lloyd-Smith \emph{et al.}, 2005) and likelihood-based estimators +#' of the reproduction number and serial interval (White & Pagano, 2008). +#' Attaching one to a diffnet is what turns it into an epidemiological object +#' in this package's sense — hence the dedicated subclass. +#' +#' Promotion is monotone: once a diffnet has been promoted, it stays a +#' \code{diffnet_epi}. Attaching an empty tree (\code{transmission = NULL}) is +#' allowed and useful for downstream code that wants to build the tree +#' incrementally — e.g. \code{rdiffnet()} with a future \code{source_attribution} +#' callback (M8). +#' +#' @section Class hierarchy: +#' \preformatted{ +#' class(x) -> c("diffnet_epi", "diffnet") +#' } +#' S3 dispatch tries \code{*.diffnet_epi} first, then falls back to +#' \code{*.diffnet}. \code{print.diffnet_epi} chains into +#' \code{print.diffnet} via \code{NextMethod()}. +#' +#' @references +#' Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +#' Superspreading and the effect of individual variation on disease emergence. +#' \emph{Nature} 438:355-359. \doi{10.1038/nature04153} +#' +#' White, L. F., & Pagano, M. (2008). A likelihood-based method for real-time +#' estimation of the serial interval and reproductive number of an epidemic. +#' \emph{Statistics in Medicine} 27:2999-3016. \doi{10.1002/sim.3136} +#' +#' @examples +#' set.seed(2026) +#' gr <- lapply(1:5, function(t) rgraph_ba(t = 4L)) +#' dn <- new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L) +#' +#' # Empty promotion (no tree yet) +#' dn_epi <- as_diffnet_epi(dn) +#' is.diffnet_epi(dn_epi) # TRUE +#' inherits(dn_epi, "diffnet") # also TRUE +#' nrow(transmission_tree(dn_epi)) # 0 +#' +#' # Attach a tree (preferred entry point: as_transmission_tree) +#' tree <- data.frame( +#' date = c(1L, 2L), +#' source = c(NA_integer_, 1L), +#' target = c(1L, 2L), +#' source_exposure_date = c(NA_integer_, 1L) +#' ) +#' dn_epi <- as_transmission_tree(dn, tree) +#' is.diffnet_epi(dn_epi) # TRUE (promoted automatically) +#' transmission_tree(dn_epi) # 2 rows +#' +#' # Reconstruct the tree from x's graph + toa via source-attribution +#' # (general primitive — useful when you have observed adoption times +#' # but no transmission log, like contact-tracing or experiment data). +#' dn_epi <- as_diffnet_epi(dn, attribution = "uniform", seed = 2026) +#' transmission_tree(dn_epi) +#' +#' @name diffnet_epi +#' @aliases diffnet_epi +#' @author Aníbal Olivera M. +NULL + +#' @rdname diffnet_epi +#' @export +as_diffnet_epi <- function(x, transmission = NULL, pars = list(), + attribution = NULL, seed = NULL) { + + if (!inherits(x, "diffnet")) + stop("-x- must be a diffnet object.") + + if (!is.null(attribution) && !is.null(transmission)) + stop("Pass either -transmission- (pre-built tree) or -attribution- ", + "(reconstruct tree from x's graph and toa), not both.") + + # M13: reconstruct the tree from x's graph + toa using the chosen + # source-attribution rule. Returns a data.frame in canonical schema + # which we then wrap into the standard transmission = list(tree, pars). + if (!is.null(attribution)) { + tree <- transmission_tree_from_events( + x, attribution = attribution, pars = pars, seed = seed + ) + transmission <- list(tree = tree, pars = pars) + } + + # Monotone promotion: prepend diffnet_epi to the class vector if absent. + if (!inherits(x, "diffnet_epi")) + class(x) <- c("diffnet_epi", class(x)) + + if (is.null(transmission)) { + # Empty epi diffnet (allowed by design). + if (is.null(x$transmission)) + x$transmission <- list(tree = .empty_transmission_tree(), pars = pars) + } else if (is.list(transmission) && + all(c("tree", "pars") %in% names(transmission))) { + # Pre-built transmission list. + x$transmission <- transmission + } else { + stop("-transmission- must be NULL or a list with -tree- and -pars-. ", + "To attach a data.frame tree, use -as_transmission_tree()- instead.") + } + + x +} + +#' @rdname diffnet_epi +#' @export +is.diffnet_epi <- function(x) inherits(x, "diffnet_epi") + +#' @rdname diffnet_epi +#' @export +print.diffnet_epi <- function(x, ...) { + # Delegate the base diffnet body via NextMethod, then append one line. + NextMethod() + + tr <- x$transmission$tree + n_edges <- if (is.null(tr)) 0L else nrow(tr) + if (n_edges > 0L) { + n_seeds <- sum(is.na(tr$source)) + n_virs <- length(unique(tr$virus_id)) + cat(sprintf( + "\n Transmission tree : %d edges, %d seeds, %d virus%s", + n_edges, n_seeds, n_virs, if (n_virs == 1L) "" else "es" + )) + } else { + cat("\n Transmission tree : empty (use -as_transmission_tree()-)") + } + + invisible(x) +} diff --git a/R/diffnet-indexing.r b/R/diffnet-indexing.r index bd8236be..990edd3d 100644 --- a/R/diffnet-indexing.r +++ b/R/diffnet-indexing.r @@ -163,6 +163,7 @@ diffnet.subset.slices <- function(graph, k) { graph$cumadopt[beforeslice,k] <- 1 graph$cumadopt[afterslice,k[nslices]] <- 1 graph$cumadopt <- graph$cumadopt[,k] + graph$status <- graph$cumadopt # keep canonical alias in sync # Changing toa mat (truncating it) graph$toa[beforeslice] <- pers[k][1] @@ -404,6 +405,7 @@ diffnet_check_attr_class <- function(value, meta) { # 2.0: Matrices x$adopt <- x$adopt[i,,drop=FALSE] x$cumadopt <- x$cumadopt[i,,drop=FALSE] + x$status <- x$cumadopt # keep canonical alias in sync x$vertex.static.attrs <- x$vertex.static.attrs[i,,drop=FALSE] x$toa <- x$toa[i] diff --git a/R/disadoption_mechanisms.R b/R/disadoption_mechanisms.R new file mode 100644 index 00000000..f6deef6d --- /dev/null +++ b/R/disadoption_mechanisms.R @@ -0,0 +1,184 @@ +#' Disadoption mechanisms for \code{rdiffnet} +#' +#' A family of factories that build per-step disadoption rules. Each +#' factory takes its parameters and returns a closure that satisfies the +#' \code{disadopt} contract of \code{\link{rdiffnet}}: a function of +#' \code{(expo, cumadopt, time)} that returns a list of length \eqn{Q} +#' (one entry per behaviour) with the integer node indices that +#' disadopt at the current step. +#' +#' Use any of these as the \code{disadopt} argument of \code{rdiffnet}, +#' or write your own factory that returns a function with the same +#' signature. +#' +#' @details +#' The four kernels below cover the common cases: +#' +#' \describe{ +#' \item{\code{disadoptmech_random}}{Each currently-adopted node +#' disadopts independently with probability \code{prob}. Models +#' constant-rate recovery (the SIR \eqn{\gamma}).} +#' \item{\code{disadoptmech_bithreshold}}{Currently-adopted nodes +#' disadopt when their exposure crosses an upper threshold, +#' \code{threshold_dis}. Pair with \code{adoptmech_threshold} to +#' instantiate the bi-threshold model of Alipour \emph{et al.} +#' (2024) — adopt at the lower threshold, disadopt at the upper.} +#' \item{\code{disadoptmech_logit}}{Bernoulli rule with logit link. +#' Disadopt with probability +#' \code{plogis(beta0 + beta_expo * expo)}. To make recovery +#' \emph{less likely} as exposure grows, set \code{beta_expo < 0}.} +#' \item{\code{disadoptmech_probit}}{Bernoulli rule with probit link. +#' Disadopt with probability +#' \code{pnorm(beta0 + beta_expo * expo)}.} +#' } +#' +#' @param prob Numeric scalar in \eqn{[0, 1]}. Per-step disadoption +#' probability for \code{disadoptmech_random}. +#' @param threshold_dis Numeric scalar or vector of length \eqn{n}. +#' Upper-threshold cut-off for \code{disadoptmech_bithreshold}. +#' A scalar is recycled across all nodes. +#' @param beta0 Numeric scalar. Intercept of the logit/probit +#' disadoption probability. +#' @param beta_expo Numeric scalar. Slope on exposure for the +#' logit/probit disadoption probability. +#' +#' @return A function with signature +#' \code{function(expo, cumadopt, time)} suitable as the +#' \code{disadopt} argument of \code{\link{rdiffnet}}. +#' +#' @references +#' Alipour, F., Dokshin, F., Maleki, Z., Song, Y., & Ramazi, P. (2024). +#' Enough but not too many: A bi-threshold model for behavioral +#' diffusion. \emph{PNAS Nexus} 3(10). +#' \doi{10.1093/pnasnexus/pgae428} +#' +#' @examples +#' set.seed(2026) +#' +#' # Constant-rate recovery: each adopter recovers with prob 0.10 / step +#' dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' disadopt = disadoptmech_random(prob = 0.10)) +#' +#' # Bi-threshold model (Alipour 2024): adopt when exposure >= 0.30, +#' # disadopt when exposure >= 0.70. +#' dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' threshold.dist = 0.30, +#' disadopt = disadoptmech_bithreshold(threshold_dis = 0.70)) +#' +#' # Logit recovery, exposure-dependent +#' dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' disadopt = disadoptmech_logit(beta0 = -1, beta_expo = -2)) +#' +#' @author Aníbal Olivera M. +#' @name disadoption_mechanisms +NULL + +#' @rdname disadoption_mechanisms +#' @export +disadoptmech_random <- function(prob) { + if (missing(prob)) + stop("-disadoptmech_random- requires -prob-.") + if (!is.numeric(prob) || length(prob) != 1L || is.na(prob) || + prob < 0 || prob > 1) + stop("-prob- must be a single number in [0, 1].") + force(prob) + + function(expo, cumadopt, time) { + Q <- dim(cumadopt)[3L] + out <- vector("list", Q) + for (q in seq_len(Q)) { + currently <- which(cumadopt[, time, q] == 1L) + out[[q]] <- if (length(currently)) + currently[stats::runif(length(currently)) < prob] + else + integer() + } + out + } +} + +#' @rdname disadoption_mechanisms +#' @export +disadoptmech_bithreshold <- function(threshold_dis) { + if (missing(threshold_dis)) + stop("-disadoptmech_bithreshold- requires -threshold_dis-.") + if (!is.numeric(threshold_dis) || anyNA(threshold_dis)) + stop("-threshold_dis- must be a numeric scalar or vector with no NA.") + force(threshold_dis) + + function(expo, cumadopt, time) { + n <- dim(cumadopt)[1L] + Q <- dim(cumadopt)[3L] + + th <- if (length(threshold_dis) == 1L) + rep(threshold_dis, n) + else + threshold_dis + if (length(th) != n) + stop("-threshold_dis- length (", length(th), + ") does not match number of nodes (", n, ").") + + # rdiffnet hands disadopt() an -expo- of shape n x 1 x Q (current + # time slice only); -cumadopt- carries the full n x T x Q history. + out <- vector("list", Q) + for (q in seq_len(Q)) { + currently <- which(cumadopt[, time, q] == 1L) + e <- expo[, 1L, q] + out[[q]] <- currently[e[currently] >= th[currently]] + } + out + } +} + +#' @rdname disadoption_mechanisms +#' @export +disadoptmech_logit <- function(beta0, beta_expo) { + if (missing(beta0) || missing(beta_expo)) + stop("-disadoptmech_logit- requires both -beta0- and -beta_expo-.") + force(beta0); force(beta_expo) + + function(expo, cumadopt, time) { + # -expo- shape is n x 1 x Q (current slice only); -cumadopt- is full history. + Q <- dim(cumadopt)[3L] + out <- vector("list", Q) + for (q in seq_len(Q)) { + currently <- which(cumadopt[, time, q] == 1L) + if (length(currently)) { + e <- expo[currently, 1L, q] + p <- stats::plogis(beta0 + beta_expo * e) + out[[q]] <- currently[stats::runif(length(p)) < p] + } else { + out[[q]] <- integer() + } + } + out + } +} + +#' @rdname disadoption_mechanisms +#' @export +disadoptmech_probit <- function(beta0, beta_expo) { + if (missing(beta0) || missing(beta_expo)) + stop("-disadoptmech_probit- requires both -beta0- and -beta_expo-.") + force(beta0); force(beta_expo) + + function(expo, cumadopt, time) { + # -expo- shape is n x 1 x Q (current slice only); -cumadopt- is full history. + Q <- dim(cumadopt)[3L] + out <- vector("list", Q) + for (q in seq_len(Q)) { + currently <- which(cumadopt[, time, q] == 1L) + if (length(currently)) { + e <- expo[currently, 1L, q] + p <- stats::pnorm(beta0 + beta_expo * e) + out[[q]] <- currently[stats::runif(length(p)) < p] + } else { + out[[q]] <- integer() + } + } + out + } +} diff --git a/R/epi_metrics.R b/R/epi_metrics.R new file mode 100644 index 00000000..d376d4ff --- /dev/null +++ b/R/epi_metrics.R @@ -0,0 +1,762 @@ +# Epidemiological metrics for diffnet / diffnet_epi (M10). +# +# Five exported metrics + a -summary.diffnet_epi- method that brings them +# together. The split between which dispatch on -diffnet- vs -diffnet_epi-: +# +# diffnet -> peak_prevalence, peak_time, survival_curve +# (need only the $status array; useful for both behaviour- +# diffusion and epi work) +# +# diffnet_epi -> secondary_attack_rate, generation_time +# (need the $transmission tree) +# +# The user can invoke any of the first three on a plain diffnet; the latter +# two error on a plain diffnet pointing to as_transmission_tree() / +# as_diffnet_epi(). summary.diffnet_epi prints the base diffnet block plus an +# epi block that calls all five. + +# ---------------------------------------------------------------------------- +# peak_prevalence / peak_time +# ---------------------------------------------------------------------------- + +#' Peak prevalence and time of peak in a diffnet +#' +#' \code{peak_prevalence(x)} returns the highest fraction of adopted nodes +#' observed in \code{x$status} across all time slices. \code{peak_time(x)} +#' returns the time period at which the peak is reached. +#' +#' For multi-behaviour diffnets both functions return a named numeric vector +#' of length \eqn{Q}, one entry per behaviour. +#' +#' @param x A \code{\link{diffnet}} (or any subclass, such as +#' \code{\link{diffnet_epi}}). +#' @param ... Currently ignored. +#' +#' @return Numeric scalar (single behaviour) or named numeric vector +#' (multi-behaviour). \code{peak_prevalence} is in \eqn{[0, 1]}; +#' \code{peak_time} is the time-period label (as integer). +#' +#' @examples +#' set.seed(2026) +#' dn <- rdiffnet(n = 50, t = 8, seed.graph = "small-world", +#' seed.p.adopt = 0.05, stop.no.diff = FALSE) +#' peak_prevalence(dn) +#' peak_time(dn) +#' +#' @name peak_prevalence +#' @author Aníbal Olivera M. +NULL + +#' @rdname peak_prevalence +#' @export +peak_prevalence <- function(x, ...) UseMethod("peak_prevalence") + +#' @rdname peak_prevalence +#' @export +peak_prevalence.diffnet <- function(x, ...) { + st <- x$status + if (is.list(st)) { + res <- vapply(st, function(s) max(colSums(s)) / nrow(s), numeric(1L)) + names(res) <- vapply(x$meta$behavior, as.character, character(1L)) + return(res) + } + max(colSums(st)) / nrow(st) +} + +#' @rdname peak_prevalence +#' @export +peak_time <- function(x, ...) UseMethod("peak_time") + +#' @rdname peak_prevalence +#' @export +peak_time.diffnet <- function(x, ...) { + st <- x$status + pers <- x$meta$pers + if (is.list(st)) { + res <- vapply(st, function(s) as.integer(pers[which.max(colSums(s))]), + integer(1L)) + names(res) <- vapply(x$meta$behavior, as.character, character(1L)) + return(res) + } + as.integer(pers[which.max(colSums(st))]) +} + +# ---------------------------------------------------------------------------- +# survival_curve +# ---------------------------------------------------------------------------- + +#' Kaplan-Meier-style survival curve for a diffnet +#' +#' For each adopted node, \code{survival_curve(x)} computes the duration in +#' the adopted state (\code{tod(x) - toa(x)}; right-censored at \code{T} for +#' nodes that never recover) and assembles a Kaplan-Meier-style survival +#' table. +#' +#' @param x A \code{\link{diffnet}} object. +#' @param ... Currently ignored. +#' +#' @return A \code{data.frame} (with extra class \code{netdiffuseR_survival}) +#' carrying columns \code{time}, \code{n_at_risk}, \code{n_recovered}, and +#' \code{survival}. For multi-behaviour diffnets an additional +#' \code{virus_id} column tags the behaviour. Printing summarises the +#' curve; standard data.frame subscripting works on the underlying rows. +#' +#' @examples +#' set.seed(2026) +#' dn <- rdiffnet(n = 50, t = 8, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' disadopt = disadoptmech_random(prob = 0.15)) +#' s <- survival_curve(dn) +#' s # prints summary +#' as.data.frame(s) # full data.frame +#' +#' @name survival_curve +#' @author Aníbal Olivera M. +NULL + +#' @rdname survival_curve +#' @export +survival_curve <- function(x, ...) UseMethod("survival_curve") + +#' @rdname survival_curve +#' @export +survival_curve.diffnet <- function(x, ...) { + toa_v <- x$toa + tod_v <- tod(x) + T_end <- x$meta$nper + + if (is.null(dim(toa_v))) { + out <- .survival_one_behaviour(toa_v, tod_v, T_end) + } else { + Q <- ncol(toa_v) + parts <- lapply(seq_len(Q), function(q) { + df <- .survival_one_behaviour(toa_v[, q], tod_v[, q], T_end) + df$virus_id <- q + df[, c("virus_id", "time", "n_at_risk", "n_recovered", "survival")] + }) + out <- do.call(rbind, parts) + } + + rownames(out) <- NULL + structure(out, class = c("netdiffuseR_survival", "data.frame")) +} + +# Internal: KM table for one behaviour. +.survival_one_behaviour <- function(toa, tod, T_end) { + adopters <- which(!is.na(toa)) + if (!length(adopters)) { + return(data.frame(time = integer(0), n_at_risk = integer(0), + n_recovered = integer(0), survival = numeric(0), + stringsAsFactors = FALSE)) + } + + # duration in adopted state, censored at T_end - toa + 1 when absorbing + duration <- ifelse(is.na(tod[adopters]), + T_end - toa[adopters] + 1L, + tod[adopters] - toa[adopters]) + recovered <- !is.na(tod[adopters]) + + unique_durs <- sort(unique(duration)) + surv <- 1 + remaining <- length(adopters) + + rows <- vector("list", length(unique_durs)) + for (k in seq_along(unique_durs)) { + d <- unique_durs[k] + rec <- sum(duration == d & recovered) + cen <- sum(duration == d & !recovered) + if (rec > 0 && remaining > 0) surv <- surv * (1 - rec / remaining) + rows[[k]] <- list(time = as.integer(d), + n_at_risk = as.integer(remaining), + n_recovered = as.integer(rec), + survival = as.numeric(surv)) + remaining <- remaining - rec - cen + } + + data.frame( + time = vapply(rows, `[[`, integer(1L), "time"), + n_at_risk = vapply(rows, `[[`, integer(1L), "n_at_risk"), + n_recovered = vapply(rows, `[[`, integer(1L), "n_recovered"), + survival = vapply(rows, `[[`, numeric(1L), "survival"), + stringsAsFactors = FALSE + ) +} + +#' @rdname survival_curve +#' @export +print.netdiffuseR_survival <- function(x, ...) { + cat("Survival curve (Kaplan-Meier-style)\n") + if (!nrow(x)) { + cat(" Empty -- no adopters in this diffnet.\n") + return(invisible(x)) + } + has_virus <- "virus_id" %in% names(x) + vids <- if (has_virus) unique(x$virus_id) else 1L + for (v in vids) { + sub <- if (has_virus) x[x$virus_id == v, , drop = FALSE] else x + n_events <- sum(sub$n_recovered) + surv_at_end <- sub$survival[nrow(sub)] + # Median: first time where survival <= 0.5 + median_t <- if (any(sub$survival <= 0.5)) + sub$time[which(sub$survival <= 0.5)[1L]] else NA_integer_ + tag <- if (has_virus) sprintf(" [behaviour %d]", v) else "" + cat(sprintf(" N events%s : %d recoveries across %d distinct durations\n", + tag, n_events, nrow(sub))) + cat(sprintf(" Median survival : %s\n", + if (is.na(median_t)) "not reached" + else as.character(median_t))) + cat(sprintf(" Final survival (end of horizon) : %.3f\n", surv_at_end)) + } + cat(" -> use as.data.frame(.) for the per-time table.\n") + invisible(x) +} + +# ---------------------------------------------------------------------------- +# secondary_attack_rate (diffnet_epi only) +# ---------------------------------------------------------------------------- + +#' Secondary attack rate from a transmission tree +#' +#' For each infection event recorded in \code{$transmission$tree}, +#' \code{secondary_attack_rate(x)} reports the number of secondary infections +#' caused by that event and the number of contacts the infector had in the +#' contact network at the slice corresponding to \code{source_exposure_date} +#' (the infector's own infection date). The per-event rate is +#' \code{n_secondary / n_contacts}; the aggregate (printed by default) is +#' \code{sum(n_secondary) / sum(n_contacts)}. +#' +#' Under absorbing diffusion each \code{(source, virus_id)} has exactly one +#' \code{source_exposure_date}, so the per-event keying collapses to the +#' classic per-source rollup. Under SIRS-style re-infection (a node enters +#' state I multiple times for the same virus), each infection-life of the +#' source is its own row, matching the convention used by epiworldR for +#' tree-derived metrics. +#' +#' Under SIRS the same \code{(source, target)} pair can transmit multiple +#' times during the source's infection life (the target disadopts and gets +#' re-infected by the same source). Each such transmission is a distinct +#' row in the tree and contributes to \code{n_secondary} for that +#' source-event, while \code{n_contacts} is fixed at the source's +#' neighbourhood size at \code{source_exposure_date}. Consequently the +#' per-event \code{sar} may exceed 1 (it is no longer a probability of +#' transmission but a count of transmissions per contact). The aggregate +#' \code{attr(sar, "global")} retains its sum-over-sum interpretation. +#' +#' @param x A \code{\link{diffnet_epi}} object. +#' @param ... Currently ignored. +#' +#' @return A \code{data.frame} (with extra class \code{netdiffuseR_sar}) +#' carrying columns \code{source}, \code{virus_id}, +#' \code{source_exposure_date}, \code{n_secondary}, \code{n_contacts}, +#' \code{sar}. Printing shows the aggregate scalar; the per-event rows +#' are exposed via standard data.frame subscripting. The aggregate is +#' also stored as \code{attr(., "global")}. +#' +#' @examples +#' set.seed(2026) +#' dn <- rdiffnet(n = 40, t = 8, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' source_attribution = source_attribution_uniform) +#' sar <- secondary_attack_rate(dn) +#' sar # aggregate print +#' as.data.frame(sar) # per-source breakdown +#' attr(sar, "global") # aggregate scalar +#' +#' @name secondary_attack_rate +#' @author Aníbal Olivera M. +NULL + +#' @rdname secondary_attack_rate +#' @export +secondary_attack_rate <- function(x, ...) UseMethod("secondary_attack_rate") + +#' @rdname secondary_attack_rate +#' @export +secondary_attack_rate.default <- function(x, ...) { + stop("-secondary_attack_rate()- requires a -diffnet_epi-. ", + "Use -as_transmission_tree()- or -as_diffnet_epi()- first.") +} + +#' @rdname secondary_attack_rate +#' @export +secondary_attack_rate.diffnet_epi <- function(x, ...) { + + tr <- transmission_tree(x) + tr_edges <- tr[!is.na(tr$source), , drop = FALSE] + + if (!nrow(tr_edges)) { + out <- data.frame( + source = integer(0), virus_id = integer(0), + source_exposure_date = integer(0), + n_secondary = integer(0), n_contacts = integer(0), + sar = numeric(0), stringsAsFactors = FALSE + ) + return(structure(out, global = NA_real_, + class = c("netdiffuseR_sar", "data.frame"))) + } + + # M12.2: aggregate secondaries per *infection event* of the source, + # keyed by (source, virus_id, source_exposure_date). Under absorbing + # diffusion each source has one exposure_date and this collapses to + # the M12 per-source rollup; under SIRS-style re-infection each + # infection-life of the source is its own row. + key <- paste(tr_edges$source, tr_edges$virus_id, + tr_edges$source_exposure_date, sep = "::") + agg <- tapply(seq_len(nrow(tr_edges)), key, length) + parts <- strsplit(names(agg), "::", fixed = TRUE) + src <- as.integer(vapply(parts, `[`, character(1L), 1L)) + vid <- as.integer(vapply(parts, `[`, character(1L), 2L)) + sed <- as.integer(vapply(parts, `[`, character(1L), 3L)) + n_sec <- as.integer(agg) + + # Contacts at the slice corresponding to *this* infection event of the + # source, i.e. graph[[source_exposure_date]] (not graph[[toa[source]]], + # which under re-infection only carries the latest exposure date). + T_slices <- length(x$graph) + n_con <- vapply(seq_along(src), function(i) { + t_inf <- sed[i] + if (is.na(t_inf) || t_inf < 1L || t_inf > T_slices) return(0L) + g <- x$graph[[t_inf]] + s <- src[i] + as.integer(sum((g[s, ] != 0) | (g[, s] != 0))) + }, integer(1L)) + + per_event <- data.frame( + source = src, + virus_id = vid, + source_exposure_date = sed, + n_secondary = n_sec, + n_contacts = n_con, + sar = ifelse(n_con > 0, n_sec / n_con, NA_real_), + stringsAsFactors = FALSE + ) + per_event <- per_event[order(per_event$virus_id, per_event$source, + per_event$source_exposure_date), , + drop = FALSE] + rownames(per_event) <- NULL + + total_s <- sum(per_event$n_secondary) + total_c <- sum(per_event$n_contacts) + global <- if (total_c > 0) total_s / total_c else NA_real_ + + structure(per_event, global = global, + class = c("netdiffuseR_sar", "data.frame")) +} + +#' @rdname secondary_attack_rate +#' @export +print.netdiffuseR_sar <- function(x, ...) { + cat("Secondary Attack Rate\n") + cat(sprintf(" Aggregate (sum of secondaries / sum of contacts) : %.3f\n", + attr(x, "global"))) + n_events <- nrow(x) + n_sources <- length(unique(paste(x$source, x$virus_id, sep = "::"))) + if (n_events == n_sources) { + cat(sprintf(" Based on %d infector%s in the transmission tree.\n", + n_events, if (n_events == 1L) "" else "s")) + } else { + cat(sprintf(" Based on %d infection event%s from %d distinct infector%s\n", + n_events, if (n_events == 1L) "" else "s", + n_sources, if (n_sources == 1L) "" else "s")) + cat(" (some infectors re-entered I and transmitted in more than one life).\n") + } + cat(" -> use as.data.frame(.) or standard subscripting for the\n") + cat(" per-event breakdown.\n") + invisible(x) +} + +# ---------------------------------------------------------------------------- +# generation_time (diffnet_epi only) +# ---------------------------------------------------------------------------- + +#' Generation time per edge of a transmission tree +#' +#' For each edge \eqn{(source, target)} in \code{$transmission$tree}, +#' \code{generation_time(x)} computes \code{date - source_exposure_date}, +#' the time between the infector's adoption and its infectee's. Seed +#' rows (\code{source == NA}) are dropped. +#' +#' @param x A \code{\link{diffnet_epi}} object. +#' @param ... Currently ignored. +#' +#' @return A \code{data.frame} (with extra class +#' \code{netdiffuseR_generation_time}) carrying the original tree columns +#' plus \code{gen_time}. Printing shows a distributional summary +#' (\emph{N}, mean, sd, median, IQR, range); the per-edge rows are exposed +#' via standard data.frame subscripting. +#' +#' @examples +#' set.seed(2026) +#' dn <- rdiffnet(n = 40, t = 8, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' source_attribution = source_attribution_uniform) +#' gt <- generation_time(dn) +#' gt # summary print +#' as.data.frame(gt) # per-edge rows +#' mean(gt$gen_time) +#' +#' @name generation_time +#' @author Aníbal Olivera M. +NULL + +#' @rdname generation_time +#' @export +generation_time <- function(x, ...) UseMethod("generation_time") + +#' @rdname generation_time +#' @export +generation_time.default <- function(x, ...) { + stop("-generation_time()- requires a -diffnet_epi-. ", + "Use -as_transmission_tree()- or -as_diffnet_epi()- first.") +} + +#' @rdname generation_time +#' @export +generation_time.diffnet_epi <- function(x, ...) { + tr <- transmission_tree(x) + tr <- tr[!is.na(tr$source) & !is.na(tr$source_exposure_date), , drop = FALSE] + tr$gen_time <- as.integer(tr$date - tr$source_exposure_date) + rownames(tr) <- NULL + structure(tr, class = c("netdiffuseR_generation_time", "data.frame")) +} + +#' @rdname generation_time +#' @export +print.netdiffuseR_generation_time <- function(x, ...) { + cat("Generation time distribution\n") + if (!nrow(x)) { + cat(" Empty -- no non-seed edges in the transmission tree.\n") + return(invisible(x)) + } + g <- x$gen_time + q <- stats::quantile(g, c(0.25, 0.5, 0.75), names = FALSE, na.rm = TRUE) + cat(sprintf(" N edges : %d\n", length(g))) + cat(sprintf(" Mean : %.2f (sd %.2f)\n", mean(g), stats::sd(g))) + cat(sprintf(" Median : %.1f (IQR %.1f - %.1f)\n", q[2], q[1], q[3])) + cat(sprintf(" Range : %d - %d\n", min(g), max(g))) + cat(" -> use as.data.frame(.) for the per-edge table.\n") + invisible(x) +} + +# ---------------------------------------------------------------------------- +# repr_number (diffnet_epi only) +# ---------------------------------------------------------------------------- + +#' Empirical reproduction number from a transmission tree +#' +#' For every infection event in \code{$transmission$tree}, +#' \code{repr_number(x)} counts the number of secondary cases it caused +#' (its offspring count, \eqn{\nu_i} in Lloyd-Smith \emph{et al.}, 2005) +#' and reports the mean across cases as the empirical reproduction +#' number. Cases that did not transmit further (terminal cases) count +#' as zero in the denominator; seeds are included. +#' +#' A case is one entry into state I, keyed by +#' \code{(node, virus_id, exposure_date)}. Under absorbing diffusion +#' (the classic netdiffuseR regime) each \code{(node, virus_id)} has +#' exactly one \code{exposure_date}, so the 3-D key collapses to the +#' familiar per-node rollup. Under SIRS-style re-infection (a node +#' enters \eqn{I} multiple times for the same virus), each +#' infection-life is its own case with its own offspring tally. This +#' matches the convention used by epiworldR's +#' \code{get_reproductive_number()} and the Lloyd-Smith framework. +#' +#' @param x A \code{\link{diffnet_epi}} object. +#' @param ... Currently ignored. +#' +#' @return A \code{data.frame} (with extra class \code{netdiffuseR_repr}) +#' carrying columns \code{node}, \code{virus_id}, +#' \code{exposure_date}, \code{n_offspring}. Printing shows the +#' aggregate reproduction number (mean offspring), plus SD and range; +#' the per-case rows are exposed via standard data.frame subscripting. +#' The aggregate is also stored as \code{attr(., "global")}. A +#' \code{plot} method renders the offspring distribution as a +#' barplot. +#' +#' @details +#' The empirical reproduction number is defined as the mean offspring +#' count across all observed cases: +#' +#' \deqn{% +#' R = \frac{1}{N}\sum_{i \in \mathrm{cases}} \nu_i % +#' }{% +#' R = (1/N) * sum_i nu_i % +#' } +#' +#' where \eqn{N} is the total number of infected cases (seeds + secondary) +#' in the tree and \eqn{\nu_i} is the number of times case \eqn{i} appears +#' as a \code{source} in the tree. Terminal cases (\eqn{\nu_i = 0}) are +#' included in the denominator, so \eqn{R} is the true mean offspring, +#' not the mean among transmitters only. +#' +#' For trees built from observational data (Epigames / contact tracing), +#' \eqn{R} matches the standard tree-based reproduction-number estimator. +#' For trees produced by \code{rdiffnet()} with \code{source_attribution}, +#' the value depends on the attribution policy: \code{_uniform}, +#' \code{_weighted}, and \code{_earliest} will produce different empirical +#' \eqn{R} on the same simulation, since they distribute observed +#' adoptions across different infectors. +#' +#' @references +#' Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +#' Superspreading and the effect of individual variation on disease emergence. +#' \emph{Nature} 438:355-359. \doi{10.1038/nature04153} +#' +#' @examples +#' set.seed(2026) +#' dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", +#' seed.p.adopt = 0.10, stop.no.diff = FALSE, +#' source_attribution = source_attribution_uniform) +#' R <- repr_number(dn) +#' R # aggregate print: mean / SD / range +#' as.data.frame(R) # per-case offspring counts +#' attr(R, "global") # the scalar R +#' \dontrun{ +#' plot(R) # offspring distribution barplot +#' } +#' +#' # SIRS-style: a disadopt function lets nodes re-enter I, and every +#' # re-infection is recorded as its own case in the returned frame. +#' \dontrun{ +#' disadopt_30 <- function(expo, cumadopt, time) { +#' q_max <- dim(cumadopt)[3]; res <- vector("list", q_max) +#' for (q in seq_len(q_max)) { +#' adopters <- which(cumadopt[, time, q] == 1L) +#' res[[q]] <- if (length(adopters)) +#' sample(adopters, ceiling(0.30 * length(adopters))) else integer() +#' } +#' res +#' } +#' set.seed(2026) +#' dn_sirs <- rdiffnet(n = 60, t = 10, seed.graph = "small-world", +#' seed.p.adopt = 0.15, stop.no.diff = FALSE, +#' disadopt = disadopt_30, +#' source_attribution = source_attribution_uniform) +#' R_sirs <- repr_number(dn_sirs) +#' table(table(paste(R_sirs$node, R_sirs$virus_id))) # nodes by # of lives +#' } +#' +#' @name repr_number +#' @author Aníbal Olivera M. +NULL + +#' @rdname repr_number +#' @export +repr_number <- function(x, ...) UseMethod("repr_number") + +#' @rdname repr_number +#' @export +repr_number.default <- function(x, ...) { + stop("-repr_number()- requires a -diffnet_epi-. ", + "Use -as_transmission_tree()- or -as_diffnet_epi()- first.") +} + +#' @rdname repr_number +#' @export +repr_number.diffnet_epi <- function(x, ...) { + + tr <- transmission_tree(x) + + if (!nrow(tr)) { + out <- data.frame( + node = integer(0), + virus_id = integer(0), + exposure_date = integer(0), + n_offspring = integer(0), + stringsAsFactors = FALSE + ) + return(structure(out, global = NA_real_, + class = c("netdiffuseR_repr", "data.frame"))) + } + + # M12.2: cases are keyed per infection event = unique + # (target, virus_id, date). Under single-adoption each (target, virus_id) + # has exactly one date, so this collapses to the M12 2-D keying for + # absorbing diffusions; under SIRS-style re-infection each entry-to-I + # is its own case (Lloyd-Smith / epiworldR convention). + cases <- unique(tr[, c("target", "virus_id", "date"), drop = FALSE]) + names(cases) <- c("node", "virus_id", "exposure_date") + key_cases <- paste(cases$node, cases$virus_id, cases$exposure_date, + sep = "::") + + # Count offspring per source-event: a row in the tree contributes +1 to + # the case that infected someone *at that source_exposure_date*. + src_rows <- tr[!is.na(tr$source), + c("source", "virus_id", "source_exposure_date"), + drop = FALSE] + if (nrow(src_rows)) { + key_src <- paste(src_rows$source, src_rows$virus_id, + src_rows$source_exposure_date, sep = "::") + src_tab <- table(factor(key_src, levels = key_cases)) + n_offspring <- as.integer(src_tab) + } else { + n_offspring <- rep(0L, nrow(cases)) + } + + cases$n_offspring <- n_offspring + cases <- cases[order(cases$virus_id, cases$node, + cases$exposure_date), , + drop = FALSE] + rownames(cases) <- NULL + + global <- mean(cases$n_offspring) + + structure(cases, global = global, + class = c("netdiffuseR_repr", "data.frame")) +} + +#' @rdname repr_number +#' @export +print.netdiffuseR_repr <- function(x, ...) { + cat("Reproduction number (empirical, from transmission tree)\n") + if (!nrow(x)) { + cat(" Empty -- no cases in the transmission tree.\n") + return(invisible(x)) + } + nv <- length(unique(x$virus_id)) + if (nv > 1L) { + cat(sprintf(" Aggregate over %d diffusions (pooled).\n", nv)) + cat(sprintf(" Mean offspring (R) : %.3f\n", attr(x, "global"))) + if (nrow(x) > 1L) + cat(sprintf(" SD : %.3f\n", stats::sd(x$n_offspring))) + cat(sprintf(" Range : %d - %d\n", + min(x$n_offspring), max(x$n_offspring))) + cat(sprintf(" Based on %d cases across %d diffusions.\n", nrow(x), nv)) + cat(" Per-diffusion R:\n") + for (v in sort(unique(x$virus_id))) { + sub <- x$n_offspring[x$virus_id == v] + cat(sprintf(" diffusion %s: R = %.3f (n = %d)\n", + format(v), mean(sub), length(sub))) + } + cat(" -> use as.data.frame(.) for the per-case offspring count,\n") + cat(" subset by $virus_id for per-diffusion rows,\n") + cat(" or plot(.) for the offspring distribution.\n") + } else { + cat(sprintf(" Mean offspring (R) : %.3f\n", attr(x, "global"))) + if (nrow(x) > 1L) + cat(sprintf(" SD : %.3f\n", stats::sd(x$n_offspring))) + cat(sprintf(" Range : %d - %d\n", + min(x$n_offspring), max(x$n_offspring))) + cat(sprintf(" Based on %d case%s in the transmission tree.\n", + nrow(x), if (nrow(x) == 1L) "" else "s")) + cat(" -> use as.data.frame(.) for the per-case offspring count,\n") + cat(" or plot(.) for the offspring distribution.\n") + } + invisible(x) +} + +#' @rdname repr_number +#' @param y Unused. Present for S3 consistency with \code{\link[graphics]{plot}}. +#' @param main Plot title. When \code{NULL} (default), a sensible title is +#' chosen automatically: it includes "pooled over k diffusions" when the +#' tree carries multiple diffusion processes (i.e., multiple +#' \code{virus_id} values), otherwise just "Offspring distribution". +#' @param xlab,ylab Axis labels forwarded to \code{\link[graphics]{barplot}}. +#' @export +plot.netdiffuseR_repr <- function(x, y = NULL, + main = NULL, + xlab = "Number of offspring (secondary cases)", + ylab = "Number of cases", + ...) { + if (!nrow(x)) { + if (is.null(main)) main <- "Offspring distribution" + graphics::plot.new() + graphics::title(main = main, sub = "Empty transmission tree") + return(invisible(x)) + } + nv <- length(unique(x$virus_id)) + if (is.null(main)) { + main <- if (nv > 1L) + sprintf("Offspring distribution (pooled over %d diffusions)", nv) + else + "Offspring distribution" + } + sub <- if (nv > 1L) + sprintf("R = %.3f across %d cases / %d diffusions", + attr(x, "global"), nrow(x), nv) + else + sprintf("R = %.3f across %d cases", attr(x, "global"), nrow(x)) + + k <- max(x$n_offspring) + tab <- table(factor(x$n_offspring, levels = 0:k)) + graphics::barplot(as.numeric(tab), + names.arg = names(tab), + main = main, sub = sub, + xlab = xlab, ylab = ylab, ...) + invisible(x) +} + +# ---------------------------------------------------------------------------- +# summary.diffnet_epi +# ---------------------------------------------------------------------------- + +#' Summary method for \code{diffnet_epi} objects +#' +#' Extends \code{\link[=summary.diffnet]{summary.diffnet}} with an +#' epidemiological block: peak prevalence + peak time, secondary attack rate +#' (aggregate), and generation time (summary stats). The base diffnet block +#' is printed first via \code{NextMethod()}; the epi block follows. +#' +#' @param object A \code{\link{diffnet_epi}} object. +#' @param ... Forwarded to \code{summary.diffnet}. +#' +#' @return Invisibly, the object returned by \code{summary.diffnet}. The +#' epi block is printed as a side effect. +#' @export +#' @author Aníbal Olivera M. +summary.diffnet_epi <- function(object, ...) { + base_summary <- NextMethod() + + cat("\n Epidemiological metrics ----------------------------------\n") + + # Peak prevalence and peak time (work via the base diffnet method) + pp <- peak_prevalence(object) + pt <- peak_time(object) + if (length(pp) == 1L) { + cat(sprintf(" Peak prevalence : %.3f at t = %d\n", pp, pt)) + } else { + for (q in seq_along(pp)) { + cat(sprintf(" Peak prevalence [%s] : %.3f at t = %d\n", + names(pp)[q], pp[q], pt[q])) + } + } + + # SAR (diffnet_epi-only; safe to call here) + sar <- secondary_attack_rate(object) + cat(sprintf(" Secondary Attack Rate (aggregate) : %.3f (n infectors = %d)\n", + attr(sar, "global"), nrow(sar))) + + # Generation time (diffnet_epi-only) + gt <- generation_time(object) + if (nrow(gt) > 0L) { + cat(sprintf( + " Generation time : mean %.2f, median %.1f (N edges = %d)\n", + mean(gt$gen_time), stats::median(gt$gen_time), nrow(gt) + )) + } else { + cat(" Generation time : empty (seed-only tree)\n") + } + + # Survival curve (works on plain diffnet too; meaningful only when + # there is at least one recovery event in $status) + sc <- survival_curve(object) + if (nrow(sc) > 0L) { + n_rec <- sum(sc$n_recovered) + surv_end <- sc$survival[nrow(sc)] + if (n_rec > 0L) { + med_ix <- which(sc$survival <= 0.5)[1L] + med_t <- if (length(med_ix) && !is.na(med_ix)) sc$time[med_ix] else NA_integer_ + cat(sprintf( + " Survival curve : %d recoveries; final survival %.3f%s\n", + n_rec, surv_end, + if (is.na(med_t)) "" else sprintf("; median t = %s", as.character(med_t)) + )) + } else { + cat(" Survival curve : flat at 1 (no recoveries observed)\n") + } + } else { + cat(" Survival curve : empty (no adopters)\n") + } + + invisible(base_summary) +} diff --git a/R/random_graph.R b/R/random_graph.R index c5316840..de7a31b1 100644 --- a/R/random_graph.R +++ b/R/random_graph.R @@ -293,6 +293,7 @@ rgraph_ba <- function( graph$cumadopt, matrix(0, nrow=nnew-n, ncol=graph$meta$nper) ) + graph$status <- graph$cumadopt # keep canonical alias in sync graph$toa <- c(graph$toa, rep(NA, nnew-n)) names(graph$toa) <- graph$meta$ids @@ -300,6 +301,7 @@ rgraph_ba <- function( # Names dimnames(graph$adopt) <- list(graph$meta$ids, graph$meta$pers) dimnames(graph$cumadopt) <- list(graph$meta$ids, graph$meta$pers) + dimnames(graph$status) <- list(graph$meta$ids, graph$meta$pers) for (i in 1:length(out)) dimnames(graph$graph[[i]]) <- list(graph$meta$ids, graph$meta$ids) diff --git a/R/rdiffnet.r b/R/rdiffnet.r index 122938f0..ee7c9d11 100644 --- a/R/rdiffnet.r +++ b/R/rdiffnet.r @@ -22,11 +22,38 @@ #' it can also be an \eqn{n \times Q} matrix or a list of \eqn{Q} single behavior inputs. Sets the adoption #' threshold for each node. #' @param exposure.args List. Arguments to be passed to \code{\link{exposure}}. +#' @param exposure.mode Character scalar. Either "deterministic" (default) or "stochastic". #' @param name Character scalar. Passed to \code{\link{as_diffnet}}. #' @param behavior Character scalar or a list or character scalar (multiple behaviors only). Passed to \code{\link{as_diffnet}}. #' @param stop.no.diff Logical scalar. When \code{TRUE}, the function will return #' with error if there was no diffusion. Otherwise it throws a warning. #' @param disadopt Function of disadoption, with current exposition, cumulative adoption, and time as possible inputs. +#' @param adoption_mechanism Function. Per-step adoption rule. Receives +#' \code{(expo, thresholds, not_adopted, time, pars)} and returns the integer +#' indices that adopt at the current step. Defaults to +#' \code{\link{adoptmech_threshold}} (Tom Valente's deterministic threshold +#' rule). Pass \code{\link{adoptmech_logit}} or \code{\link{adoptmech_probit}} +#' for stochastic adoption, or any user-defined function with the same +#' signature. +#' @param adoption_pars Named list. Mechanism-specific parameters forwarded +#' verbatim as \code{pars} to \code{adoption_mechanism}. Stochastic +#' kernels (\code{adoptmech_logit}, \code{adoptmech_probit}) require +#' \code{beta0} and \code{beta_expo}. +#' @param source_attribution Optional lineage-tracking callback. When +#' non-\code{NULL}, \code{rdiffnet} records the inferred infector of every +#' fresh adopter during the simulation, builds a transmission tree, and +#' returns a \code{\link{diffnet_epi}} (auto-promoted). Three modes: +#' \itemize{ +#' \item{\code{NULL} (default) — no lineage tracking; the output is a +#' plain \code{\link{diffnet}}.} +#' \item{A single function — applied to every behaviour (broadcast). +#' See \code{\link{source_attribution_uniform}} / +#' \code{\link{source_attribution_weighted}} / +#' \code{\link{source_attribution_earliest}} for the bundled +#' kernels.} +#' \item{A length-\eqn{Q} list — per-behaviour attributor; +#' \code{NULL} entries skip lineage tracking for that behaviour.} +#' } #' @return A random \code{\link{diffnet}} class object. #' @family simulation functions #' @details @@ -101,6 +128,10 @@ #' \code{normalized} \tab \code{TRUE} #' } #' +#' When \code{exposure.mode = "stochastic"}, the \code{valued} argument in +#' \code{exposure.args} is forced to \code{TRUE} (with a message) to ensure that +#' edge weights are treated as probabilities. +#' #' @examples #' # (Single behavior): -------------------------------------------------------- #' @@ -399,12 +430,29 @@ rdiffnet <- function( rewire.args = list(), threshold.dist = runif(n), exposure.args = list(), + exposure.mode = "deterministic", name = "A diffusion network", behavior = "Random contagion", stop.no.diff = TRUE, - disadopt = NULL + disadopt = NULL, + adoption_mechanism = NULL, + adoption_pars = NULL, + source_attribution = NULL ) { + if (is.null(adoption_mechanism)) + adoption_mechanism <- adoptmech_threshold + if (!is.function(adoption_mechanism)) + stop("-adoption_mechanism- must be a function (see ?adoption_mechanisms).") + + # adoption_mechanism contract extension (M8): rdiffnet passes -behavior- and + # -expo_all- to the mechanism only if it declares them (formal inspection), + # so M6 kernels keep working unchanged. + .mech_formals <- names(formals(adoption_mechanism)) + .mech_has_dots <- "..." %in% .mech_formals + .mech_wants_behavior <- "behavior" %in% .mech_formals || .mech_has_dots + .mech_wants_expo_all <- "expo_all" %in% .mech_formals || .mech_has_dots + # Checking options for (arg in names(default_rewire.args)) if (!length(rewire.args[[arg]])) @@ -414,6 +462,14 @@ rdiffnet <- function( if (!length(exposure.args[[arg]])) exposure.args[[arg]] <- default_exposure.args[[arg]] + exposure.args$mode <- exposure.mode + + # If stochastic mode is selected, ensure valued is TRUE (enabling weights as probabilities) + if (exposure.mode == "stochastic" && !exposure.args$valued) { + message("exposure.mode='stochastic' requires valued=TRUE to use weights as probabilities. Setting exposure.args$valued=TRUE.") + exposure.args$valued <- TRUE + } + if (inherits(exposure.args[["attrs"]], "matrix")) { # Checking if the attrs matrix is has dims n x t if (any(dim(exposure.args[["attrs"]]) != dim(matrix(NA, nrow = n, ncol = t)))) { @@ -535,6 +591,27 @@ rdiffnet <- function( toa[d[[q]],q] <- 1L } + # Step 1.4: Normalize -source_attribution-, seed the tree if active (M8) ----- + attrs_q <- rdiffnet_normalize_source_attribution( + source_attribution, num_of_behaviors + ) + tracking_lineage <- !all(vapply(attrs_q, is.null, logical(1))) + tree_rows <- list() + if (tracking_lineage) { + for (q in 1:num_of_behaviors) { + if (is.null(attrs_q[[q]])) next + seeds_q <- as.integer(d[[q]]) + virus_name <- as.character(behavior[[q]]) + for (s in seeds_q) { + tree_rows[[length(tree_rows) + 1L]] <- list( + date = 1L, source = NA_integer_, target = s, + source_exposure_date = NA_integer_, + virus_id = as.integer(q), virus = virus_name + ) + } + } + } + # Step 2.0: Thresholds ------------------------------------------------------- thr <- rdiffnet_make_threshold(threshold.dist, n, num_of_behaviors) @@ -553,19 +630,61 @@ rdiffnet <- function( for (q in 1:num_of_behaviors) { - # 3.2 Identifying who adopts based on the threshold - whoadopts <- which( (expo[,,q] >= thr[,q]) & is.na(toa[,q])) + # 3.2 Identifying who adopts via the configured adoption_mechanism. + # The contract extension (M8) passes -behavior- and -expo_all- only + # when the mechanism declares them (or has -...-), so M6 kernels are + # untouched. + mech_call <- list( + expo = as.vector(expo[, , q]), + thresholds = thr[, q], + not_adopted = is.na(toa[, q]), + time = i, + pars = adoption_pars + ) + if (.mech_wants_behavior) mech_call$behavior <- q + if (.mech_wants_expo_all) mech_call$expo_all <- expo + whoadopts <- do.call(adoption_mechanism, mech_call) + + # 3.3 Source-attribution: for each fresh adopter, infer the infector + # *before* we mutate cumadopt/toa for this step. (M8) + if (tracking_lineage && !is.null(attrs_q[[q]]) && length(whoadopts) > 0) { + attr_fn <- attrs_q[[q]] + graph_i <- sgraph[[i]] + virus_name <- as.character(behavior[[q]]) + for (target in whoadopts) { + # Adopted neighbours at slice i (pre-update state). + row_i <- as.vector(graph_i[target, ]) + col_i <- as.vector(graph_i[, target]) + nbrs <- which((row_i != 0) | (col_i != 0)) + nbrs <- nbrs[cumadopt[nbrs, i, q] == 1L] + if (length(nbrs)) { + ord <- order(toa[nbrs, q]) + nbrs <- nbrs[ord] + weights <- pmax(row_i[nbrs], col_i[nbrs]) + src <- attr_fn(target, nbrs, weights, i, adoption_pars) + } else { + src <- NA_integer_ + } + sed <- if (is.na(src)) NA_integer_ else as.integer(toa[src, q]) + tree_rows[[length(tree_rows) + 1L]] <- list( + date = as.integer(i), source = as.integer(src), + target = as.integer(target), + source_exposure_date = sed, + virus_id = as.integer(q), virus = virus_name + ) + } + } - # 3.3 Updating the cumadopt + # 3.4 Updating the cumadopt cumadopt[whoadopts, i:t, q] <- 1L - # 3.4 Updating the toa + # 3.5 Updating the toa if (length(whoadopts) > 0) { toa[cbind(whoadopts, q)] <- i } } - # 3.5 identifiying the disadopters + # 3.6 identifiying the disadopters if (length(disadopt)) { # Run the disadoption algorithm. This will return the following: @@ -620,7 +739,7 @@ rdiffnet <- function( toa <- array(as.integer(toa), dim = dim(toa)) } - new_diffnet( + out <- new_diffnet( graph = sgraph, toa = toa, self = isself, @@ -630,6 +749,20 @@ rdiffnet <- function( name = name, behavior = behavior ) + + # M8: if lineage tracking was active in the simulation loop, build the + # transmission tree from the accumulated rows and auto-promote the diffnet + # to a -diffnet_epi-. -tracking_lineage- being TRUE without rows is + # possible (an empty simulation with NULL attributors), in which case we + # promote with an empty tree. + if (tracking_lineage) { + full_tree <- rdiffnet_tree_rows_to_df(tree_rows) + out <- as_diffnet_epi(out, + transmission = list(tree = full_tree, + pars = list())) + } + + out } rdiffnet_validate_args <- function(seed.p.adopt, seed.nodes, behavior) { @@ -803,6 +936,7 @@ split_behaviors <- function(diffnet_obj) { diffnets[[q]]$adopt <- diffnet_obj$adopt[[q]] diffnets[[q]]$cumadopt <- diffnet_obj$cumadopt[[q]] + diffnets[[q]]$status <- diffnets[[q]]$cumadopt # canonical alias diffnets[[q]]$meta$behavior <- behaviors_names[q] } diff --git a/R/rewire.r b/R/rewire.r index e0335082..3c7c8dba 100644 --- a/R/rewire.r +++ b/R/rewire.r @@ -498,6 +498,7 @@ rewire_qap <- function(graph) { # Adoptions graph$cumadopt <- graph$cumadopt[neword,,drop=FALSE] + graph$status <- graph$cumadopt # keep canonical alias in sync graph$adopt <- graph$adopt[neword,,drop=FALSE] graph$toa <- graph$toa[neword] diff --git a/R/source_attribution.R b/R/source_attribution.R new file mode 100644 index 00000000..16935c5a --- /dev/null +++ b/R/source_attribution.R @@ -0,0 +1,171 @@ +#' Source-attribution kernels for \code{rdiffnet}'s lineage tracking +#' +#' A family of pluggable rules that decide, for each freshly adopted node +#' in an \code{\link{rdiffnet}} simulation, \emph{which} of its already +#' adopted neighbours infected it. Pass any of these as the +#' \code{source_attribution} argument of \code{\link{rdiffnet}}, or write +#' your own function that follows the same contract. +#' +#' @param target Integer scalar. Index of the freshly adopted node. +#' @param adopted_neighbours Integer vector of indices of neighbours of +#' \code{target} that were already adopted at the previous time step. +#' Pre-sorted by ascending time-of-adoption (earliest infector first). +#' Empty when \code{target} is a seed. +#' @param weights Numeric vector of edge weights aligned with +#' \code{adopted_neighbours} (same length, same order). Carries the +#' non-zero entries of \code{sgraph[[time]][target, ]}. \code{NULL} or +#' constant when the graph is unweighted. +#' @param time Integer scalar. Current simulation time step. +#' @param pars Named list. Passed verbatim from \code{adoption_pars} so +#' user-defined attributors can read whatever they need. +#' +#' @return A single integer index — one of \code{adopted_neighbours}, +#' identifying the attributed source. \code{NA_integer_} when +#' \code{adopted_neighbours} is empty (seed or spontaneously adopted +#' node). +#' +#' @details +#' The contract is intentionally minimal. \code{rdiffnet()} pre-computes +#' \code{adopted_neighbours} and \code{weights} once per fresh adoption +#' and hands them sorted by toa, so user-defined attributors don't have +#' to query the simulation state themselves. The three kernels shipped +#' with the package: +#' +#' \describe{ +#' \item{\code{source_attribution_uniform}}{Samples uniformly across +#' \code{adopted_neighbours}. The default when the user passes +#' \code{TRUE} (deferred) and the simplest reasonable choice when +#' nothing distinguishes the candidates.} +#' \item{\code{source_attribution_weighted}}{Samples with probability +#' proportional to \code{weights}. Falls back to uniform when the +#' graph carries no weights (all entries equal). Appropriate for +#' contact-network simulations where edge weights encode contact +#' intensity.} +#' \item{\code{source_attribution_earliest}}{Returns the +#' earliest-infected adopted neighbour. Mirrors the heuristic that +#' the playground's \code{derive_tree} used post-hoc; useful as a +#' deterministic baseline.} +#' } +#' +#' @references +#' Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +#' Superspreading and the effect of individual variation on disease emergence. +#' \emph{Nature} 438:355-359. \doi{10.1038/nature04153} +#' +#' @examples +#' set.seed(2026) +#' +#' # Use a kernel directly inside rdiffnet(): +#' dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", +#' seed.p.adopt = 0.1, stop.no.diff = FALSE, +#' source_attribution = source_attribution_weighted) +#' +#' is.diffnet_epi(dn) # TRUE — auto-promoted +#' nrow(transmission_tree(dn)) # one row per fresh adoption + seeds +#' +#' @name source_attribution +#' @author Aníbal Olivera M. +NULL + +#' @rdname source_attribution +#' @export +source_attribution_uniform <- function(target, adopted_neighbours, weights, + time, pars) { + if (!length(adopted_neighbours)) return(NA_integer_) + if (length(adopted_neighbours) == 1L) return(as.integer(adopted_neighbours[1L])) + as.integer(sample(adopted_neighbours, size = 1L)) +} + +#' @rdname source_attribution +#' @export +source_attribution_weighted <- function(target, adopted_neighbours, weights, + time, pars) { + if (!length(adopted_neighbours)) return(NA_integer_) + if (length(adopted_neighbours) == 1L) return(as.integer(adopted_neighbours[1L])) + + # No discriminating weights -> fall back to uniform silently. Users who + # really want weighted attribution on an unweighted graph need to encode + # the weights they care about into the simulation's graph. + if (is.null(weights) || all(is.na(weights)) || + length(unique(weights)) == 1L) { + return(as.integer(sample(adopted_neighbours, size = 1L))) + } + + # Guard against zero / negative weights (sample() with prob requires non- + # negative non-zero sums). + w <- pmax(as.numeric(weights), 0) + if (sum(w) == 0) + return(as.integer(sample(adopted_neighbours, size = 1L))) + + as.integer(sample(adopted_neighbours, size = 1L, prob = w)) +} + +#' @rdname source_attribution +#' @export +source_attribution_earliest <- function(target, adopted_neighbours, weights, + time, pars) { + if (!length(adopted_neighbours)) return(NA_integer_) + # By contract -adopted_neighbours- is sorted by ascending toa, so the + # earliest infector is at position 1. + as.integer(adopted_neighbours[1L]) +} + +# Internal: validate -source_attribution- and normalize it to a length-Q list. +# Returns a list of length num_of_behaviors with either NULL or a function per +# behaviour. +rdiffnet_normalize_source_attribution <- function(source_attribution, + num_of_behaviors) { + if (is.null(source_attribution)) { + return(rep(list(NULL), num_of_behaviors)) + } + + if (is.function(source_attribution)) { + # Broadcast: same attributor for every behaviour. + return(rep(list(source_attribution), num_of_behaviors)) + } + + if (is.list(source_attribution)) { + if (length(source_attribution) != num_of_behaviors) + stop("-source_attribution- list must have length ", num_of_behaviors, + " (one entry per behaviour), got ", length(source_attribution), ".") + bad <- which(!vapply(source_attribution, + function(z) is.null(z) || is.function(z), + logical(1))) + if (length(bad)) + stop("Every -source_attribution[[q]]- must be NULL or a function. ", + "Offending position(s): ", paste(bad, collapse = ", "), ".") + return(source_attribution) + } + + stop("-source_attribution- must be NULL, a function, or a length-Q list of ", + "functions / NULLs.") +} + +# Internal: assemble accumulated tree rows (list of one-row lists) into the +# canonical six-column transmission data.frame. -as_transmission_tree- / +# -as_diffnet_epi- will validate the result. +rdiffnet_tree_rows_to_df <- function(rows) { + if (!length(rows)) { + return(data.frame( + date = integer(0), + source = integer(0), + target = integer(0), + source_exposure_date = integer(0), + virus_id = integer(0), + virus = character(0), + stringsAsFactors = FALSE + )) + } + data.frame( + date = vapply(rows, `[[`, integer(1L), "date"), + source = vapply(rows, function(r) as.integer(r$source), + integer(1L)), + target = vapply(rows, `[[`, integer(1L), "target"), + source_exposure_date = vapply(rows, + function(r) as.integer(r$source_exposure_date), + integer(1L)), + virus_id = vapply(rows, `[[`, integer(1L), "virus_id"), + virus = vapply(rows, `[[`, character(1L), "virus"), + stringsAsFactors = FALSE + ) +} diff --git a/R/stats.R b/R/stats.R index dadcac6d..eb529c0b 100644 --- a/R/stats.R +++ b/R/stats.R @@ -275,6 +275,21 @@ dgr.array <- function(graph, cmode, undirected, self, valued) { #' @param groupvar Passed to \code{\link{struct_equiv}}. #' @param lags Integer scalar. When different from 0, the resulting exposure #' matrix will be the lagged exposure as specified (see examples). +#' @param mode Character scalar. Either "deterministic" (default) or "stochastic". +#' @param link_fun Character scalar or function. Kernel applied to the +#' (valued) edge weights before exposure is computed. Supported names: +#' \code{"identity"} (default, no transformation), \code{"linear"} +#' (\eqn{\min(\beta w, 1)}), \code{"sigmoid"} +#' (\eqn{\mathrm{plogis}((w - h)/\mathrm{scale})}), and \code{"wells-riley"} +#' (\eqn{1 - \exp(-\beta w)}). Alternatively, a user-supplied +#' single-argument function \code{function(w)} with its parameters +#' baked into the closure; it must return a vector of the same length +#' as \code{w}. When \code{link_fun} is not \code{"identity"}, +#' \code{valued} is forced to \code{TRUE} (with a warning if the user +#' set it to \code{FALSE}). +#' @param link_pars Named list with the scalar parameters required by +#' the named kernels (\code{"linear"}, \code{"sigmoid"}, +#' \code{"wells-riley"}). Ignored when \code{link_fun} is a function. #' @details #' Exposure is calculated as follows: #' @@ -330,6 +345,25 @@ dgr.array <- function(graph, cmode, undirected, self, valued) { #' computed as a count instead of a proportion. A good example of this can be #' found at the examples section of the function \code{\link{rdiffnet}}. #' +#' \strong{Stochastic Exposure} +#' +#' When \code{mode = "stochastic"}, the exposure is calculated based on a probabilistic +#' interpretation of the edges. In this mode, the weights of the graph \eqn{S_t} are +#' treated as probabilities of transmission. For each edge \eqn{(i,j)}, a Bernoulli +#' trial is performed with probability \eqn{S_{t,ij}}. If the trial is successful, +#' the edge is "realized" as a full connection. If failed, the edge is treated +#' as non-existent. +#' +#' The denominator is calculated using the degree of the node, representing the total +#' number of potential contacts. +#' +#' \deqn{ +#' \tilde{E}_{ti} = \frac{\sum_{j \neq i} \mathbb{I}(U_{ij} < S_{t,ij}) a_{tj}}{\sum_{j \neq i} 1} +#' } +#' +#' Where \eqn{S_{t,ij}} is the weight of the edge from \eqn{j} to \eqn{i} at time \eqn{t} +#' (treated as probability), and \eqn{U_{ij} \sim \text{Uniform}(0,1)}. +#' #' @references #' Burt, R. S. (1987). "Social Contagion and Innovation: Cohesion versus Structural #' Equivalence". American Journal of Sociology, 92(6), 1287. @@ -494,8 +528,55 @@ dgr.array <- function(graph, cmode, undirected, self, valued) { #' @name exposure NULL +# Link / kernel function applied to edge weights before exposure is computed. +# `link_fun` is either one of the named kernels listed below or a +# user-supplied single-argument function `function(w)` that returns a vector +# of the same length as `w` (the non-zero entries of a dgCMatrix, `@x`). +# Parameters for user functions are expected to be baked into the closure. +.apply_link_kernel <- function(W, link_fun, link_pars) { + if (is.null(link_fun) || + (is.character(link_fun) && identical(link_fun, "identity"))) + return(W) + + if (is.function(link_fun)) { + new_x <- link_fun(W@x) + if (length(new_x) != length(W@x)) + stop("Custom -link_fun- must return a vector of the same length as ", + "its input (the non-zero edge weights).") + W@x <- as.numeric(new_x) + return(W) + } + + if (!is.character(link_fun) || length(link_fun) != 1L) + stop("-link_fun- must be NULL, a character scalar, or a function.") + + W@x <- switch( + link_fun, + "linear" = { + if (is.null(link_pars$beta)) + stop("link_fun = \"linear\" requires link_pars$beta.") + pmin(link_pars$beta * W@x, 1) + }, + "sigmoid" = { + if (is.null(link_pars$h) || is.null(link_pars$scale)) + stop("link_fun = \"sigmoid\" requires link_pars$h and link_pars$scale.") + stats::plogis((W@x - link_pars$h) / link_pars$scale) + }, + "wells-riley" = { + if (is.null(link_pars$beta)) + stop("link_fun = \"wells-riley\" requires link_pars$beta.") + 1 - exp(-link_pars$beta * W@x) + }, + stop("Unknown link_fun: ", link_fun, + ". Supported: identity, linear, sigmoid, wells-riley, or a function.") + ) + W +} + # Workhorse of exposure plotting -.exposure <- function(graph, cumadopt, attrs, outgoing, valued, normalized, self) { +.exposure <- function(graph, cumadopt, attrs, outgoing, valued, normalized, self, + mode = "deterministic", + link_fun = "identity", link_pars = list()) { # Getting the parameters n <- nrow(graph) @@ -513,23 +594,63 @@ NULL # Checking self if (!self) graph <- sp_diag(graph, rep(0, nnodes(graph))) - norm <- graph %*% attrs + 1e-20 + # Apply link / kernel function to edge weights (pre stochastic / normalization) + graph <- .apply_link_kernel(graph, link_fun, link_pars) + + # Calculate normalization and apply stochastic filter + if (mode == "stochastic") { + # Stochastic mode interprets edge weights as Bernoulli probabilities. + # Values outside [0, 1] saturate the sampler (>1 always fires, <0 + # never fires), which is almost never what the user intended when + # weights come from raw quantities (e.g. seconds of contact). Warn + # and let the caller decide whether to apply a `link_fun`. + if (length(graph@x) && + (any(graph@x < 0, na.rm = TRUE) || + any(graph@x > 1, na.rm = TRUE))) { + warning("Stochastic exposure expects edge weights in [0, 1] ", + "(Bernoulli probabilities). Found values outside this ", + "range; the sampler will saturate. Consider applying a ", + "-link_fun- such as \"wells-riley\" or \"linear\" that ", + "maps weights into [0, 1].") + } + + # Denominator: count non-zero-weight neighbours. Using `graph@x != 0` + # instead of `rep(1, ...)` keeps the denominator aligned with the + # kernel output -- structurally-stored zeros (e.g. from a link kernel + # that maps some weights to 0, or from user-supplied 0 weights) do + # not inflate the degree. + graph_binary <- graph + graph_binary@x <- as.numeric(graph@x != 0) + norm <- as.vector(graph_binary %*% attrs) + 1e-20 + + # Numerator: Stochastic Filter (Bernoulli -> Binary) + u <- stats::runif(length(graph@x)) + graph@x <- as.numeric(u < graph@x) + } else { + # Deterministic: Based on original weights + norm <- as.vector(graph %*% attrs) + 1e-20 + } - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { ans <- array(0, dim = c(dim(cumadopt)[1],dim(cumadopt)[3])) for (q in 1:dim(cumadopt)[3]) { + # Calculate numerator: Realized connections * Adoption status + numerator <- as.vector(graph %*% (attrs * cumadopt[,,q])) + if (normalized) { - ans[,q] <- as.vector(graph %*% (attrs * cumadopt[,,q]) / norm) + ans[,q] <- numerator / norm } else { - ans[,q] <- as.vector(graph %*% (attrs * cumadopt[,,q])) + ans[,q] <- numerator } } } else { - ans <- graph %*% (attrs * cumadopt) + numerator <- as.vector(graph %*% (attrs * cumadopt)) if (normalized) { - ans <- ans/ norm + ans <- numerator / norm + } else { + ans <- numerator } } @@ -570,9 +691,22 @@ exposure <- function( groupvar = NULL, self = getOption("diffnet.self"), lags = 0L, + mode = "deterministic", + link_fun = "identity", + link_pars = list(), ... ) { + # When a non-identity link kernel is requested, edge weights become the + # kernel input and must be preserved (binarizing would collapse them). + is_identity_link <- (is.null(link_fun)) || + (is.character(link_fun) && identical(link_fun, "identity")) + if (!is_identity_link && !valued) { + warning("-link_fun- different from \"identity\" requires valued edges; ", + "forcing -valued- to TRUE.") + valued <- TRUE + } + # Checking diffnet attributes if (length(attrs) == 1 && inherits(attrs, "character")) { if (!inherits(graph, "diffnet")) @@ -595,18 +729,55 @@ exposure <- function( if (!inherits(graph, "diffnet")) { stop("-cumadopt- should be provided when -graph- is not of class 'diffnet'") } else { - cumadopt <- toa_mat(graph)$cumadopt + cumadopt <- graph$cumadopt + + # Ensure rownames are present if graph is diffnet + if (is.list(cumadopt) && !is.data.frame(cumadopt)) { + for (i in seq_along(cumadopt)) { + if (is.null(rownames(cumadopt[[i]]))) { + rownames(cumadopt[[i]]) <- graph$meta$ids + } + } + } else if (is.null(rownames(cumadopt))) { + rownames(cumadopt) <- graph$meta$ids + } + } + + # Handling list of matrices (multi-behavior diffnet) + if (is.list(cumadopt) && !is.data.frame(cumadopt)) { + # Check if it is a list of matrices + if (all(sapply(cumadopt, function(x) length(dim(x)) == 2))) { + # Convert to array + n_c <- nrow(cumadopt[[1]]) + t_c <- ncol(cumadopt[[1]]) + q_c <- length(cumadopt) + cumadopt_arr <- array(0, dim = c(n_c, t_c, q_c)) + + # Preserve dimnames + dimnames(cumadopt_arr) <- list( + rownames(cumadopt[[1]]), + colnames(cumadopt[[1]]), + names(cumadopt) + ) + + for (i in 1:q_c) { + cumadopt_arr[,,i] <- as.matrix(cumadopt[[i]]) + } + cumadopt <- cumadopt_arr + } else { + warning("cumadopt is a list but elements do not appear to be matrices. Exposure calculation may fail.") } + } # Checking diffnet graph if (inherits(graph, "diffnet")) graph <- graph$graph # Checking attrs if (!length(attrs)) { - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { attrs <- array(1, dim = c(nrow(cumadopt), ncol(cumadopt), 1))} else {attrs <- matrix(1, ncol=ncol(cumadopt), nrow=nrow(cumadopt))} - } else if (!is.na(dim(cumadopt)[3])) { + } else if (length(dim(cumadopt)) == 3) { attrs <- array(attrs, dim = c(nrow(attrs), ncol(attrs), 1)) } @@ -653,7 +824,8 @@ exposure <- function( if ((is.array(graph) & !inherits(graph, "matrix")) | is.list(graph)) { exposure.list(as_spmat(graph), cumadopt, attrs, outgoing, valued, normalized, - self, lags) + self, lags, mode = mode, + link_fun = link_fun, link_pars = link_pars) } else stopifnot_graph(graph) } @@ -661,7 +833,8 @@ exposure <- function( # @export exposure.list <- function( graph, cumadopt, attrs, - outgoing, valued, normalized, self, lags) { + outgoing, valued, normalized, self, lags, mode = "deterministic", + link_fun = "identity", link_pars = list()) { # attrs can be either # degree, indegree, outdegree, or a user defined vector. @@ -670,7 +843,7 @@ exposure.list <- function( # dim(attrs) default n x T matrix of 1's if (!length(dim(attrs))) stop("-attrs- must be a matrix of size n by T.") - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { if (dim(cumadopt)[3]>1 && any(dim(attrs)[-3] != dim(cumadopt)[-3])) stop("Incorrect size for -attrs-. ", "Does not match n dim or t dim.") } else { @@ -681,7 +854,8 @@ exposure.list <- function( add_dimnames.mat(attrs) output <- exposure_for(graph, cumadopt, attrs, outgoing, valued, normalized, - self, lags) + self, lags, mode = mode, + link_fun = link_fun, link_pars = link_pars) dimnames(output) <- dimnames(cumadopt) output @@ -696,10 +870,13 @@ exposure_for <- function( valued, normalized, self, - lags + lags, + mode = "deterministic", + link_fun = "identity", + link_pars = list() ) { - if (!is.na(dim(cumadopt)[3])) { + if (length(dim(cumadopt)) == 3) { out <- array(NA, dim = c(dim(cumadopt)[1], dim(cumadopt)[2], dim(cumadopt)[3])) if (lags >= 0L) { @@ -710,7 +887,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } else { for (i in (1 - lags):nslices(graph)) { @@ -720,7 +900,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } } else { @@ -734,7 +917,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } else { for (i in (1 - lags):nslices(graph)) { @@ -744,7 +930,10 @@ exposure_for <- function( outgoing = outgoing, valued = valued, normalized = normalized, - self = self) + self = self, + mode = mode, + link_fun = link_fun, + link_pars = link_pars) } } } @@ -886,20 +1075,38 @@ cumulative_adopt_count <- function(obj) { hazard_rate <- function(obj, no.plot=FALSE, include.grid=TRUE, ...) { if (inherits(obj, "diffnet")) { dn <- with(obj$meta, list(ids, pers)) - obj <- obj$cumadopt + # M11: read the canonical -$status- slot (alias of -$cumadopt- under M5). + # For absorbing diffnets the two are bit-identical, so legacy callers see + # no change. For multi-cycle diffnets, -$status- carries the actual state. + obj <- obj$status dimnames(obj) <- dn } else { if (!length(colnames(obj))) colnames(obj) <- seq_len(ncol(obj)) } - q <- colSums(obj) - t <- length(q) + # M11: count fresh adoption events (0->1 transitions) and divide by the + # currently-not-adopted denominator. For monotone -obj- (the only kind + # supported pre-M5) this collapses to (q[t] - q[t-1]) / (n - q[t-1]) + # bit-identically, so legacy tests pass without modification. For + # non-monotone -obj- (multi-cycle status arrays) the formula correctly + # counts re-adoptions in the numerator and currently-susceptible nodes + # in the denominator. + T_ <- ncol(obj) + if (T_ < 2L) { + haz <- 0 + } else { + prev <- obj[, 1:(T_ - 1L), drop = FALSE] + curr <- obj[, 2:T_, drop = FALSE] + fresh <- colSums((curr == 1L) & (prev == 0L)) + susc <- nrow(obj) - colSums(prev == 1L) + haz <- c(0, fresh / (susc + 1e-15)) + } x <- structure( - rbind(c(0,(q[-1] - q[-t])/(nrow(obj) - q[-t] + 1e-15))) - , dimnames = list("hazard", colnames(obj)), - class=c("diffnet_hr", "matrix") + rbind(haz), + dimnames = list("hazard", colnames(obj)), + class = c("diffnet_hr", "matrix") ) if (!no.plot) plot.diffnet_hr(x, include.grid=include.grid, ...) diff --git a/R/status_accessors.R b/R/status_accessors.R new file mode 100644 index 00000000..25262621 --- /dev/null +++ b/R/status_accessors.R @@ -0,0 +1,177 @@ +#' Accessors for adoption / disadoption times in a \code{diffnet} +#' +#' Mirror accessors over the canonical \code{$status} slot. \code{toa} and +#' \code{tod} return the \emph{first} adoption / first recovery time per node +#' (per behaviour for multi-behaviour diffnets) — same shape as the legacy +#' \code{$toa} field. \code{toa_all} and \code{tod_all} return long-format +#' \code{data.frame}s capturing every event in the multi-cycle history. +#' +#' @param x A \code{diffnet} object. +#' +#' @return +#' \describe{ +#' \item{\code{toa(x)}}{Integer vector of length \eqn{n} (single-behaviour) +#' or \eqn{n \times Q} integer matrix (multi-behaviour). \code{NA} when +#' the node never adopted. Equivalent to \code{x$toa}.} +#' \item{\code{tod(x)}}{Same shape as \code{toa(x)}. First time after +#' \code{toa[i, q]} when \code{$status[i, t, q]} flips back to 0. +#' \code{NA} when the node never recovered (absorbing).} +#' \item{\code{toa_all(x)}}{\code{data.frame} with columns \code{node}, +#' \code{behavior}, \code{episode}, \code{time}. One row per fresh +#' adoption event in \code{$status}.} +#' \item{\code{tod_all(x)}}{\code{data.frame} with columns \code{node}, +#' \code{behavior}, \code{episode}, \code{time}. One row per recovery +#' event in \code{$status}.} +#' } +#' +#' @details +#' For an absorbing single-cycle diffnet \code{tod(x)} is \code{NA} for every +#' node (no recoveries) and \code{tod_all(x)} returns a zero-row data.frame. +#' For a multi-cycle diffnet, \code{tod(x)} reports only the first recovery +#' per node-behaviour as a summary; use \code{tod_all(x)} for the full +#' history. +#' +#' @examples +#' set.seed(2026) +#' g <- rgraph_er(n = 10, t = 1, p = 0.4) +#' dn <- rdiffnet(seed.graph = g, t = 6, seed.p.adopt = 0.2, +#' stop.no.diff = FALSE) +#' +#' toa(dn) # first adoption time per node (same as dn$toa) +#' tod(dn) # first recovery — all NA for an absorbing diffnet +#' toa_all(dn) # one row per fresh adoption event +#' tod_all(dn) # zero-row data.frame for an absorbing diffnet +#' +#' @author Aníbal Olivera M. +#' @name status_accessors +NULL + +# ---------------------------------------------------------------------------- +# toa / tod : simple summaries (same shape as the $toa slot) +# ---------------------------------------------------------------------------- + +#' @rdname status_accessors +#' @export +toa <- function(x) UseMethod("toa") + +#' @rdname status_accessors +#' @export +toa.diffnet <- function(x) x$toa + +#' @rdname status_accessors +#' @export +tod <- function(x) UseMethod("tod") + +#' @rdname status_accessors +#' @export +tod.diffnet <- function(x) { + s <- x$status + if (is.null(s)) { + # Defensive — every diffnet built post-status-refactor has a $status + # slot, but keep this safe against any older object that might still + # be in scope. + return(rep(NA_integer_, length(x$toa))) + } + + if (is.list(s)) { + Q <- length(s) + n <- nrow(s[[1L]]) + out <- matrix(NA_integer_, n, Q) + for (q in seq_len(Q)) out[, q] <- .first_recovery(s[[q]], x$toa[, q]) + rownames(out) <- rownames(x$toa) + return(out) + } + + res <- .first_recovery(s, x$toa) + if (length(names(x$toa))) names(res) <- names(x$toa) + res +} + +# Internal: first time t > toa[i] when status[i, t] == 0; NA otherwise. +.first_recovery <- function(status_q, toa_q) { + n <- nrow(status_q) + T <- ncol(status_q) + vapply(seq_len(n), function(i) { + ti <- toa_q[i] + if (is.na(ti) || ti >= T) return(NA_integer_) + after <- which(status_q[i, (ti + 1L):T] == 0L) + if (length(after)) ti + as.integer(after[1L]) else NA_integer_ + }, integer(1L)) +} + +# ---------------------------------------------------------------------------- +# toa_all / tod_all : long-format multi-cycle history +# ---------------------------------------------------------------------------- + +#' @rdname status_accessors +#' @export +toa_all <- function(x) UseMethod("toa_all") + +#' @rdname status_accessors +#' @export +toa_all.diffnet <- function(x) .events_long(x, kind = "adopt") + +#' @rdname status_accessors +#' @export +tod_all <- function(x) UseMethod("tod_all") + +#' @rdname status_accessors +#' @export +tod_all.diffnet <- function(x) .events_long(x, kind = "recover") + +# Internal: walk $status (single matrix or list of matrices) and emit a +# long-format data.frame with one row per "fresh adoption" (kind = "adopt") +# or per "fresh recovery" (kind = "recover") event. The episode column is +# 1-indexed within each (node, behavior) pair, in time order. +.events_long <- function(x, kind = c("adopt", "recover")) { + kind <- match.arg(kind) + s <- x$status + + if (is.null(s)) { + return(data.frame( + node = integer(0), + behavior = integer(0), + episode = integer(0), + time = integer(0) + )) + } + + status_list <- if (is.list(s)) s else list(s) + + rows <- list() + for (q in seq_along(status_list)) { + sq <- status_list[[q]] + n <- nrow(sq); T <- ncol(sq) + times_label <- as.integer(colnames(sq)) + if (length(times_label) != T) times_label <- seq_len(T) + + for (i in seq_len(n)) { + v <- as.integer(sq[i, ]) + # diffs[t] = v[t] - v[t - 1]; +1 = fresh adoption, -1 = fresh recovery. + diffs <- c(v[1L], diff(v)) + events_t <- if (kind == "adopt") which(diffs == 1L) else which(diffs == -1L) + if (length(events_t)) { + rows[[length(rows) + 1L]] <- data.frame( + node = rep.int(i, length(events_t)), + behavior = rep.int(q, length(events_t)), + episode = seq_along(events_t), + time = times_label[events_t], + stringsAsFactors = FALSE + ) + } + } + } + + if (!length(rows)) { + return(data.frame( + node = integer(0), + behavior = integer(0), + episode = integer(0), + time = integer(0) + )) + } + + out <- do.call(rbind, rows) + rownames(out) <- NULL + out[order(out$node, out$behavior, out$episode), , drop = FALSE] +} diff --git a/R/transmission.R b/R/transmission.R new file mode 100644 index 00000000..59d9326a --- /dev/null +++ b/R/transmission.R @@ -0,0 +1,334 @@ +# Transmission tree handling for diffnet objects. +# +# The $transmission slot is a list with the following elements: +# - tree: data.frame with columns +# date integer, period when the transmission happened +# source integer, row index of the infector in x (NA for seeds) +# target integer, row index of the infectee in x +# source_exposure_date integer, period when `source` was infected (NA for seeds) +# virus_id integer, optional virus identifier +# virus character, optional virus label +# - pars: list, free-form parameters/metadata associated with the tree. +# Each row represents one infection event (an edge in the transmission tree); +# the set of (source -> target) pairs forms the directed forest from which +# offspring distributions (Lloyd-Smith et al., 2005) and likelihood-based +# reproduction-number estimates (White & Pagano, 2008) are derived. + +.transmission_cols <- c( + "date", "source", "target", "source_exposure_date", "virus_id", "virus" +) + +.empty_transmission_tree <- function() { + data.frame( + date = integer(0), + source = integer(0), + target = integer(0), + source_exposure_date = integer(0), + virus_id = integer(0), + virus = character(0), + stringsAsFactors = FALSE + ) +} + +#' Attach a transmission tree to a \code{diffnet} object +#' +#' Populates the \code{$transmission} slot of a \code{diffnet} with a +#' transmission tree (who-infected-whom). The resulting directed forest is the +#' canonical input to offspring-distribution analyses +#' (Lloyd-Smith \emph{et al.}, 2005) and to likelihood-based estimators of the +#' reproduction number and serial interval (White & Pagano, 2008). +#' +#' @param x A \code{diffnet} object. +#' @param tree A \code{data.frame} with at least the columns \code{date}, +#' \code{source}, \code{target}, and \code{source_exposure_date}. Columns +#' \code{virus_id} and \code{virus} are optional. \code{source} and +#' \code{source_exposure_date} may be \code{NA} for seed infections (roots +#' of the tree). +#' @param pars Optional named list stored verbatim in \code{x$transmission$pars}. +#' Useful for recording kernel parameters, seeds, etc. +#' +#' @details +#' Each row of \code{tree} represents one infection event (an edge +#' \eqn{\text{source} \to \text{target}} in the transmission tree) time-stamped +#' by \code{date}. \code{source} and \code{target} must be integer row indices +#' into \code{x} (\code{1..nnodes(x)}); \code{target} is required for every +#' row. Existing \code{$transmission} content is overwritten. +#' +#' Attaching a transmission tree promotes \code{x} to the +#' \code{\link{diffnet_epi}} subclass (\code{class(x) <- c("diffnet_epi", +#' "diffnet")}). The promotion is monotone — an already-\code{diffnet_epi} +#' input keeps its class. See \code{\link{as_diffnet_epi}} for the low-level +#' constructor. +#' +#' @return A \code{\link{diffnet_epi}} object — the input \code{x} promoted to +#' the subclass with \code{$transmission} set to a list with components +#' \code{tree} (a clean, ordered \code{data.frame}) and \code{pars}. +#' +#' @references +#' Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +#' Superspreading and the effect of individual variation on disease emergence. +#' \emph{Nature} 438:355-359. \doi{10.1038/nature04153} +#' +#' White, L. F., & Pagano, M. (2008). A likelihood-based method for real-time +#' estimation of the serial interval and reproductive number of an epidemic. +#' \emph{Statistics in Medicine} 27:2999-3016. \doi{10.1002/sim.3136} +#' +#' @export +#' @seealso \code{\link{new_diffnet}} +#' @author Aníbal Olivera M. +as_transmission_tree <- function(x, tree, pars = list()) { + + if (!inherits(x, "diffnet")) + stop("-x- must be a diffnet object.") + + if (!is.data.frame(tree)) + stop("-tree- must be a data.frame.") + + required <- c("date", "source", "target", "source_exposure_date") + missing_cols <- setdiff(required, names(tree)) + if (length(missing_cols)) + stop("-tree- is missing required column(s): ", + paste(missing_cols, collapse = ", "), ".") + + if (anyNA(tree$target)) + stop("-tree$target- cannot contain NA values.") + + n <- nnodes(x) + tgt <- suppressWarnings(as.integer(tree$target)) + if (anyNA(tgt) || any(tgt < 1L) || any(tgt > n)) + stop("-tree$target- must be integer indices in 1..", n, ".") + + src <- suppressWarnings(as.integer(tree$source)) + src_ok <- src[!is.na(src)] + if (length(src_ok) && (any(src_ok < 1L) || any(src_ok > n))) + stop("-tree$source- must be NA or an integer index in 1..", n, ".") + + out <- data.frame( + date = as.integer(tree$date), + source = src, + target = tgt, + source_exposure_date = as.integer(tree$source_exposure_date), + stringsAsFactors = FALSE + ) + + out$virus_id <- if (!is.null(tree$virus_id)) + as.integer(tree$virus_id) else rep(1L, nrow(out)) + + out$virus <- if (!is.null(tree$virus)) + as.character(tree$virus) else rep(NA_character_, nrow(out)) + + out <- out[, .transmission_cols, drop = FALSE] + out <- out[order(out$date, out$target), , drop = FALSE] + rownames(out) <- NULL + + # Promote to diffnet_epi (monotone — keeps class if already promoted) and + # attach the validated tree. + if (!inherits(x, "diffnet_epi")) + class(x) <- c("diffnet_epi", class(x)) + x$transmission <- list(tree = out, pars = pars) + x +} + +#' Reconstruct a transmission tree from observed adoption times (M13) +#' +#' Given a dynamic contact network and per-node times of adoption, infer a +#' transmission tree by source-attribution: for each infection event, pick +#' the most plausible infector among the target's adopted neighbours at the +#' slice where the target was infected. The selection rule is the user's +#' choice (one of the bundled \code{\link{source_attribution}} kernels or a +#' user-supplied function that follows the same contract). +#' +#' This is the general-purpose primitive behind \code{rdiffnet()}'s +#' lineage-tracking (M8): the same algorithm that constructs the tree +#' during simulation also constructs it post-hoc from observed data, which +#' is what makes data products like \code{\link{epigamesDiffNet}} possible +#' without bespoke parsing code. +#' +#' @param x Either a \code{\link{diffnet}} object (graphs and \code{toa} +#' read from its slots), or a list of adjacency matrices — one per +#' time slice. When a list, \code{toa} must be supplied. +#' @param toa Times-of-adoption. Integer vector of length \eqn{n} +#' (single-behaviour) or \eqn{n \times Q} integer matrix (multi-behaviour). +#' \code{NA} marks a node that never adopted. Ignored when \code{x} is a +#' diffnet (read from \code{x$toa} instead). +#' @param attribution The source-attribution rule. Either a string — +#' \code{"uniform"}, \code{"weighted"}, or \code{"earliest"} (the +#' bundled kernels) — or a function with the same signature as +#' \code{\link{source_attribution_uniform}}. +#' @param pars Optional named list. Stored verbatim in +#' \code{x$transmission$pars} when the result is wired into +#' \code{\link{as_diffnet_epi}}; also forwarded to the attribution +#' function as its \code{pars} argument. +#' @param behavior Optional character vector of length \eqn{Q} naming +#' each diffusion process. Used to populate the \code{virus} column of +#' the returned tree. Defaults to \code{"behavior_1"}, \code{"behavior_2"}, +#' ... when \code{NULL}. +#' @param seed Optional integer. When non-\code{NULL}, calls +#' \code{\link{set.seed}} once before reconstruction so the stochastic +#' attribution rules (\code{"uniform"}, \code{"weighted"}) produce a +#' reproducible tree. +#' +#' @return A \code{data.frame} with the canonical transmission-tree +#' schema: \code{date}, \code{source}, \code{target}, +#' \code{source_exposure_date}, \code{virus_id}, \code{virus}. +#' One row per infection event (seeds included, with +#' \code{source = NA}). Suitable to pass straight to +#' \code{\link{as_transmission_tree}}. +#' +#' @details +#' For every \code{(target, virus_id)} pair with non-\code{NA} \code{toa}, +#' the algorithm inspects the slice \code{x$graph[[toa[target, q]]]} and +#' picks the source among target's neighbours that were already adopted +#' (\code{toa[v, q] < toa[target, q]}). Targets with no adopted neighbour +#' at the moment of their adoption become seeds (\code{source = NA}). +#' Multi-behaviour diffnets are handled one behaviour at a time; the +#' resulting rows are concatenated. +#' +#' Under SIRS-style re-infection \code{toa[target, q]} only stores the +#' \emph{latest} infection time, so the reconstructed tree will only carry +#' one row per node per virus. To capture every entry into I, build the +#' tree at simulation time via +#' \code{rdiffnet(..., source_attribution = ...)} instead — that path +#' records each fresh adoption as it happens. +#' +#' @examples +#' set.seed(2026) +#' # Build a tiny absorbing diffnet, then reconstruct its tree post-hoc. +#' g <- lapply(1:5, function(t) rgraph_ba(t = 4L)) +#' toa <- c(1L, 2L, 3L, NA, 5L) +#' dn <- new_diffnet(g, toa = toa, t0 = 1L, t1 = 5L) +#' +#' tree <- transmission_tree_from_events(dn, attribution = "uniform", +#' seed = 2026) +#' head(tree) +#' +#' # Promote to diffnet_epi in one step: +#' dn_epi <- as_diffnet_epi(dn, attribution = "uniform") +#' is.diffnet_epi(dn_epi) +#' +#' @seealso \code{\link{source_attribution}}, +#' \code{\link{as_transmission_tree}}, \code{\link{as_diffnet_epi}} +#' @author Aníbal Olivera M. +#' @export +transmission_tree_from_events <- function(x, + toa = NULL, + attribution = "uniform", + pars = list(), + behavior = NULL, + seed = NULL) { + + # 1. Coerce inputs. + if (inherits(x, "diffnet")) { + graphs <- x$graph + if (is.null(toa)) toa <- x$toa + } else if (is.list(x)) { + graphs <- x + if (is.null(toa)) + stop("-toa- is required when -x- is a list of graphs.") + } else { + stop("-x- must be a diffnet or a list of adjacency matrices.") + } + + # 2. Normalize attribution -> function. + attr_fn <- if (is.character(attribution)) { + switch(attribution, + "uniform" = source_attribution_uniform, + "weighted" = source_attribution_weighted, + "earliest" = source_attribution_earliest, + stop("Unknown -attribution-: ", attribution, + ". Expected \"uniform\", \"weighted\", or \"earliest\".") + ) + } else if (is.function(attribution)) { + attribution + } else { + stop("-attribution- must be a function or a string.") + } + + # 3. Normalize toa to n x Q integer matrix. + if (is.null(dim(toa))) + toa <- matrix(as.integer(toa), ncol = 1L) + else + storage.mode(toa) <- "integer" + + n <- nrow(toa) + Q <- ncol(toa) + + # 4. Normalize behavior labels. + if (is.null(behavior)) + behavior <- paste0("behavior_", seq_len(Q)) + else + behavior <- as.character(behavior) + if (length(behavior) != Q) + stop("-behavior- must have length ", Q, + " (one entry per column of -toa-).") + + # 5. Seed once for reproducibility of stochastic attributors. + if (!is.null(seed)) set.seed(seed) + + # 6. Walk adopters in chronological order per behaviour. + T_slices <- length(graphs) + tree_rows <- list() + + for (q in seq_len(Q)) { + adopters <- which(!is.na(toa[, q])) + if (!length(adopters)) next + adopters <- adopters[order(toa[adopters, q])] + + for (target in adopters) { + t_inf <- toa[target, q] + if (is.na(t_inf) || t_inf < 1L || t_inf > T_slices) next + + g_slice <- graphs[[t_inf]] + row_i <- as.vector(g_slice[target, ]) + col_i <- as.vector(g_slice[, target]) + nbrs <- which((row_i != 0) | (col_i != 0)) + # Keep only neighbours adopted strictly before the target. + nbrs <- nbrs[!is.na(toa[nbrs, q]) & toa[nbrs, q] < t_inf] + + if (length(nbrs)) { + # Sort by ascending toa so attributors that exploit ordering + # (e.g. source_attribution_earliest) see the same contract as + # they do inside rdiffnet's M8 path. + nbrs <- nbrs[order(toa[nbrs, q])] + weights <- pmax(row_i[nbrs], col_i[nbrs]) + src <- attr_fn(target, nbrs, weights, t_inf, pars) + } else { + src <- NA_integer_ + } + + sed <- if (is.na(src)) NA_integer_ else as.integer(toa[src, q]) + tree_rows[[length(tree_rows) + 1L]] <- list( + date = as.integer(t_inf), + source = as.integer(src), + target = as.integer(target), + source_exposure_date = sed, + virus_id = as.integer(q), + virus = behavior[[q]] + ) + } + } + + rdiffnet_tree_rows_to_df(tree_rows) +} + +#' Retrieve the transmission tree of a \code{\link{diffnet_epi}} object +#' +#' Returns the data.frame stored in \code{x$transmission$tree} for objects +#' that inherit from \code{diffnet_epi}. Plain (non-epi) diffnets do not +#' carry a tree by design; calling this function on one is an API error. +#' +#' @param x A \code{\link{diffnet_epi}} object. +#' @return A \code{data.frame} with columns \code{date}, \code{source}, +#' \code{target}, \code{source_exposure_date}, \code{virus_id}, \code{virus}. +#' Zero rows when the epi object has been promoted but no tree attached yet. +#' @export +#' @seealso \code{\link{as_transmission_tree}}, \code{\link{as_diffnet_epi}} +#' @author Aníbal Olivera M. +transmission_tree <- function(x) { + if (!inherits(x, "diffnet_epi")) + stop("-x- is not a -diffnet_epi-. Use -as_transmission_tree()- or ", + "-as_diffnet_epi()- to promote a plain diffnet first.") + + tr <- x$transmission$tree + if (is.null(tr)) .empty_transmission_tree() else tr +} diff --git a/data-raw/epigames.R b/data-raw/epigames.R index 48c54020..28cfe977 100644 --- a/data-raw/epigames.R +++ b/data-raw/epigames.R @@ -7,7 +7,23 @@ rm(list = ls()) # both using consistent node IDs (1-594). load("data-raw/epigames_hourly.rda") -epigames <- epigames_hourly +# Load the hourly dynamic behavioral attributes +dyn_attrs_path <- "playground/epigames-stuff/epigames-analysis-copy/dynamic_attrs_hourly.csv" -# Save compressed raw data +dyn_attrs_hourly <- read.csv(dyn_attrs_path, stringsAsFactors = FALSE) + +# Sanity checks +stopifnot(ncol(dyn_attrs_hourly) == 5) # id, hour, mask, med, quarantine +stopifnot(nrow(dyn_attrs_hourly) == 594 * 339) # 201,366 rows +stopifnot(all(dyn_attrs_hourly$id %in% 1:594)) +stopifnot(all(dyn_attrs_hourly$hour %in% 0:338)) + +# Bundle into the epigames list (3 elements) +epigames <- list( + attributes = epigames_hourly$attributes, # static, 594 x 6 + edgelist = epigames_hourly$edgelist, # hourly, ~39k rows + dyn_attrs = dyn_attrs_hourly # dynamical attributes (long format) +) + +# Save compressed .rda usethis::use_data(epigames, overwrite = TRUE, compress = "xz") diff --git a/data-raw/epigamesDiffNet.R b/data-raw/epigamesDiffNet.R index a7d313fc..8b879d52 100644 --- a/data-raw/epigamesDiffNet.R +++ b/data-raw/epigamesDiffNet.R @@ -1,50 +1,86 @@ # data-raw/epigamesDiffNet.R -# Generating the dynamic diffnet object using netdiffuseR + collapse_timeframes() +# Generating the daily diffnet object from epigames using collapse_timeframes() +# Run after data-raw/epigames.R has built data/epigames.rda. rm(list = ls()) library(netdiffuseR) -# Load the base raw dataset created in data-raw/epigames.R (hourly resolution) load("data/epigames.rda") -attrs <- epigames$attributes -edges <- epigames$edgelist +attrs <- epigames$attributes # 594 x 6: id, toa, qyes_total, qno_total, mask_prop, med_prop +edges <- epigames$edgelist # hourly edgelist: sender, receiver, time (0-338), weight +dyn_long <- epigames$dyn_attrs # long format: id, hour (0-338), mask, med, quarantine -# Collapse hourly edgelist (hours 0-338) into daily windows (days 1-15) -source("R/collapse_timeframes.R") +# Collapse hourly edgelist into 15 daily windows via collapse_timeframes() +WINDOW_SIZE <- 24 +N_DAYS <- 15 + +dyn_long$day <- (dyn_long$hour %/% WINDOW_SIZE) + 1 +dyn_long$day <- pmin(dyn_long$day, N_DAYS) # day mapping daily_edgelist <- collapse_timeframes( - edgelist = edges, - ego = "sender", - alter = "receiver", - timevar = "time", - weightvar = "weight", - window_size = 24, - binarize = TRUE, - cumulative = TRUE, - symmetric = TRUE + edgelist = edges, + ego = "sender", + alter = "receiver", + timevar = "time", + weightvar = "weight", + window_size = WINDOW_SIZE, + binarize = FALSE, + cumulative = FALSE, + symmetric = TRUE ) -# Build daily adjacency matrices +# Build adjacency matrices adjmat <- edgelist_to_adjmat( daily_edgelist[, c("sender", "receiver")], - w = daily_edgelist$weight, - t0 = daily_edgelist$time, + w = daily_edgelist$weight, + t0 = daily_edgelist$time, + t1 = daily_edgelist$time, keep.isolates = TRUE, multiple = TRUE ) -max_t <- max(daily_edgelist$time, na.rm = TRUE) +# Build vertex.dyn.attrs: one data.frame per day (15 total) +# Each data.frame: 594 rows, columns: mask, med, quarantine + +vertex_dyn <- lapply(1:N_DAYS, function(d) { + sub <- dyn_long[dyn_long$day == d, ] + + # Aggregate per node: mean within each 24-hour window + agg <- aggregate( + cbind(mask, med, quarantine) ~ id, + data = sub, + FUN = mean + ) + + # Sort by id to match the node ordering in the diffnet object + agg <- agg[order(agg$id), ] + rownames(agg) <- NULL -# Prepare TOA vector: real adoption times from attrs, NA for non-adopters + # Return only the behavior columns + agg[, c("mask", "med", "quarantine")] +}) + +# Prepare TOA vector toa_vec <- stats::setNames(attrs$toa, as.character(attrs$id)) +# Assemble diffnet object epigamesDiffNet <- as_diffnet( adjmat, - toa = toa_vec, + toa = toa_vec, vertex.static.attrs = attrs, + vertex.dyn.attrs = vertex_dyn, t0 = 1, - t1 = max_t + t1 = N_DAYS +) + +# Reconstruct a transmission tree from the observed daily contact +# network and the per-node times of adoption. +epigamesDiffNet <- as_diffnet_epi( + epigamesDiffNet, + attribution = "uniform", + seed = 2026 ) +# Save usethis::use_data(epigamesDiffNet, overwrite = TRUE, compress = "xz") diff --git a/data/epigames.rda b/data/epigames.rda index 0c0dda50..a9efca8a 100644 Binary files a/data/epigames.rda and b/data/epigames.rda differ diff --git a/data/epigamesDiffNet.rda b/data/epigamesDiffNet.rda index fc1a2deb..88608d82 100644 Binary files a/data/epigamesDiffNet.rda and b/data/epigamesDiffNet.rda differ diff --git a/man-roxygen/graph_template.R b/man-roxygen/graph_template.R index 66924c62..7d3686b1 100644 --- a/man-roxygen/graph_template.R +++ b/man-roxygen/graph_template.R @@ -1,7 +1,7 @@ -#' @param graph <%= ifelse(exists("dynamic") && dynamic, "A dynamic graph", "Any class of accepted graph format") %> (see \code{\link{netdiffuseR-graphs}}). -#' <%=ifelse(exists("self") && self, "@param self Logical scalar. When \\code{TRUE} autolinks (loops, self edges) are allowed (see details).", "") %> -#' <%=ifelse(exists("multiple") && multiple, "@param multiple Logical scalar. When \\code{TRUE} allows multiple edges.", "") %> -#' <%=ifelse(exists("valued") && valued, "@param valued Logical scalar. When \\code{TRUE} weights will be considered. Otherwise non-zero values will be replaced by ones.", "") %> -#' <%=ifelse(exists("undirected") && undirected, "@param undirected Logical scalar. When \\code{TRUE} only the lower triangle of the adjacency matrix will considered (faster).", "") %> -#' <%=ifelse(exists("toa") && toa, "@param toa Integer vector of length \\eqn{n} with the times of adoption.", "") %> -#' <%=ifelse(exists("slice") && slice, "@param slice Integer scalar. Number of slice to use as baseline for drawing the graph.", "") %> +#' @param graph <%= ifelse(exists("dynamic", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(dynamic))), "A dynamic graph", "Any class of accepted graph format") %> (see \code{\link{netdiffuseR-graphs}}). +#' <%=ifelse(exists("self", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(self))), "@param self Logical scalar. When \\code{TRUE} autolinks (loops, self edges) are allowed (see details).", "") %> +#' <%=ifelse(exists("multiple", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(multiple))), "@param multiple Logical scalar. When \\code{TRUE} allows multiple edges.", "") %> +#' <%=ifelse(exists("valued", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(valued))), "@param valued Logical scalar. When \\code{TRUE} weights will be considered. Otherwise non-zero values will be replaced by ones.", "") %> +#' <%=ifelse(exists("undirected", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(undirected))), "@param undirected Logical scalar. When \\code{TRUE} only the lower triangle of the adjacency matrix will considered (faster).", "") %> +#' <%=ifelse(exists("toa", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(toa))), "@param toa Integer vector of length \\eqn{n} with the times of adoption.", "") %> +#' <%=ifelse(exists("slice", inherits = FALSE) && isTRUE(suppressWarnings(as.logical(slice))), "@param slice Integer scalar. Number of slice to use as baseline for drawing the graph.", "") %> diff --git a/man/adoption_mechanisms.Rd b/man/adoption_mechanisms.Rd new file mode 100644 index 00000000..bb249e0b --- /dev/null +++ b/man/adoption_mechanisms.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/adoption_mechanisms.R +\name{adoption_mechanisms} +\alias{adoption_mechanisms} +\alias{adoptmech_threshold} +\alias{adoptmech_logit} +\alias{adoptmech_probit} +\title{Adoption mechanisms for \code{rdiffnet}} +\usage{ +adoptmech_threshold(expo, thresholds, not_adopted, time, pars) + +adoptmech_logit(expo, thresholds, not_adopted, time, pars) + +adoptmech_probit(expo, thresholds, not_adopted, time, pars) +} +\arguments{ +\item{expo}{Numeric vector of length \eqn{n}. Per-node exposure at +the current time step (\code{expo[, , q]} for behaviour \eqn{q}).} + +\item{thresholds}{Numeric vector of length \eqn{n}. Per-node adoption +threshold (\code{thr[, q]}). Used by the deterministic kernel; +passed but ignored by the stochastic kernels so user-defined +mechanisms can choose whether to use it.} + +\item{not_adopted}{Logical vector of length \eqn{n}. \code{TRUE} for +nodes that have not yet adopted behaviour \eqn{q} +(\code{is.na(toa[, q])}).} + +\item{time}{Integer scalar. Current simulation time step.} + +\item{pars}{Named list of mechanism-specific parameters. Each +kernel documents which fields it expects.} +} +\value{ +Integer vector of node indices that adopt at this step. +} +\description{ +A family of pluggable kernels that decide which nodes adopt at each +simulation step. Pass any of these as the \code{adoption_mechanism} +argument of \code{\link{rdiffnet}}, or write your own function that +follows the same contract. +} +\details{ +The contract is intentionally minimal so that any user can write a +mechanism without reading the package internals: receive the current +state, decide who adopts, return the indices. The three kernels +below cover the common cases. + +\describe{ + \item{\code{adoptmech_threshold}}{Tom Valente's deterministic + threshold rule. Adopt iff \code{expo[i] >= thresholds[i]}. + Ignores \code{pars}.} + \item{\code{adoptmech_logit}}{Bernoulli rule with logit link. + Adopt with probability \code{plogis(beta0 + beta_expo * expo[i])}. + Requires \code{pars$beta0} and \code{pars$beta_expo}.} + \item{\code{adoptmech_probit}}{Bernoulli rule with probit link. + Adopt with probability \code{pnorm(beta0 + beta_expo * expo[i])}. + Requires \code{pars$beta0} and \code{pars$beta_expo}.} +} +} +\examples{ +set.seed(2026) + +# Default deterministic threshold +dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE) + +# Stochastic logit mechanism +dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -2, beta_expo = 5)) + +} +\author{ +Aníbal Olivera M. +} diff --git a/man/as_transmission_tree.Rd b/man/as_transmission_tree.Rd new file mode 100644 index 00000000..98ea95d0 --- /dev/null +++ b/man/as_transmission_tree.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transmission.R +\name{as_transmission_tree} +\alias{as_transmission_tree} +\title{Attach a transmission tree to a \code{diffnet} object} +\usage{ +as_transmission_tree(x, tree, pars = list()) +} +\arguments{ +\item{x}{A \code{diffnet} object.} + +\item{tree}{A \code{data.frame} with at least the columns \code{date}, +\code{source}, \code{target}, and \code{source_exposure_date}. Columns +\code{virus_id} and \code{virus} are optional. \code{source} and +\code{source_exposure_date} may be \code{NA} for seed infections (roots +of the tree).} + +\item{pars}{Optional named list stored verbatim in \code{x$transmission$pars}. +Useful for recording kernel parameters, seeds, etc.} +} +\value{ +A \code{\link{diffnet_epi}} object — the input \code{x} promoted to + the subclass with \code{$transmission} set to a list with components + \code{tree} (a clean, ordered \code{data.frame}) and \code{pars}. +} +\description{ +Populates the \code{$transmission} slot of a \code{diffnet} with a +transmission tree (who-infected-whom). The resulting directed forest is the +canonical input to offspring-distribution analyses +(Lloyd-Smith \emph{et al.}, 2005) and to likelihood-based estimators of the +reproduction number and serial interval (White & Pagano, 2008). +} +\details{ +Each row of \code{tree} represents one infection event (an edge +\eqn{\text{source} \to \text{target}} in the transmission tree) time-stamped +by \code{date}. \code{source} and \code{target} must be integer row indices +into \code{x} (\code{1..nnodes(x)}); \code{target} is required for every +row. Existing \code{$transmission} content is overwritten. + +Attaching a transmission tree promotes \code{x} to the +\code{\link{diffnet_epi}} subclass (\code{class(x) <- c("diffnet_epi", +"diffnet")}). The promotion is monotone — an already-\code{diffnet_epi} +input keeps its class. See \code{\link{as_diffnet_epi}} for the low-level +constructor. +} +\references{ +Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +Superspreading and the effect of individual variation on disease emergence. +\emph{Nature} 438:355-359. \doi{10.1038/nature04153} + +White, L. F., & Pagano, M. (2008). A likelihood-based method for real-time +estimation of the serial interval and reproductive number of an epidemic. +\emph{Statistics in Medicine} 27:2999-3016. \doi{10.1002/sim.3136} +} +\seealso{ +\code{\link{new_diffnet}} +} +\author{ +Aníbal Olivera M. +} diff --git a/man/diffnet-class.Rd b/man/diffnet-class.Rd index f2532b49..dcddacb5 100644 --- a/man/diffnet-class.Rd +++ b/man/diffnet-class.Rd @@ -41,9 +41,9 @@ as_diffnet(graph, ...) new_diffnet( graph, - toa, - t0 = min(toa, na.rm = TRUE), - t1 = max(toa, na.rm = TRUE), + toa = NULL, + t0 = NULL, + t1 = NULL, vertex.dyn.attrs = NULL, vertex.static.attrs = NULL, id.and.per.vars = NULL, @@ -52,7 +52,10 @@ new_diffnet( self = getOption("diffnet.self"), multiple = getOption("diffnet.multiple"), name = "Diffusion Network", - behavior = NULL + behavior = NULL, + status = NULL, + transmission = NULL, + transmission_pars = list() ) \method{as.data.frame}{diffnet}( @@ -148,6 +151,28 @@ order of the rows in the attribute data.} \item{behavior}{Character vector. Name of the behavior(s) been analyzed (innovation).} +\item{status}{Optional state representation. Single-behavior: an +\eqn{n \times T} integer matrix with \code{1} on the cells where node +\eqn{i} is adopted at time \eqn{t} and \code{0} otherwise (need not be +monotone — multi-cycle adoption / disadoption is supported). Multi-behavior: +a length-\eqn{Q} list of such matrices. When \code{status} is supplied, +it becomes the canonical state of the diffnet and \code{toa} is derived +from it as the first time each node enters the adopted state. Passing +both \code{toa} and \code{status} emits a warning and uses \code{status}; +the warning reports whether the supplied \code{toa} is consistent with +the \code{toa} derived from \code{status}.} + +\item{transmission}{Optional transmission tree (who-infected-whom). Either +a \code{data.frame} with the columns documented in +\code{\link{as_transmission_tree}}, or a pre-built transmission list with +components \code{tree} and \code{pars}. When supplied, the returned +object is promoted to the \code{\link{diffnet_epi}} subclass. \code{NULL} +(default) returns a plain \code{diffnet}.} + +\item{transmission_pars}{Optional named list stored verbatim in +\code{x$transmission$pars}. Only consulted when \code{transmission} is a +data.frame.} + \item{x}{A \code{diffnet} object.} \item{row.names}{Ignored.} diff --git a/man/diffnet_epi.Rd b/man/diffnet_epi.Rd new file mode 100644 index 00000000..88498be8 --- /dev/null +++ b/man/diffnet_epi.Rd @@ -0,0 +1,135 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/diffnet-epi.R +\name{diffnet_epi} +\alias{diffnet_epi} +\alias{as_diffnet_epi} +\alias{is.diffnet_epi} +\alias{print.diffnet_epi} +\title{The \code{diffnet_epi} subclass} +\usage{ +as_diffnet_epi( + x, + transmission = NULL, + pars = list(), + attribution = NULL, + seed = NULL +) + +is.diffnet_epi(x) + +\method{print}{diffnet_epi}(x, ...) +} +\arguments{ +\item{x}{A \code{diffnet} object.} + +\item{transmission}{Either \code{NULL} (creates an empty epi diffnet), a +pre-built transmission list with the components \code{tree} and +\code{pars}, or — for the data.frame entry point — pass the data.frame +to \code{\link{as_transmission_tree}} instead. See examples.} + +\item{pars}{Optional named list stored verbatim in +\code{x$transmission$pars}. Only consulted when \code{transmission} is +\code{NULL} or carries no \code{pars} of its own.} + +\item{attribution}{Optional source-attribution rule. When non-\code{NULL}, +the tree is reconstructed from \code{x}'s graph slices and \code{toa} +via \code{\link{transmission_tree_from_events}} using this rule. Accepts +one of \code{"uniform"} / \code{"weighted"} / \code{"earliest"} or a +function with the \code{\link{source_attribution}} contract. Mutually +exclusive with \code{transmission}.} + +\item{seed}{Optional integer forwarded to +\code{\link{transmission_tree_from_events}} so the stochastic +attribution rules (\code{"uniform"}, \code{"weighted"}) produce a +reproducible tree. Ignored when \code{attribution} is \code{NULL}.} + +\item{...}{Further arguments. Accepted for compatibility with the +\code{\link[base]{print}} generic; currently ignored by +\code{print.diffnet_epi}.} +} +\value{ +\describe{ + \item{\code{as_diffnet_epi(x, ...)}}{A \code{diffnet_epi} object — the + input \code{x} with \code{class(x) <- c("diffnet_epi", "diffnet")} and + a \code{$transmission} slot.} + \item{\code{is.diffnet_epi(x)}}{\code{TRUE} iff \code{x} inherits from + \code{diffnet_epi}.} + \item{\code{print(x)} for a \code{diffnet_epi}}{Same output as + \code{\link{print.diffnet}} plus a final line summarising the + transmission tree.} +} +} +\description{ +\code{diffnet_epi} is an S3 subclass of \code{\link{diffnet}} carrying the +epidemiological extension: a \code{$transmission} slot with a +who-infected-whom tree and, eventually, the methods that operate on it +(offspring-distribution analysis, secondary attack rate, generation time, +survival, reproduction number, transmission-tree visualisation). A +\code{diffnet_epi} \emph{is a} \code{diffnet}: every method defined for the +base class dispatches transparently on the subclass thanks to S3 +inheritance. +} +\details{ +The transmission tree is the canonical input to offspring-distribution +analyses (Lloyd-Smith \emph{et al.}, 2005) and likelihood-based estimators +of the reproduction number and serial interval (White & Pagano, 2008). +Attaching one to a diffnet is what turns it into an epidemiological object +in this package's sense — hence the dedicated subclass. + +Promotion is monotone: once a diffnet has been promoted, it stays a +\code{diffnet_epi}. Attaching an empty tree (\code{transmission = NULL}) is +allowed and useful for downstream code that wants to build the tree +incrementally — e.g. \code{rdiffnet()} with a future \code{source_attribution} +callback (M8). +} +\section{Class hierarchy}{ + +\preformatted{ + class(x) -> c("diffnet_epi", "diffnet") +} +S3 dispatch tries \code{*.diffnet_epi} first, then falls back to +\code{*.diffnet}. \code{print.diffnet_epi} chains into +\code{print.diffnet} via \code{NextMethod()}. +} + +\examples{ +set.seed(2026) +gr <- lapply(1:5, function(t) rgraph_ba(t = 4L)) +dn <- new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L) + +# Empty promotion (no tree yet) +dn_epi <- as_diffnet_epi(dn) +is.diffnet_epi(dn_epi) # TRUE +inherits(dn_epi, "diffnet") # also TRUE +nrow(transmission_tree(dn_epi)) # 0 + +# Attach a tree (preferred entry point: as_transmission_tree) +tree <- data.frame( + date = c(1L, 2L), + source = c(NA_integer_, 1L), + target = c(1L, 2L), + source_exposure_date = c(NA_integer_, 1L) +) +dn_epi <- as_transmission_tree(dn, tree) +is.diffnet_epi(dn_epi) # TRUE (promoted automatically) +transmission_tree(dn_epi) # 2 rows + +# Reconstruct the tree from x's graph + toa via source-attribution +# (general primitive — useful when you have observed adoption times +# but no transmission log, like contact-tracing or experiment data). +dn_epi <- as_diffnet_epi(dn, attribution = "uniform", seed = 2026) +transmission_tree(dn_epi) + +} +\references{ +Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +Superspreading and the effect of individual variation on disease emergence. +\emph{Nature} 438:355-359. \doi{10.1038/nature04153} + +White, L. F., & Pagano, M. (2008). A likelihood-based method for real-time +estimation of the serial interval and reproductive number of an epidemic. +\emph{Statistics in Medicine} 27:2999-3016. \doi{10.1002/sim.3136} +} +\author{ +Aníbal Olivera M. +} diff --git a/man/disadoption_mechanisms.Rd b/man/disadoption_mechanisms.Rd new file mode 100644 index 00000000..3d9d4518 --- /dev/null +++ b/man/disadoption_mechanisms.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/disadoption_mechanisms.R +\name{disadoption_mechanisms} +\alias{disadoption_mechanisms} +\alias{disadoptmech_random} +\alias{disadoptmech_bithreshold} +\alias{disadoptmech_logit} +\alias{disadoptmech_probit} +\title{Disadoption mechanisms for \code{rdiffnet}} +\usage{ +disadoptmech_random(prob) + +disadoptmech_bithreshold(threshold_dis) + +disadoptmech_logit(beta0, beta_expo) + +disadoptmech_probit(beta0, beta_expo) +} +\arguments{ +\item{prob}{Numeric scalar in \eqn{[0, 1]}. Per-step disadoption +probability for \code{disadoptmech_random}.} + +\item{threshold_dis}{Numeric scalar or vector of length \eqn{n}. +Upper-threshold cut-off for \code{disadoptmech_bithreshold}. +A scalar is recycled across all nodes.} + +\item{beta0}{Numeric scalar. Intercept of the logit/probit +disadoption probability.} + +\item{beta_expo}{Numeric scalar. Slope on exposure for the +logit/probit disadoption probability.} +} +\value{ +A function with signature + \code{function(expo, cumadopt, time)} suitable as the + \code{disadopt} argument of \code{\link{rdiffnet}}. +} +\description{ +A family of factories that build per-step disadoption rules. Each +factory takes its parameters and returns a closure that satisfies the +\code{disadopt} contract of \code{\link{rdiffnet}}: a function of +\code{(expo, cumadopt, time)} that returns a list of length \eqn{Q} +(one entry per behaviour) with the integer node indices that +disadopt at the current step. +} +\details{ +Use any of these as the \code{disadopt} argument of \code{rdiffnet}, +or write your own factory that returns a function with the same +signature. + + +The four kernels below cover the common cases: + +\describe{ + \item{\code{disadoptmech_random}}{Each currently-adopted node + disadopts independently with probability \code{prob}. Models + constant-rate recovery (the SIR \eqn{\gamma}).} + \item{\code{disadoptmech_bithreshold}}{Currently-adopted nodes + disadopt when their exposure crosses an upper threshold, + \code{threshold_dis}. Pair with \code{adoptmech_threshold} to + instantiate the bi-threshold model of Alipour \emph{et al.} + (2024) — adopt at the lower threshold, disadopt at the upper.} + \item{\code{disadoptmech_logit}}{Bernoulli rule with logit link. + Disadopt with probability + \code{plogis(beta0 + beta_expo * expo)}. To make recovery + \emph{less likely} as exposure grows, set \code{beta_expo < 0}.} + \item{\code{disadoptmech_probit}}{Bernoulli rule with probit link. + Disadopt with probability + \code{pnorm(beta0 + beta_expo * expo)}.} +} +} +\examples{ +set.seed(2026) + +# Constant-rate recovery: each adopter recovers with prob 0.10 / step +dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + disadopt = disadoptmech_random(prob = 0.10)) + +# Bi-threshold model (Alipour 2024): adopt when exposure >= 0.30, +# disadopt when exposure >= 0.70. +dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + threshold.dist = 0.30, + disadopt = disadoptmech_bithreshold(threshold_dis = 0.70)) + +# Logit recovery, exposure-dependent +dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + disadopt = disadoptmech_logit(beta0 = -1, beta_expo = -2)) + +} +\references{ +Alipour, F., Dokshin, F., Maleki, Z., Song, Y., & Ramazi, P. (2024). +Enough but not too many: A bi-threshold model for behavioral +diffusion. \emph{PNAS Nexus} 3(10). +\doi{10.1093/pnasnexus/pgae428} +} +\author{ +Aníbal Olivera M. +} diff --git a/man/epigamesDiffNet.Rd b/man/epigamesDiffNet.Rd index f6338351..6d369322 100644 --- a/man/epigamesDiffNet.Rd +++ b/man/epigamesDiffNet.Rd @@ -14,6 +14,21 @@ A directed dynamic graph with 594 vertices and 15 time periods. The attributes in the graph are described in \code{\link{epigames}}. } \details{ +By default, this \code{diffnet} object is **non-cumulative** (each slice represents +ephemeral daily contacts) and **valued** (edge weights represent contact duration in seconds). + +To reconstruct the classic cumulative/binarized network, you can run: + +\preformatted{ +epigames_cumul <- epigamesDiffNet + +# 1. Accumulate the history across time periods +epigames_cumul$graph <- Reduce("+", epigames_cumul$graph, accumulate = TRUE) + +# 2. Apply a logical cut-off to binarize the network +epigames_cumul$graph <- lapply(epigames_cumul$graph, function(m) { m@x[] <- 1; m }) +} + Non-adopters have \code{toa = NA}. } \seealso{ diff --git a/man/exposure.Rd b/man/exposure.Rd index 82f3531b..1b9599f4 100644 --- a/man/exposure.Rd +++ b/man/exposure.Rd @@ -15,6 +15,9 @@ exposure( groupvar = NULL, self = getOption("diffnet.self"), lags = 0L, + mode = "deterministic", + link_fun = "identity", + link_pars = list(), ... ) } @@ -47,6 +50,24 @@ and one (see details).} \item{lags}{Integer scalar. When different from 0, the resulting exposure matrix will be the lagged exposure as specified (see examples).} +\item{mode}{Character scalar. Either "deterministic" (default) or "stochastic".} + +\item{link_fun}{Character scalar or function. Kernel applied to the +(valued) edge weights before exposure is computed. Supported names: +\code{"identity"} (default, no transformation), \code{"linear"} +(\eqn{\min(\beta w, 1)}), \code{"sigmoid"} +(\eqn{\mathrm{plogis}((w - h)/\mathrm{scale})}), and \code{"wells-riley"} +(\eqn{1 - \exp(-\beta w)}). Alternatively, a user-supplied +single-argument function \code{function(w)} with its parameters +baked into the closure; it must return a vector of the same length +as \code{w}. When \code{link_fun} is not \code{"identity"}, +\code{valued} is forced to \code{TRUE} (with a warning if the user +set it to \code{FALSE}).} + +\item{link_pars}{Named list with the scalar parameters required by +the named kernels (\code{"linear"}, \code{"sigmoid"}, +\code{"wells-riley"}). Ignored when \code{link_fun} is a function.} + \item{...}{Further arguments passed to \code{\link{struct_equiv}} (only used when \code{alt.graph="se"}).} } @@ -115,6 +136,25 @@ If \code{normalize=FALSE} then denominator, \eqn{S_t \times x_t}{S(t) \%*\% x(t) is not included. This can be useful when, for example, exposure needs to be computed as a count instead of a proportion. A good example of this can be found at the examples section of the function \code{\link{rdiffnet}}. + +\strong{Stochastic Exposure} + +When \code{mode = "stochastic"}, the exposure is calculated based on a probabilistic +interpretation of the edges. In this mode, the weights of the graph \eqn{S_t} are +treated as probabilities of transmission. For each edge \eqn{(i,j)}, a Bernoulli +trial is performed with probability \eqn{S_{t,ij}}. If the trial is successful, +the edge is "realized" as a full connection. If failed, the edge is treated +as non-existent. + +The denominator is calculated using the degree of the node, representing the total +number of potential contacts. + +\deqn{ +\tilde{E}_{ti} = \frac{\sum_{j \neq i} \mathbb{I}(U_{ij} < S_{t,ij}) a_{tj}}{\sum_{j \neq i} 1} +} + +Where \eqn{S_{t,ij}} is the weight of the edge from \eqn{j} to \eqn{i} at time \eqn{t} +(treated as probability), and \eqn{U_{ij} \sim \text{Uniform}(0,1)}. } \examples{ # Calculating lagged exposure ----------------------------------------------- diff --git a/man/generation_time.Rd b/man/generation_time.Rd new file mode 100644 index 00000000..0dfeab77 --- /dev/null +++ b/man/generation_time.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_metrics.R +\name{generation_time} +\alias{generation_time} +\alias{generation_time.default} +\alias{generation_time.diffnet_epi} +\alias{print.netdiffuseR_generation_time} +\title{Generation time per edge of a transmission tree} +\usage{ +generation_time(x, ...) + +\method{generation_time}{default}(x, ...) + +\method{generation_time}{diffnet_epi}(x, ...) + +\method{print}{netdiffuseR_generation_time}(x, ...) +} +\arguments{ +\item{x}{A \code{\link{diffnet_epi}} object.} + +\item{...}{Currently ignored.} +} +\value{ +A \code{data.frame} (with extra class + \code{netdiffuseR_generation_time}) carrying the original tree columns + plus \code{gen_time}. Printing shows a distributional summary + (\emph{N}, mean, sd, median, IQR, range); the per-edge rows are exposed + via standard data.frame subscripting. +} +\description{ +For each edge \eqn{(source, target)} in \code{$transmission$tree}, +\code{generation_time(x)} computes \code{date - source_exposure_date}, +the time between the infector's adoption and its infectee's. Seed +rows (\code{source == NA}) are dropped. +} +\examples{ +set.seed(2026) +dn <- rdiffnet(n = 40, t = 8, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) +gt <- generation_time(dn) +gt # summary print +as.data.frame(gt) # per-edge rows +mean(gt$gen_time) + +} +\author{ +Aníbal Olivera M. +} diff --git a/man/peak_prevalence.Rd b/man/peak_prevalence.Rd new file mode 100644 index 00000000..63671dde --- /dev/null +++ b/man/peak_prevalence.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_metrics.R +\name{peak_prevalence} +\alias{peak_prevalence} +\alias{peak_prevalence.diffnet} +\alias{peak_time} +\alias{peak_time.diffnet} +\title{Peak prevalence and time of peak in a diffnet} +\usage{ +peak_prevalence(x, ...) + +\method{peak_prevalence}{diffnet}(x, ...) + +peak_time(x, ...) + +\method{peak_time}{diffnet}(x, ...) +} +\arguments{ +\item{x}{A \code{\link{diffnet}} (or any subclass, such as +\code{\link{diffnet_epi}}).} + +\item{...}{Currently ignored.} +} +\value{ +Numeric scalar (single behaviour) or named numeric vector + (multi-behaviour). \code{peak_prevalence} is in \eqn{[0, 1]}; + \code{peak_time} is the time-period label (as integer). +} +\description{ +\code{peak_prevalence(x)} returns the highest fraction of adopted nodes +observed in \code{x$status} across all time slices. \code{peak_time(x)} +returns the time period at which the peak is reached. +} +\details{ +For multi-behaviour diffnets both functions return a named numeric vector +of length \eqn{Q}, one entry per behaviour. +} +\examples{ +set.seed(2026) +dn <- rdiffnet(n = 50, t = 8, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE) +peak_prevalence(dn) +peak_time(dn) + +} +\author{ +Aníbal Olivera M. +} diff --git a/man/rdiffnet.Rd b/man/rdiffnet.Rd index 0d63f7bf..e554107c 100644 --- a/man/rdiffnet.Rd +++ b/man/rdiffnet.Rd @@ -18,10 +18,14 @@ rdiffnet( rewire.args = list(), threshold.dist = runif(n), exposure.args = list(), + exposure.mode = "deterministic", name = "A diffusion network", behavior = "Random contagion", stop.no.diff = TRUE, - disadopt = NULL + disadopt = NULL, + adoption_mechanism = NULL, + adoption_pars = NULL, + source_attribution = NULL ) } \arguments{ @@ -61,6 +65,8 @@ threshold for each node.} \item{exposure.args}{List. Arguments to be passed to \code{\link{exposure}}.} +\item{exposure.mode}{Character scalar. Either "deterministic" (default) or "stochastic".} + \item{name}{Character scalar. Passed to \code{\link{as_diffnet}}.} \item{behavior}{Character scalar or a list or character scalar (multiple behaviors only). Passed to \code{\link{as_diffnet}}.} @@ -69,6 +75,35 @@ threshold for each node.} with error if there was no diffusion. Otherwise it throws a warning.} \item{disadopt}{Function of disadoption, with current exposition, cumulative adoption, and time as possible inputs.} + +\item{adoption_mechanism}{Function. Per-step adoption rule. Receives +\code{(expo, thresholds, not_adopted, time, pars)} and returns the integer +indices that adopt at the current step. Defaults to +\code{\link{adoptmech_threshold}} (Tom Valente's deterministic threshold +rule). Pass \code{\link{adoptmech_logit}} or \code{\link{adoptmech_probit}} +for stochastic adoption, or any user-defined function with the same +signature.} + +\item{adoption_pars}{Named list. Mechanism-specific parameters forwarded +verbatim as \code{pars} to \code{adoption_mechanism}. Stochastic +kernels (\code{adoptmech_logit}, \code{adoptmech_probit}) require +\code{beta0} and \code{beta_expo}.} + +\item{source_attribution}{Optional lineage-tracking callback. When +non-\code{NULL}, \code{rdiffnet} records the inferred infector of every +fresh adopter during the simulation, builds a transmission tree, and +returns a \code{\link{diffnet_epi}} (auto-promoted). Three modes: +\itemize{ + \item{\code{NULL} (default) — no lineage tracking; the output is a + plain \code{\link{diffnet}}.} + \item{A single function — applied to every behaviour (broadcast). + See \code{\link{source_attribution_uniform}} / + \code{\link{source_attribution_weighted}} / + \code{\link{source_attribution_earliest}} for the bundled + kernels.} + \item{A length-\eqn{Q} list — per-behaviour attributor; + \code{NULL} entries skip lineage tracking for that behaviour.} +}} } \value{ A random \code{\link{diffnet}} class object. @@ -156,6 +191,10 @@ is applied using that graph instead. \code{normalized} \tab \code{TRUE} } +When \code{exposure.mode = "stochastic"}, the \code{valued} argument in +\code{exposure.args} is forced to \code{TRUE} (with a message) to ensure that +edge weights are treated as probabilities. + The function \code{rdiffnet_multiple} is a wrapper of \code{rdiffnet} wich allows simulating multiple diffusion networks with the same parameters and apply the same function to all of them. This function is designed to allow the user diff --git a/man/repr_number.Rd b/man/repr_number.Rd new file mode 100644 index 00000000..8ccab8fd --- /dev/null +++ b/man/repr_number.Rd @@ -0,0 +1,137 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_metrics.R +\name{repr_number} +\alias{repr_number} +\alias{repr_number.default} +\alias{repr_number.diffnet_epi} +\alias{print.netdiffuseR_repr} +\alias{plot.netdiffuseR_repr} +\title{Empirical reproduction number from a transmission tree} +\usage{ +repr_number(x, ...) + +\method{repr_number}{default}(x, ...) + +\method{repr_number}{diffnet_epi}(x, ...) + +\method{print}{netdiffuseR_repr}(x, ...) + +\method{plot}{netdiffuseR_repr}( + x, + y = NULL, + main = NULL, + xlab = "Number of offspring (secondary cases)", + ylab = "Number of cases", + ... +) +} +\arguments{ +\item{x}{A \code{\link{diffnet_epi}} object.} + +\item{...}{Currently ignored.} + +\item{y}{Unused. Present for S3 consistency with \code{\link[graphics]{plot}}.} + +\item{main}{Plot title. When \code{NULL} (default), a sensible title is +chosen automatically: it includes "pooled over k diffusions" when the +tree carries multiple diffusion processes (i.e., multiple +\code{virus_id} values), otherwise just "Offspring distribution".} + +\item{xlab, ylab}{Axis labels forwarded to \code{\link[graphics]{barplot}}.} +} +\value{ +A \code{data.frame} (with extra class \code{netdiffuseR_repr}) + carrying columns \code{node}, \code{virus_id}, + \code{exposure_date}, \code{n_offspring}. Printing shows the + aggregate reproduction number (mean offspring), plus SD and range; + the per-case rows are exposed via standard data.frame subscripting. + The aggregate is also stored as \code{attr(., "global")}. A + \code{plot} method renders the offspring distribution as a + barplot. +} +\description{ +For every infection event in \code{$transmission$tree}, +\code{repr_number(x)} counts the number of secondary cases it caused +(its offspring count, \eqn{\nu_i} in Lloyd-Smith \emph{et al.}, 2005) +and reports the mean across cases as the empirical reproduction +number. Cases that did not transmit further (terminal cases) count +as zero in the denominator; seeds are included. +} +\details{ +A case is one entry into state I, keyed by +\code{(node, virus_id, exposure_date)}. Under absorbing diffusion +(the classic netdiffuseR regime) each \code{(node, virus_id)} has +exactly one \code{exposure_date}, so the 3-D key collapses to the +familiar per-node rollup. Under SIRS-style re-infection (a node +enters \eqn{I} multiple times for the same virus), each +infection-life is its own case with its own offspring tally. This +matches the convention used by epiworldR's +\code{get_reproductive_number()} and the Lloyd-Smith framework. + + +The empirical reproduction number is defined as the mean offspring +count across all observed cases: + +\deqn{% +R = \frac{1}{N}\sum_{i \in \mathrm{cases}} \nu_i % +}{% +R = (1/N) * sum_i nu_i % +} + +where \eqn{N} is the total number of infected cases (seeds + secondary) +in the tree and \eqn{\nu_i} is the number of times case \eqn{i} appears +as a \code{source} in the tree. Terminal cases (\eqn{\nu_i = 0}) are +included in the denominator, so \eqn{R} is the true mean offspring, +not the mean among transmitters only. + +For trees built from observational data (Epigames / contact tracing), +\eqn{R} matches the standard tree-based reproduction-number estimator. +For trees produced by \code{rdiffnet()} with \code{source_attribution}, +the value depends on the attribution policy: \code{_uniform}, +\code{_weighted}, and \code{_earliest} will produce different empirical +\eqn{R} on the same simulation, since they distribute observed +adoptions across different infectors. +} +\examples{ +set.seed(2026) +dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) +R <- repr_number(dn) +R # aggregate print: mean / SD / range +as.data.frame(R) # per-case offspring counts +attr(R, "global") # the scalar R +\dontrun{ +plot(R) # offspring distribution barplot +} + +# SIRS-style: a disadopt function lets nodes re-enter I, and every +# re-infection is recorded as its own case in the returned frame. +\dontrun{ +disadopt_30 <- function(expo, cumadopt, time) { + q_max <- dim(cumadopt)[3]; res <- vector("list", q_max) + for (q in seq_len(q_max)) { + adopters <- which(cumadopt[, time, q] == 1L) + res[[q]] <- if (length(adopters)) + sample(adopters, ceiling(0.30 * length(adopters))) else integer() + } + res +} +set.seed(2026) +dn_sirs <- rdiffnet(n = 60, t = 10, seed.graph = "small-world", + seed.p.adopt = 0.15, stop.no.diff = FALSE, + disadopt = disadopt_30, + source_attribution = source_attribution_uniform) +R_sirs <- repr_number(dn_sirs) +table(table(paste(R_sirs$node, R_sirs$virus_id))) # nodes by # of lives +} + +} +\references{ +Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +Superspreading and the effect of individual variation on disease emergence. +\emph{Nature} 438:355-359. \doi{10.1038/nature04153} +} +\author{ +Aníbal Olivera M. +} diff --git a/man/secondary_attack_rate.Rd b/man/secondary_attack_rate.Rd new file mode 100644 index 00000000..225843f4 --- /dev/null +++ b/man/secondary_attack_rate.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_metrics.R +\name{secondary_attack_rate} +\alias{secondary_attack_rate} +\alias{secondary_attack_rate.default} +\alias{secondary_attack_rate.diffnet_epi} +\alias{print.netdiffuseR_sar} +\title{Secondary attack rate from a transmission tree} +\usage{ +secondary_attack_rate(x, ...) + +\method{secondary_attack_rate}{default}(x, ...) + +\method{secondary_attack_rate}{diffnet_epi}(x, ...) + +\method{print}{netdiffuseR_sar}(x, ...) +} +\arguments{ +\item{x}{A \code{\link{diffnet_epi}} object.} + +\item{...}{Currently ignored.} +} +\value{ +A \code{data.frame} (with extra class \code{netdiffuseR_sar}) + carrying columns \code{source}, \code{virus_id}, + \code{source_exposure_date}, \code{n_secondary}, \code{n_contacts}, + \code{sar}. Printing shows the aggregate scalar; the per-event rows + are exposed via standard data.frame subscripting. The aggregate is + also stored as \code{attr(., "global")}. +} +\description{ +For each infection event recorded in \code{$transmission$tree}, +\code{secondary_attack_rate(x)} reports the number of secondary infections +caused by that event and the number of contacts the infector had in the +contact network at the slice corresponding to \code{source_exposure_date} +(the infector's own infection date). The per-event rate is +\code{n_secondary / n_contacts}; the aggregate (printed by default) is +\code{sum(n_secondary) / sum(n_contacts)}. +} +\details{ +Under absorbing diffusion each \code{(source, virus_id)} has exactly one +\code{source_exposure_date}, so the per-event keying collapses to the +classic per-source rollup. Under SIRS-style re-infection (a node enters +state I multiple times for the same virus), each infection-life of the +source is its own row, matching the convention used by epiworldR for +tree-derived metrics. + +Under SIRS the same \code{(source, target)} pair can transmit multiple +times during the source's infection life (the target disadopts and gets +re-infected by the same source). Each such transmission is a distinct +row in the tree and contributes to \code{n_secondary} for that +source-event, while \code{n_contacts} is fixed at the source's +neighbourhood size at \code{source_exposure_date}. Consequently the +per-event \code{sar} may exceed 1 (it is no longer a probability of +transmission but a count of transmissions per contact). The aggregate +\code{attr(sar, "global")} retains its sum-over-sum interpretation. +} +\examples{ +set.seed(2026) +dn <- rdiffnet(n = 40, t = 8, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) +sar <- secondary_attack_rate(dn) +sar # aggregate print +as.data.frame(sar) # per-source breakdown +attr(sar, "global") # aggregate scalar + +} +\author{ +Aníbal Olivera M. +} diff --git a/man/source_attribution.Rd b/man/source_attribution.Rd new file mode 100644 index 00000000..63d4eaaa --- /dev/null +++ b/man/source_attribution.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/source_attribution.R +\name{source_attribution} +\alias{source_attribution} +\alias{source_attribution_uniform} +\alias{source_attribution_weighted} +\alias{source_attribution_earliest} +\title{Source-attribution kernels for \code{rdiffnet}'s lineage tracking} +\usage{ +source_attribution_uniform(target, adopted_neighbours, weights, time, pars) + +source_attribution_weighted(target, adopted_neighbours, weights, time, pars) + +source_attribution_earliest(target, adopted_neighbours, weights, time, pars) +} +\arguments{ +\item{target}{Integer scalar. Index of the freshly adopted node.} + +\item{adopted_neighbours}{Integer vector of indices of neighbours of +\code{target} that were already adopted at the previous time step. +Pre-sorted by ascending time-of-adoption (earliest infector first). +Empty when \code{target} is a seed.} + +\item{weights}{Numeric vector of edge weights aligned with +\code{adopted_neighbours} (same length, same order). Carries the +non-zero entries of \code{sgraph[[time]][target, ]}. \code{NULL} or +constant when the graph is unweighted.} + +\item{time}{Integer scalar. Current simulation time step.} + +\item{pars}{Named list. Passed verbatim from \code{adoption_pars} so +user-defined attributors can read whatever they need.} +} +\value{ +A single integer index — one of \code{adopted_neighbours}, + identifying the attributed source. \code{NA_integer_} when + \code{adopted_neighbours} is empty (seed or spontaneously adopted + node). +} +\description{ +A family of pluggable rules that decide, for each freshly adopted node +in an \code{\link{rdiffnet}} simulation, \emph{which} of its already +adopted neighbours infected it. Pass any of these as the +\code{source_attribution} argument of \code{\link{rdiffnet}}, or write +your own function that follows the same contract. +} +\details{ +The contract is intentionally minimal. \code{rdiffnet()} pre-computes +\code{adopted_neighbours} and \code{weights} once per fresh adoption +and hands them sorted by toa, so user-defined attributors don't have +to query the simulation state themselves. The three kernels shipped +with the package: + +\describe{ + \item{\code{source_attribution_uniform}}{Samples uniformly across + \code{adopted_neighbours}. The default when the user passes + \code{TRUE} (deferred) and the simplest reasonable choice when + nothing distinguishes the candidates.} + \item{\code{source_attribution_weighted}}{Samples with probability + proportional to \code{weights}. Falls back to uniform when the + graph carries no weights (all entries equal). Appropriate for + contact-network simulations where edge weights encode contact + intensity.} + \item{\code{source_attribution_earliest}}{Returns the + earliest-infected adopted neighbour. Mirrors the heuristic that + the playground's \code{derive_tree} used post-hoc; useful as a + deterministic baseline.} +} +} +\examples{ +set.seed(2026) + +# Use a kernel directly inside rdiffnet(): +dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE, + source_attribution = source_attribution_weighted) + +is.diffnet_epi(dn) # TRUE — auto-promoted +nrow(transmission_tree(dn)) # one row per fresh adoption + seeds + +} +\references{ +Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. (2005). +Superspreading and the effect of individual variation on disease emergence. +\emph{Nature} 438:355-359. \doi{10.1038/nature04153} +} +\author{ +Aníbal Olivera M. +} diff --git a/man/status_accessors.Rd b/man/status_accessors.Rd new file mode 100644 index 00000000..6fd99393 --- /dev/null +++ b/man/status_accessors.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/status_accessors.R +\name{status_accessors} +\alias{status_accessors} +\alias{toa} +\alias{toa.diffnet} +\alias{tod} +\alias{tod.diffnet} +\alias{toa_all} +\alias{toa_all.diffnet} +\alias{tod_all} +\alias{tod_all.diffnet} +\title{Accessors for adoption / disadoption times in a \code{diffnet}} +\usage{ +toa(x) + +\method{toa}{diffnet}(x) + +tod(x) + +\method{tod}{diffnet}(x) + +toa_all(x) + +\method{toa_all}{diffnet}(x) + +tod_all(x) + +\method{tod_all}{diffnet}(x) +} +\arguments{ +\item{x}{A \code{diffnet} object.} +} +\value{ +\describe{ + \item{\code{toa(x)}}{Integer vector of length \eqn{n} (single-behaviour) + or \eqn{n \times Q} integer matrix (multi-behaviour). \code{NA} when + the node never adopted. Equivalent to \code{x$toa}.} + \item{\code{tod(x)}}{Same shape as \code{toa(x)}. First time after + \code{toa[i, q]} when \code{$status[i, t, q]} flips back to 0. + \code{NA} when the node never recovered (absorbing).} + \item{\code{toa_all(x)}}{\code{data.frame} with columns \code{node}, + \code{behavior}, \code{episode}, \code{time}. One row per fresh + adoption event in \code{$status}.} + \item{\code{tod_all(x)}}{\code{data.frame} with columns \code{node}, + \code{behavior}, \code{episode}, \code{time}. One row per recovery + event in \code{$status}.} +} +} +\description{ +Mirror accessors over the canonical \code{$status} slot. \code{toa} and +\code{tod} return the \emph{first} adoption / first recovery time per node +(per behaviour for multi-behaviour diffnets) — same shape as the legacy +\code{$toa} field. \code{toa_all} and \code{tod_all} return long-format +\code{data.frame}s capturing every event in the multi-cycle history. +} +\details{ +For an absorbing single-cycle diffnet \code{tod(x)} is \code{NA} for every +node (no recoveries) and \code{tod_all(x)} returns a zero-row data.frame. +For a multi-cycle diffnet, \code{tod(x)} reports only the first recovery +per node-behaviour as a summary; use \code{tod_all(x)} for the full +history. +} +\examples{ +set.seed(2026) +g <- rgraph_er(n = 10, t = 1, p = 0.4) +dn <- rdiffnet(seed.graph = g, t = 6, seed.p.adopt = 0.2, + stop.no.diff = FALSE) + +toa(dn) # first adoption time per node (same as dn$toa) +tod(dn) # first recovery — all NA for an absorbing diffnet +toa_all(dn) # one row per fresh adoption event +tod_all(dn) # zero-row data.frame for an absorbing diffnet + +} +\author{ +Aníbal Olivera M. +} diff --git a/man/summary.diffnet_epi.Rd b/man/summary.diffnet_epi.Rd new file mode 100644 index 00000000..e91d2617 --- /dev/null +++ b/man/summary.diffnet_epi.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_metrics.R +\name{summary.diffnet_epi} +\alias{summary.diffnet_epi} +\title{Summary method for \code{diffnet_epi} objects} +\usage{ +\method{summary}{diffnet_epi}(object, ...) +} +\arguments{ +\item{object}{A \code{\link{diffnet_epi}} object.} + +\item{...}{Forwarded to \code{summary.diffnet}.} +} +\value{ +Invisibly, the object returned by \code{summary.diffnet}. The + epi block is printed as a side effect. +} +\description{ +Extends \code{\link[=summary.diffnet]{summary.diffnet}} with an +epidemiological block: peak prevalence + peak time, secondary attack rate +(aggregate), and generation time (summary stats). The base diffnet block +is printed first via \code{NextMethod()}; the epi block follows. +} +\author{ +Aníbal Olivera M. +} diff --git a/man/survival_curve.Rd b/man/survival_curve.Rd new file mode 100644 index 00000000..5d4178c4 --- /dev/null +++ b/man/survival_curve.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_metrics.R +\name{survival_curve} +\alias{survival_curve} +\alias{survival_curve.diffnet} +\alias{print.netdiffuseR_survival} +\title{Kaplan-Meier-style survival curve for a diffnet} +\usage{ +survival_curve(x, ...) + +\method{survival_curve}{diffnet}(x, ...) + +\method{print}{netdiffuseR_survival}(x, ...) +} +\arguments{ +\item{x}{A \code{\link{diffnet}} object.} + +\item{...}{Currently ignored.} +} +\value{ +A \code{data.frame} (with extra class \code{netdiffuseR_survival}) + carrying columns \code{time}, \code{n_at_risk}, \code{n_recovered}, and + \code{survival}. For multi-behaviour diffnets an additional + \code{virus_id} column tags the behaviour. Printing summarises the + curve; standard data.frame subscripting works on the underlying rows. +} +\description{ +For each adopted node, \code{survival_curve(x)} computes the duration in +the adopted state (\code{tod(x) - toa(x)}; right-censored at \code{T} for +nodes that never recover) and assembles a Kaplan-Meier-style survival +table. +} +\examples{ +set.seed(2026) +dn <- rdiffnet(n = 50, t = 8, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + disadopt = disadoptmech_random(prob = 0.15)) +s <- survival_curve(dn) +s # prints summary +as.data.frame(s) # full data.frame + +} +\author{ +Aníbal Olivera M. +} diff --git a/man/transmission_tree.Rd b/man/transmission_tree.Rd new file mode 100644 index 00000000..75ff3b1d --- /dev/null +++ b/man/transmission_tree.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transmission.R +\name{transmission_tree} +\alias{transmission_tree} +\title{Retrieve the transmission tree of a \code{\link{diffnet_epi}} object} +\usage{ +transmission_tree(x) +} +\arguments{ +\item{x}{A \code{\link{diffnet_epi}} object.} +} +\value{ +A \code{data.frame} with columns \code{date}, \code{source}, + \code{target}, \code{source_exposure_date}, \code{virus_id}, \code{virus}. + Zero rows when the epi object has been promoted but no tree attached yet. +} +\description{ +Returns the data.frame stored in \code{x$transmission$tree} for objects +that inherit from \code{diffnet_epi}. Plain (non-epi) diffnets do not +carry a tree by design; calling this function on one is an API error. +} +\seealso{ +\code{\link{as_transmission_tree}}, \code{\link{as_diffnet_epi}} +} +\author{ +Aníbal Olivera M. +} diff --git a/man/transmission_tree_from_events.Rd b/man/transmission_tree_from_events.Rd new file mode 100644 index 00000000..59754d4f --- /dev/null +++ b/man/transmission_tree_from_events.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transmission.R +\name{transmission_tree_from_events} +\alias{transmission_tree_from_events} +\title{Reconstruct a transmission tree from observed adoption times (M13)} +\usage{ +transmission_tree_from_events( + x, + toa = NULL, + attribution = "uniform", + pars = list(), + behavior = NULL, + seed = NULL +) +} +\arguments{ +\item{x}{Either a \code{\link{diffnet}} object (graphs and \code{toa} +read from its slots), or a list of adjacency matrices — one per +time slice. When a list, \code{toa} must be supplied.} + +\item{toa}{Times-of-adoption. Integer vector of length \eqn{n} +(single-behaviour) or \eqn{n \times Q} integer matrix (multi-behaviour). +\code{NA} marks a node that never adopted. Ignored when \code{x} is a +diffnet (read from \code{x$toa} instead).} + +\item{attribution}{The source-attribution rule. Either a string — +\code{"uniform"}, \code{"weighted"}, or \code{"earliest"} (the +bundled kernels) — or a function with the same signature as +\code{\link{source_attribution_uniform}}.} + +\item{pars}{Optional named list. Stored verbatim in +\code{x$transmission$pars} when the result is wired into +\code{\link{as_diffnet_epi}}; also forwarded to the attribution +function as its \code{pars} argument.} + +\item{behavior}{Optional character vector of length \eqn{Q} naming +each diffusion process. Used to populate the \code{virus} column of +the returned tree. Defaults to \code{"behavior_1"}, \code{"behavior_2"}, +... when \code{NULL}.} + +\item{seed}{Optional integer. When non-\code{NULL}, calls +\code{\link{set.seed}} once before reconstruction so the stochastic +attribution rules (\code{"uniform"}, \code{"weighted"}) produce a +reproducible tree.} +} +\value{ +A \code{data.frame} with the canonical transmission-tree + schema: \code{date}, \code{source}, \code{target}, + \code{source_exposure_date}, \code{virus_id}, \code{virus}. + One row per infection event (seeds included, with + \code{source = NA}). Suitable to pass straight to + \code{\link{as_transmission_tree}}. +} +\description{ +Given a dynamic contact network and per-node times of adoption, infer a +transmission tree by source-attribution: for each infection event, pick +the most plausible infector among the target's adopted neighbours at the +slice where the target was infected. The selection rule is the user's +choice (one of the bundled \code{\link{source_attribution}} kernels or a +user-supplied function that follows the same contract). +} +\details{ +This is the general-purpose primitive behind \code{rdiffnet()}'s +lineage-tracking (M8): the same algorithm that constructs the tree +during simulation also constructs it post-hoc from observed data, which +is what makes data products like \code{\link{epigamesDiffNet}} possible +without bespoke parsing code. + + +For every \code{(target, virus_id)} pair with non-\code{NA} \code{toa}, +the algorithm inspects the slice \code{x$graph[[toa[target, q]]]} and +picks the source among target's neighbours that were already adopted +(\code{toa[v, q] < toa[target, q]}). Targets with no adopted neighbour +at the moment of their adoption become seeds (\code{source = NA}). +Multi-behaviour diffnets are handled one behaviour at a time; the +resulting rows are concatenated. + +Under SIRS-style re-infection \code{toa[target, q]} only stores the +\emph{latest} infection time, so the reconstructed tree will only carry +one row per node per virus. To capture every entry into I, build the +tree at simulation time via +\code{rdiffnet(..., source_attribution = ...)} instead — that path +records each fresh adoption as it happens. +} +\examples{ +set.seed(2026) +# Build a tiny absorbing diffnet, then reconstruct its tree post-hoc. +g <- lapply(1:5, function(t) rgraph_ba(t = 4L)) +toa <- c(1L, 2L, 3L, NA, 5L) +dn <- new_diffnet(g, toa = toa, t0 = 1L, t1 = 5L) + +tree <- transmission_tree_from_events(dn, attribution = "uniform", + seed = 2026) +head(tree) + +# Promote to diffnet_epi in one step: +dn_epi <- as_diffnet_epi(dn, attribution = "uniform") +is.diffnet_epi(dn_epi) + +} +\seealso{ +\code{\link{source_attribution}}, + \code{\link{as_transmission_tree}}, \code{\link{as_diffnet_epi}} +} +\author{ +Aníbal Olivera M. +} diff --git a/tests/testthat/test-cumulative_adopt_count.R b/tests/testthat/test-cumulative_adopt_count.R index 654ca4d3..9f4139ea 100644 --- a/tests/testthat/test-cumulative_adopt_count.R +++ b/tests/testthat/test-cumulative_adopt_count.R @@ -52,3 +52,44 @@ test_that("Hazard rate", { expect_equal(hr, hr_hand, tolerance=getOption("diffnet.tol"), scale=1) expect_equal(hr_dn, hr_hand, tolerance=getOption("diffnet.tol"), scale=1) }) + +# Hazard rate -- status-aware (M11) ------------------------------------------- +# Bit-identical for absorbing inputs (legacy guarantee) and SIR-correct for +# non-monotone status arrays (multi-cycle / recovery). + +test_that("hazard_rate on a status array with recovery counts re-adoptions", { + # Hand-built status with two distinct adoption episodes for node 1. + # t = 1 2 3 4 5 6 + # n1: 1 1 0 0 1 1 (adopt t=1, recover t=3, re-adopt t=5) + # n2: 0 1 1 0 0 0 (adopt t=2, recover t=4) + # n3: 0 0 0 0 0 0 (never adopts) + # n4: 0 0 1 1 1 1 (adopt t=3, absorbing) + status <- rbind( + c(1L, 1L, 0L, 0L, 1L, 1L), + c(0L, 1L, 1L, 0L, 0L, 0L), + c(0L, 0L, 0L, 0L, 0L, 0L), + c(0L, 0L, 1L, 1L, 1L, 1L) + ) + + # Hand-computed expected hazard: + # t=2: fresh = n2 (0->1) = 1; susc at t=1 = n2,n3,n4 = 3; haz = 1/3 + # t=3: fresh = n4 (0->1) = 1; susc at t=2 = n3,n4 = 2; haz = 1/2 + # t=4: fresh = 0; susc at t=3 = n2,n3 = 2; haz = 0 + # t=5: fresh = n1 (0->1 re-adopt) = 1; susc at t=4 = n1,n2,n3 = 3; haz = 1/3 + # t=6: fresh = 0; susc at t=5 = n2,n3 = 2; haz = 0 + expected <- c(0, 1/3, 1/2, 0, 1/3, 0) + + hr <- as.numeric(hazard_rate(status, no.plot = TRUE)) + expect_equal(hr, expected, tolerance = getOption("diffnet.tol"), scale = 1) +}) + +test_that("hazard_rate via diffnet stays bit-identical for absorbing inputs", { + # The legacy tests above already cover the absorbing case end-to-end against + # a hand-built formula. This extra guard confirms the two code paths + # (diffnet vs raw matrix) produce numerically identical results on the same + # underlying status. + hr_via_dn <- as.numeric(hazard_rate(diffnet, no.plot = TRUE)) + hr_via_mat <- as.numeric(hazard_rate(diffnet$status, no.plot = TRUE)) + expect_equal(hr_via_dn, hr_via_mat, + tolerance = getOption("diffnet.tol"), scale = 1) +}) diff --git a/tests/testthat/test-diffnet-epi.R b/tests/testthat/test-diffnet-epi.R new file mode 100644 index 00000000..02a4aea7 --- /dev/null +++ b/tests/testthat/test-diffnet-epi.R @@ -0,0 +1,205 @@ +context("diffnet_epi subclass") + +mk_diffnet <- function() { + set.seed(42) + gr <- lapply(1:5, function(x) rgraph_ba(t = 4L)) + new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L) +} + +# ---------------------------------------------------------------------------- +# Promotion: as_diffnet_epi() +# ---------------------------------------------------------------------------- + +test_that("as_diffnet_epi() with no tree promotes to subclass and seeds empty tree", { + x <- mk_diffnet() + y <- as_diffnet_epi(x) + expect_true(is.diffnet_epi(y)) + expect_s3_class(y, c("diffnet_epi", "diffnet")) + expect_equal(nrow(transmission_tree(y)), 0L) + expect_setequal( + names(transmission_tree(y)), + c("date", "source", "target", "source_exposure_date", "virus_id", "virus") + ) +}) + +test_that("as_diffnet_epi() with a pre-built transmission list stores it verbatim", { + x <- mk_diffnet() + pre_tree <- data.frame( + date = 1L, source = NA_integer_, target = 1L, + source_exposure_date = NA_integer_, virus_id = 1L, virus = NA_character_, + stringsAsFactors = FALSE + ) + pre <- list(tree = pre_tree, pars = list(kernel = "wells-riley")) + y <- as_diffnet_epi(x, transmission = pre) + expect_true(is.diffnet_epi(y)) + expect_identical(transmission_tree(y), pre_tree) + expect_equal(y$transmission$pars$kernel, "wells-riley") +}) + +test_that("as_diffnet_epi() rejects malformed -transmission- inputs", { + x <- mk_diffnet() + expect_error(as_diffnet_epi(x, transmission = 42), "NULL or a list") + expect_error(as_diffnet_epi(x, transmission = "tree"), "NULL or a list") + expect_error(as_diffnet_epi(x, transmission = list(foo = 1)), + "NULL or a list with -tree- and -pars-") + # Even a data.frame is rejected here on purpose -- you'd use + # as_transmission_tree() to attach a data.frame. + expect_error(as_diffnet_epi(x, transmission = data.frame(a = 1)), + "NULL or a list") +}) + +test_that("as_diffnet_epi() refuses non-diffnet input", { + expect_error(as_diffnet_epi(42), "must be a diffnet") +}) + +test_that("Promotion is monotone (idempotent)", { + x <- mk_diffnet() + y <- as_diffnet_epi(x) + y2 <- as_diffnet_epi(y) + expect_identical(class(y), class(y2)) # still c("diffnet_epi", "diffnet") + expect_equal(sum(class(y2) == "diffnet_epi"), 1L) # not duplicated +}) + +# ---------------------------------------------------------------------------- +# is.diffnet_epi() +# ---------------------------------------------------------------------------- + +test_that("is.diffnet_epi() distinguishes plain diffnet from the subclass", { + x <- mk_diffnet() + y <- as_diffnet_epi(x) + expect_false(is.diffnet_epi(x)) + expect_true(is.diffnet_epi(y)) + expect_false(is.diffnet_epi("not a diffnet")) + expect_false(is.diffnet_epi(NULL)) +}) + +# ---------------------------------------------------------------------------- +# print.diffnet_epi() +# ---------------------------------------------------------------------------- + +test_that("print.diffnet_epi extends print.diffnet with a transmission line", { + x <- as_diffnet_epi(mk_diffnet()) + out <- capture.output(print(x)) + # All the base diffnet print fields are still there + expect_true(any(grepl("Dynamic network of class -diffnet-", out))) + expect_true(any(grepl("Final prevalence", out))) + # And the appended epi-specific line + expect_true(any(grepl("Transmission tree", out))) + expect_true(any(grepl("empty", out))) +}) + +test_that("print.diffnet_epi shows edge/seed/virus counts for a populated tree", { + x <- mk_diffnet() + tree <- data.frame( + date = c(1L, 2L, 2L), + source = c(NA_integer_, 1L, 1L), + target = c(1L, 2L, 4L), + source_exposure_date = c(NA_integer_, 1L, 1L), + virus_id = c(1L, 1L, 1L), + virus = c("flu", "flu", "flu"), + stringsAsFactors = FALSE + ) + y <- as_transmission_tree(x, tree) + out <- capture.output(print(y)) + expect_true(any(grepl("Transmission tree", out))) + expect_true(any(grepl("3 edges", out))) + expect_true(any(grepl("1 seeds?", out))) + expect_true(any(grepl("1 virus(es)?", out))) +}) + +# ---------------------------------------------------------------------------- +# Inheritance: existing diffnet methods still work +# ---------------------------------------------------------------------------- + +test_that("diffnet methods dispatch on diffnet_epi via inheritance", { + y <- as_diffnet_epi(mk_diffnet()) + + # nnodes/nslices come from the base class + expect_equal(nnodes(y), 5L) + expect_equal(nslices(y), 5L) + + # toa(), tod() (M5 accessors) work on diffnet_epi as well + expect_equal(unname(toa(y)), c(1L, 2L, NA, 3L, 5L)) + expect_equal(unname(tod(y)), rep(NA_integer_, 5L)) + + # The subsetting still produces a valid object (still inheriting from both) + y2 <- y[-3] + expect_true(is.diffnet_epi(y2)) + expect_true(inherits(y2, "diffnet")) +}) + +# ---------------------------------------------------------------------------- +# Base diffnet does NOT carry a $transmission slot +# ---------------------------------------------------------------------------- + +test_that("a fresh new_diffnet() does not include a $transmission slot", { + x <- mk_diffnet() + # The slot is not part of the base structure anymore (M7). + expect_false("transmission" %in% names(x)) +}) + +# ---------------------------------------------------------------------------- +# M7.1 — new_diffnet() accepts an optional -transmission- argument +# ---------------------------------------------------------------------------- + +test_that("new_diffnet(graph, transmission = df) promotes to diffnet_epi in one call", { + set.seed(42) + gr <- lapply(1:5, function(x) rgraph_ba(t = 4L)) + tree <- data.frame( + date = c(1L, 2L), + source = c(NA_integer_, 1L), + target = c(1L, 2L), + source_exposure_date = c(NA_integer_, 1L), + virus_id = c(1L, 1L), + virus = c("flu", "flu"), + stringsAsFactors = FALSE + ) + dn <- new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L, + transmission = tree, + transmission_pars = list(kernel = "wells-riley")) + + expect_true(is.diffnet_epi(dn)) + expect_s3_class(dn, c("diffnet_epi", "diffnet")) + expect_equal(nrow(transmission_tree(dn)), 2L) + expect_equal(dn$transmission$pars$kernel, "wells-riley") +}) + +test_that("new_diffnet(graph, transmission = list(tree, pars)) stores the list verbatim", { + set.seed(42) + gr <- lapply(1:5, function(x) rgraph_ba(t = 4L)) + pre <- list( + tree = data.frame( + date = 1L, source = NA_integer_, target = 1L, + source_exposure_date = NA_integer_, virus_id = 1L, + virus = NA_character_, stringsAsFactors = FALSE + ), + pars = list(note = "imported") + ) + dn <- new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L, + transmission = pre) + + expect_true(is.diffnet_epi(dn)) + expect_equal(dn$transmission$pars$note, "imported") + expect_equal(nrow(transmission_tree(dn)), 1L) +}) + +test_that("new_diffnet(..., transmission = ) errors clearly", { + set.seed(42) + gr <- lapply(1:5, function(x) rgraph_ba(t = 4L)) + expect_error( + new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L, + transmission = 42), + "NULL, a data.frame, or a list" + ) + expect_error( + new_diffnet(gr, toa = c(1L, 2L, NA, 3L, 5L), t0 = 1L, t1 = 5L, + transmission = list(foo = 1)), + "NULL, a data.frame, or a list" + ) +}) + +test_that("new_diffnet() without -transmission- still returns a plain diffnet", { + x <- mk_diffnet() + expect_false(is.diffnet_epi(x)) + expect_s3_class(x, "diffnet") +}) diff --git a/tests/testthat/test-diffnet-methods.R b/tests/testthat/test-diffnet-methods.R index 25ea167b..0df3c210 100644 --- a/tests/testthat/test-diffnet-methods.R +++ b/tests/testthat/test-diffnet-methods.R @@ -50,6 +50,35 @@ test_that("More plot methods", { expect_silent(plot_adopters(g$cumadopt)) }) +# plot_adopters -- status-aware (M11) ----------------------------------------- +# Confirms that plot_adopters runs on a multi-cycle diffnet (built from a +# non-monotone status array) and that the returned counts reflect the +# canonical $status semantics (the underlying state, not a forced cumulative). + +test_that("plot_adopters runs on a multi-cycle diffnet built from -status-", { + set.seed(2026) + g <- lapply(1:6, function(t) rgraph_ba(t = 4L)) + status <- rbind( + c(1L, 1L, 0L, 0L, 1L, 1L), # node 1: adopt, recover, re-adopt + c(0L, 1L, 1L, 0L, 0L, 0L), # node 2: adopt, recover + c(0L, 0L, 0L, 0L, 0L, 0L), # node 3: never + c(0L, 0L, 1L, 1L, 1L, 1L), # node 4: absorbing + c(0L, 1L, 1L, 1L, 1L, 1L) # node 5: absorbing + ) + dn <- new_diffnet(g, status = status, t0 = 1L, t1 = 6L) + + out <- expect_silent(plot_adopters(dn)) + + # "num" row in the returned cumadopt-like summary equals colSums($status), + # which is the currently-adopted count per period. + expect_equal(as.integer(out["num", ]), + as.integer(colSums(status))) + + # And it really IS non-monotone for this multi-cycle data (sanity check). + diffs <- diff(as.integer(out["num", ])) + expect_true(any(diffs < 0L)) +}) + # plot_threshold, threshold and exposure --------------------------------------- context("Threshold functions") test_that("Returning threshold equal to the threshold fun (plot_threshold and )", { diff --git a/tests/testthat/test-epi-metrics.R b/tests/testthat/test-epi-metrics.R new file mode 100644 index 00000000..1dfde10d --- /dev/null +++ b/tests/testthat/test-epi-metrics.R @@ -0,0 +1,371 @@ +context("Epidemiological metrics for diffnet / diffnet_epi (M10)") +library(netdiffuseR) + +mk_absorbing_dn <- function() { + set.seed(2026) + rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE) +} + +mk_recovery_dn <- function() { + # Hand-built diffnet with explicit recovery via -status-, so toa is preserved + # for all adopters (rdiffnet's -disadopt- resets toa to NA on disadoption, + # which is not what we want for the KM curve test). + set.seed(2026) + g <- lapply(1:6, function(t) rgraph_ba(t = 5L)) + status <- rbind( + c(1L, 1L, 1L, 0L, 0L, 0L), # node 1: adopt t=1, recover t=4 + c(0L, 1L, 1L, 1L, 0L, 0L), # node 2: adopt t=2, recover t=5 + c(0L, 0L, 0L, 0L, 0L, 0L), # node 3: never adopts + c(0L, 0L, 1L, 1L, 1L, 1L), # node 4: adopt t=3, absorbing (censored) + c(0L, 0L, 0L, 1L, 1L, 0L), # node 5: adopt t=4, recover t=6 + c(0L, 1L, 1L, 1L, 1L, 1L) # node 6: adopt t=2, absorbing + ) + new_diffnet(g, status = status, t0 = 1L, t1 = 6L) +} + +mk_epi_dn <- function() { + set.seed(2026) + rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) +} + +# ---------------------------------------------------------------------------- +# peak_prevalence / peak_time +# ---------------------------------------------------------------------------- + +test_that("peak_prevalence and peak_time work on plain diffnet", { + dn <- mk_absorbing_dn() + pp <- peak_prevalence(dn) + pt <- peak_time(dn) + expect_type(pp, "double") + expect_length(pp, 1L) + expect_true(pp >= 0 && pp <= 1) + expect_type(pt, "integer") + expect_length(pt, 1L) + expect_true(pt %in% dn$meta$pers) +}) + +test_that("peak_prevalence is non-decreasing for an absorbing single-virus sim", { + dn <- mk_absorbing_dn() + prev_per_t <- colSums(dn$status) / nrow(dn$status) + expect_identical(peak_prevalence(dn), max(prev_per_t)) + # In absorbing diffusion the peak is at the final period + expect_equal(peak_time(dn), dn$meta$pers[which.max(prev_per_t)]) +}) + +test_that("peak_prevalence returns named vector for multi-behaviour", { + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), stop.no.diff = FALSE) + ) + pp <- peak_prevalence(dn) + pt <- peak_time(dn) + expect_length(pp, 2L) + expect_length(pt, 2L) + expect_true(!is.null(names(pp))) +}) + +test_that("peak_prevalence on diffnet_epi delegates correctly", { + dn_epi <- mk_epi_dn() + expect_equal(peak_prevalence(dn_epi), + peak_prevalence(structure(dn_epi, class = "diffnet"))) +}) + +# ---------------------------------------------------------------------------- +# survival_curve +# ---------------------------------------------------------------------------- + +test_that("survival_curve returns a netdiffuseR_survival data.frame", { + dn <- mk_absorbing_dn() + s <- survival_curve(dn) + expect_s3_class(s, "netdiffuseR_survival") + expect_s3_class(s, "data.frame") + expect_setequal(names(s), c("time", "n_at_risk", "n_recovered", "survival")) +}) + +test_that("survival_curve for absorbing diffusion has survival = 1 throughout", { + dn <- mk_absorbing_dn() + s <- survival_curve(dn) + expect_true(all(s$survival == 1)) + expect_true(all(s$n_recovered == 0)) +}) + +test_that("survival_curve drops below 1 for a recovery simulation", { + dn <- mk_recovery_dn() + s <- survival_curve(dn) + # Some recoveries should be present + expect_true(sum(s$n_recovered) >= 1L) + expect_true(any(s$survival < 1)) +}) + +test_that("survival_curve print method runs silently", { + dn <- mk_absorbing_dn() + out <- capture.output(print(survival_curve(dn))) + expect_true(any(grepl("Survival curve", out))) + expect_true(any(grepl("as.data.frame", out))) +}) + +# ---------------------------------------------------------------------------- +# secondary_attack_rate +# ---------------------------------------------------------------------------- + +test_that("secondary_attack_rate errors on plain diffnet", { + dn <- mk_absorbing_dn() + expect_error(secondary_attack_rate(dn), "diffnet_epi") +}) + +test_that("secondary_attack_rate returns a netdiffuseR_sar data.frame", { + dn <- mk_epi_dn() + sar <- secondary_attack_rate(dn) + expect_s3_class(sar, "netdiffuseR_sar") + expect_s3_class(sar, "data.frame") + # M12.2: column set now carries source_exposure_date (per-event keying) + expect_setequal( + names(sar), + c("source", "virus_id", "source_exposure_date", + "n_secondary", "n_contacts", "sar") + ) + # All per-event rates are in [0, 1] (or NA when contacts = 0) + sar_vals <- sar$sar[!is.na(sar$sar)] + expect_true(all(sar_vals >= 0)) + expect_true(all(sar_vals <= 1)) +}) + +test_that("secondary_attack_rate aggregate matches sum-over-sum formula", { + dn <- mk_epi_dn() + sar <- secondary_attack_rate(dn) + expected_global <- sum(sar$n_secondary) / sum(sar$n_contacts) + expect_equal(attr(sar, "global"), expected_global) +}) + +test_that("secondary_attack_rate print mentions Aggregate and infectors", { + dn <- mk_epi_dn() + out <- capture.output(print(secondary_attack_rate(dn))) + expect_true(any(grepl("Secondary Attack Rate", out))) + expect_true(any(grepl("Aggregate", out))) + expect_true(any(grepl("infector", out))) +}) + +# ---------------------------------------------------------------------------- +# generation_time +# ---------------------------------------------------------------------------- + +test_that("generation_time errors on plain diffnet", { + dn <- mk_absorbing_dn() + expect_error(generation_time(dn), "diffnet_epi") +}) + +test_that("generation_time returns a netdiffuseR_generation_time data.frame", { + dn <- mk_epi_dn() + gt <- generation_time(dn) + expect_s3_class(gt, "netdiffuseR_generation_time") + expect_s3_class(gt, "data.frame") + expect_true("gen_time" %in% names(gt)) + # All gen_times are positive (date - source_exposure_date > 0) + expect_true(all(gt$gen_time > 0)) +}) + +test_that("generation_time gen_time equals date - source_exposure_date", { + dn <- mk_epi_dn() + gt <- generation_time(dn) + expect_equal(as.integer(gt$gen_time), + as.integer(gt$date - gt$source_exposure_date)) +}) + +test_that("generation_time print mentions Mean and Median", { + dn <- mk_epi_dn() + out <- capture.output(print(generation_time(dn))) + expect_true(any(grepl("Generation time", out))) + expect_true(any(grepl("Mean", out))) + expect_true(any(grepl("Median", out))) +}) + +# ---------------------------------------------------------------------------- +# repr_number (M12) +# ---------------------------------------------------------------------------- + +test_that("repr_number errors on plain diffnet (and on arbitrary input)", { + dn <- mk_absorbing_dn() + expect_error(repr_number(dn), "diffnet_epi") + expect_error(repr_number(list(a = 1)), "diffnet_epi") +}) + +test_that("repr_number returns a netdiffuseR_repr data.frame", { + dn <- mk_epi_dn() + R <- repr_number(dn) + expect_s3_class(R, "netdiffuseR_repr") + expect_s3_class(R, "data.frame") + # M12.2: column set now carries exposure_date (per-event keying) + expect_setequal(names(R), + c("node", "virus_id", "exposure_date", "n_offspring")) + expect_type(R$n_offspring, "integer") + expect_true(all(R$n_offspring >= 0L)) +}) + +test_that("repr_number global = mean(n_offspring) and matches non-seed source count", { + dn <- mk_epi_dn() + R <- repr_number(dn) + tr <- transmission_tree(dn) + + # Aggregate is mean offspring + expect_equal(attr(R, "global"), mean(R$n_offspring)) + + # Sum of offspring equals number of non-seed source rows in the tree + expect_equal(sum(R$n_offspring), sum(!is.na(tr$source))) + + # Row count equals number of unique (target, virus_id) cases + expect_equal( + nrow(R), + nrow(unique(tr[, c("target", "virus_id"), drop = FALSE])) + ) +}) + +test_that("repr_number print mentions Reproduction number and Mean offspring", { + dn <- mk_epi_dn() + out <- capture.output(print(repr_number(dn))) + expect_true(any(grepl("Reproduction number", out))) + expect_true(any(grepl("Mean offspring", out))) + expect_true(any(grepl("as.data.frame", out))) + # Single-diffusion path: no aggregate-banner line + expect_false(any(grepl("Aggregate over", out))) +}) + +test_that("repr_number print flags aggregate when tree carries multiple diffusions", { + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.05), stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) + ) + R <- repr_number(dn) + out <- capture.output(print(R)) + expect_true(any(grepl("Aggregate over 2 diffusions", out))) + expect_true(any(grepl("Per-diffusion R", out))) + # Both diffusion ids must show up in the per-diffusion rollup lines + expect_true(any(grepl("diffusion 1:", out))) + expect_true(any(grepl("diffusion 2:", out))) + # Aggregate R must equal mean over all rows (pooled) + expect_equal(attr(R, "global"), mean(R$n_offspring)) +}) + +# ---------------------------------------------------------------------------- +# Re-infection / SIRS end-to-end (M12.2) +# ---------------------------------------------------------------------------- + +mk_sirs_dn <- function() { + # 30% of currently-adopted nodes disadopt each period; those nodes become + # susceptible again and can re-adopt, producing multiple infection-lives + # per (node, virus_id) in the transmission tree. + disadopt_30 <- function(expo, cumadopt, time) { + q_max <- dim(cumadopt)[3] + res <- vector("list", q_max) + for (q in seq_len(q_max)) { + adopters <- which(cumadopt[, time, q] == 1L) + if (length(adopters) == 0L) { + res[[q]] <- integer() + next + } + res[[q]] <- sample(adopters, ceiling(0.30 * length(adopters))) + } + res + } + set.seed(2026) + suppressWarnings( + rdiffnet( + n = 60, t = 10, seed.graph = "small-world", + seed.p.adopt = 0.15, stop.no.diff = FALSE, + disadopt = disadopt_30, + source_attribution = source_attribution_uniform + ) + ) +} + +test_that("rdiffnet with disadopt produces re-infections in the tree", { + dn <- mk_sirs_dn() + tr <- transmission_tree(dn) + # At least one (target, virus_id) appears more than once in the tree. + key <- paste(tr$target, tr$virus_id, sep = "::") + expect_true(any(table(key) > 1L)) +}) + +test_that("repr_number counts each re-infection as its own case (3-D key)", { + dn <- mk_sirs_dn() + tr <- transmission_tree(dn) + R <- repr_number(dn) + + # Cases match unique (target, virus_id, date) tuples in the tree. + cases_3d <- unique(tr[, c("target", "virus_id", "date")]) + expect_equal(nrow(R), nrow(cases_3d)) + + # Sum of offspring equals number of non-seed edges (the M12 identity, + # still holds under per-event keying since every non-seed edge maps to + # exactly one source-event). + expect_equal(sum(R$n_offspring), sum(!is.na(tr$source))) + + # Aggregate R matches mean(n_offspring). + expect_equal(attr(R, "global"), mean(R$n_offspring)) + + # exposure_date column is present and integer. + expect_true("exposure_date" %in% names(R)) + expect_type(R$exposure_date, "integer") + + # A re-infected node must appear in multiple R rows. + node_counts <- table(paste(R$node, R$virus_id, sep = "::")) + expect_true(any(node_counts > 1L)) +}) + +test_that("secondary_attack_rate aggregates per source-event under re-infection", { + dn <- mk_sirs_dn() + sar <- secondary_attack_rate(dn) + tr <- transmission_tree(dn) + + # Each row corresponds to a distinct (source, virus_id, exposure_date). + expect_true("source_exposure_date" %in% names(sar)) + key_sar <- paste(sar$source, sar$virus_id, sar$source_exposure_date, + sep = "::") + expect_equal(length(key_sar), length(unique(key_sar))) + + # At least one source has multiple infection-events (otherwise the + # multi-cycle test is degenerate). + src_key <- paste(sar$source, sar$virus_id, sep = "::") + expect_true(any(table(src_key) > 1L)) + + # n_contacts uses graph[[source_exposure_date]], not graph[[toa]]. Verify + # on one row that the contact count matches the slice the row points to. + i <- which(sar$n_contacts > 0L)[1L] + s <- sar$source[i]; t_inf <- sar$source_exposure_date[i] + g <- dn$graph[[t_inf]] + expected_con <- as.integer(sum((g[s, ] != 0) | (g[, s] != 0))) + expect_equal(sar$n_contacts[i], expected_con) + + # Global aggregate identity holds (sum-of-secondaries / sum-of-contacts). + expect_equal(attr(sar, "global"), + sum(sar$n_secondary) / sum(sar$n_contacts)) + + # n_secondary sums to the number of non-seed edges in the tree. + expect_equal(sum(sar$n_secondary), sum(!is.na(tr$source))) +}) + +# ---------------------------------------------------------------------------- +# summary.diffnet_epi +# ---------------------------------------------------------------------------- + +test_that("summary.diffnet_epi extends summary.diffnet with epi block", { + dn <- mk_epi_dn() + out <- capture.output(summary(dn)) + # Base diffnet summary fields + expect_true(any(grepl("Diffusion network summary statistics", out))) + # New epi block fields + expect_true(any(grepl("Epidemiological metrics", out))) + expect_true(any(grepl("Peak prevalence", out))) + expect_true(any(grepl("Secondary Attack Rate", out))) +}) + +test_that("summary.diffnet_epi returns invisibly (no print on assignment)", { + dn <- mk_epi_dn() + out <- capture.output(invisible(summary(dn))) + expect_true(length(out) > 0L) +}) diff --git a/tests/testthat/test-exposure-link-fun.R b/tests/testthat/test-exposure-link-fun.R new file mode 100644 index 00000000..0531f10c --- /dev/null +++ b/tests/testthat/test-exposure-link-fun.R @@ -0,0 +1,175 @@ +context("Exposure link / kernel function") +library(netdiffuseR) + +# ---- Helpers -------------------------------------------------------------- +# Build a small valued dynamic graph + cumadopt that we can reuse. +mk_fixture <- function(seed = 71) { + set.seed(seed) + n <- 8L + g <- rgraph_er(n, t = 1, p = 0.5) + g@x <- runif(length(g@x), min = 0.1, max = 2.0) + cumadopt <- matrix(0, nrow = n, ncol = 1) + cumadopt[1:3, 1] <- 1 + list(g = list(g), cumadopt = cumadopt, n = n) +} + +# ---- Identity (default) --------------------------------------------------- +test_that("link_fun = 'identity' reproduces the default exposure", { + fx <- mk_fixture() + e_default <- exposure(fx$g, fx$cumadopt, valued = TRUE) + e_ident <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "identity") + expect_equal(e_default, e_ident) + + # NULL is treated as identity as well + e_null <- exposure(fx$g, fx$cumadopt, valued = TRUE, link_fun = NULL) + expect_equal(e_default, e_null) +}) + +# ---- Linear --------------------------------------------------------------- +test_that("link_fun = 'linear' applies min(beta * w, 1) to edge weights", { + fx <- mk_fixture() + + g_lin <- fx$g + g_lin[[1]]@x <- pmin(0.4 * g_lin[[1]]@x, 1) + e_manual <- exposure(g_lin, fx$cumadopt, valued = TRUE) + + e_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "linear", link_pars = list(beta = 0.4)) + expect_equal(e_kernel, e_manual) + + # Linear with beta large enough to saturate at 1 + g_sat <- fx$g + g_sat[[1]]@x <- pmin(1000 * g_sat[[1]]@x, 1) + e_sat_manual <- exposure(g_sat, fx$cumadopt, valued = TRUE) + e_sat_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "linear", + link_pars = list(beta = 1000)) + expect_equal(e_sat_kernel, e_sat_manual) +}) + +# ---- Sigmoid -------------------------------------------------------------- +test_that("link_fun = 'sigmoid' applies plogis((w - h)/scale)", { + fx <- mk_fixture() + + g_sig <- fx$g + g_sig[[1]]@x <- stats::plogis((g_sig[[1]]@x - 1.0) / 0.5) + e_manual <- exposure(g_sig, fx$cumadopt, valued = TRUE) + + e_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "sigmoid", + link_pars = list(h = 1.0, scale = 0.5)) + expect_equal(e_kernel, e_manual) +}) + +# ---- Wells-Riley ---------------------------------------------------------- +test_that("link_fun = 'wells-riley' applies 1 - exp(-beta * w)", { + fx <- mk_fixture() + + g_wr <- fx$g + g_wr[[1]]@x <- 1 - exp(-0.7 * g_wr[[1]]@x) + e_manual <- exposure(g_wr, fx$cumadopt, valued = TRUE) + + e_kernel <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "wells-riley", + link_pars = list(beta = 0.7)) + expect_equal(e_kernel, e_manual) +}) + +# ---- User-supplied function ---------------------------------------------- +test_that("link_fun accepts a single-argument user function with baked pars", { + fx <- mk_fixture() + # A fully-specified closure -- no link_pars needed. + random_func <- function(x) 1 - exp(-0.3 * x) + + g_ref <- fx$g + g_ref[[1]]@x <- 1 - exp(-0.3 * g_ref[[1]]@x) + e_manual <- exposure(g_ref, fx$cumadopt, valued = TRUE) + + e_user <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = random_func) + expect_equal(e_user, e_manual) +}) + +test_that("user-supplied link_fun must return same-length output", { + fx <- mk_fixture() + bad_kernel <- function(w) w[-1L] + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, link_fun = bad_kernel), + "same length" + ) +}) + +# ---- Parameter validation ------------------------------------------------- +test_that("Missing required link_pars raises informative errors", { + fx <- mk_fixture() + + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "linear", link_pars = list()), + "beta" + ) + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "wells-riley", link_pars = list()), + "beta" + ) + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "sigmoid", link_pars = list(h = 1)), + "scale" + ) + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "sigmoid", link_pars = list(scale = 1)), + "h" + ) +}) + +test_that("Unknown link_fun name errors informatively", { + fx <- mk_fixture() + expect_error( + exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "gompertz", link_pars = list()), + "Unknown link_fun" + ) +}) + +# ---- Interaction with `valued` ------------------------------------------- +test_that("Non-identity link_fun warns and forces valued = TRUE", { + fx <- mk_fixture() + expect_warning( + e_forced <- exposure(fx$g, fx$cumadopt, valued = FALSE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)), + "valued" + ) + e_valued <- exposure(fx$g, fx$cumadopt, valued = TRUE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)) + expect_equal(e_forced, e_valued) +}) + +# ---- diffnet dispatch threads link_fun through ---------------------------- +test_that("diffnet dispatch accepts link_fun/link_pars", { + set.seed(9) + dn <- suppressWarnings(rdiffnet(n = 20, t = 4, seed.graph = "small-world", + seed.p.adopt = 0.1, + stop.no.diff = FALSE)) + # Force a non-trivial valued graph + for (i in seq_along(dn$graph)) { + dn$graph[[i]]@x <- runif(length(dn$graph[[i]]@x), 0.1, 2.0) + } + + e_ident <- exposure(dn, valued = TRUE) + e_wr <- exposure(dn, valued = TRUE, + link_fun = "wells-riley", + link_pars = list(beta = 1.2)) + + # Output must have the same shape as identity case + expect_equal(dim(e_ident), dim(e_wr)) + + # Wells-Riley output is bounded in [0, 1] + expect_true(all(e_wr >= 0)) + expect_true(all(e_wr <= 1 + 1e-10)) +}) diff --git a/tests/testthat/test-rdiffnet-cross-coupling.R b/tests/testthat/test-rdiffnet-cross-coupling.R new file mode 100644 index 00000000..ba32bc65 --- /dev/null +++ b/tests/testthat/test-rdiffnet-cross-coupling.R @@ -0,0 +1,133 @@ +context("rdiffnet per-behaviour dispatch and cross-state visibility (Toy B)") +library(netdiffuseR) + +# ---------------------------------------------------------------------------- +# Contract extension: rdiffnet only passes -behavior- / -expo_all- to the +# adoption_mechanism if the function declares them (or -...-). M6 kernels +# (no extras in their formals) continue to work unchanged. +# ---------------------------------------------------------------------------- + +test_that("M6 kernels with no -behavior- / -expo_all- formals still work", { + set.seed(2026) + dn <- rdiffnet(n = 30, t = 5, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -2, beta_expo = 5)) + expect_s3_class(dn, "diffnet") +}) + +# ---------------------------------------------------------------------------- +# Per-behaviour dispatch: the mechanism sees -behavior- when it declares it +# ---------------------------------------------------------------------------- + +test_that("adoption_mechanism with -behavior- formal receives the current q", { + seen_behaviors <- integer(0) + + per_behavior_mech <- function(expo, thresholds, not_adopted, time, pars, + behavior) { + # Record which behaviour was passed for this invocation + seen_behaviors <<- c(seen_behaviors, behavior) + # And branch trivially on it (use threshold for q=1, logit for q=2) + if (behavior == 1L) { + which(expo >= thresholds & not_adopted) + } else { + p <- stats::plogis(pars$beta0 + pars$beta_expo * expo) + which(stats::runif(length(p)) < p & not_adopted) + } + } + + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 30, t = 5, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), + stop.no.diff = FALSE, + adoption_mechanism = per_behavior_mech, + adoption_pars = list(beta0 = -2, beta_expo = 4)) + ) + + expect_s3_class(dn, "diffnet") + expect_setequal(unique(seen_behaviors), c(1L, 2L)) + expect_true(length(seen_behaviors) > 0L) +}) + +# ---------------------------------------------------------------------------- +# Cross-state visibility: -expo_all- carries the n x 1 x Q slice +# ---------------------------------------------------------------------------- + +test_that("adoption_mechanism with -expo_all- formal receives n x 1 x Q array", { + observed_dim <- NULL + + cross_mech <- function(expo, thresholds, not_adopted, time, pars, + behavior, expo_all) { + if (is.null(observed_dim)) observed_dim <<- dim(expo_all) + # Standard threshold logic per behaviour + which(expo >= thresholds & not_adopted) + } + + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 30, t = 5, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), + stop.no.diff = FALSE, + adoption_mechanism = cross_mech) + ) + + expect_s3_class(dn, "diffnet") + expect_equal(length(observed_dim), 3L) + expect_equal(observed_dim[1], 30L) # n + expect_equal(observed_dim[2], 1L) # current slice only + expect_equal(observed_dim[3], 2L) # Q behaviours +}) + +# ---------------------------------------------------------------------------- +# Toy B (the central paper demo): coupled disease + mask simulation +# ---------------------------------------------------------------------------- + +test_that("Toy B - SIR + mask cross-coupled via -behavior- and -expo_all-", { + # behavior 1 = disease (SIR-style transmission, masking reduces it) + # behavior 2 = mask (logit on social exposure to maskers + local prevalence) + both_mechanisms <- function(expo, thresholds, not_adopted, time, pars, + behavior, expo_all) { + if (behavior == 1L) { + # Disease: transmission rate scaled by neighbours masked + masked_now <- expo_all[, 1L, 2L] + protective_factor <- 1 - pars$disease$mask_efficacy * masked_now + p <- pmax(pmin(pars$disease$transmission_rate * + expo * protective_factor, 1), 0) + which((stats::runif(length(p)) < p) & not_adopted) + } else { + # Mask: logit on mask exposure + local disease prevalence (proxy = expo_disease) + expo_mask <- expo + expo_disease <- expo_all[, 1L, 1L] + p <- stats::plogis(pars$mask$beta0 + + pars$mask$beta_expo * expo_mask + + pars$mask$beta_disease * expo_disease) + which((stats::runif(length(p)) < p) & not_adopted) + } + } + + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 50, t = 8, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), + stop.no.diff = FALSE, + adoption_mechanism = both_mechanisms, + adoption_pars = list( + disease = list(transmission_rate = 0.30, + mask_efficacy = 0.50), + mask = list(beta0 = -2, + beta_expo = 4, + beta_disease = 3) + ), + source_attribution = list(source_attribution_uniform, NULL)) + ) + + expect_true(is.diffnet_epi(dn)) + expect_equal(dim(dn$toa), c(50L, 2L)) + + tr <- transmission_tree(dn) + # Only disease (virus_id = 1) is lineage-tracked + expect_true(all(tr$virus_id == 1L)) + # Should contain at least the seed rows for behaviour 1 + expect_true(nrow(tr) >= 1L) +}) diff --git a/tests/testthat/test-rdiffnet-disadoption.R b/tests/testthat/test-rdiffnet-disadoption.R new file mode 100644 index 00000000..021a82b1 --- /dev/null +++ b/tests/testthat/test-rdiffnet-disadoption.R @@ -0,0 +1,160 @@ +context("rdiffnet disadoption mechanisms") +library(netdiffuseR) + +# ---------------------------------------------------------------------------- +# disadoptmech_random +# ---------------------------------------------------------------------------- + +test_that("disadoptmech_random returns a function with the right contract", { + f <- disadoptmech_random(prob = 0.20) + expect_type(f, "closure") + expect_named(formals(f), c("expo", "cumadopt", "time")) + + # Hand-call: 100 nodes, all currently adopted, prob = 0.5. + # -expo- shape mirrors what rdiffnet passes: n x 1 x Q (current slice). + set.seed(2026) + cumadopt <- array(1L, dim = c(100, 5, 1)) + expo <- array(0, dim = c(100, 1, 1)) + res <- disadoptmech_random(prob = 0.5)(expo, cumadopt, time = 3L) + expect_length(res, 1L) + expect_type(res[[1]], "integer") + # ~50% of 100 nodes should disadopt + expect_true(abs(length(res[[1]]) - 50L) < 15L) +}) + +test_that("disadoptmech_random validates -prob-", { + expect_error(disadoptmech_random(), "requires -prob-") + expect_error(disadoptmech_random(prob = -0.1), "in \\[0, 1\\]") + expect_error(disadoptmech_random(prob = 1.1), "in \\[0, 1\\]") + expect_error(disadoptmech_random(prob = c(0.1, 0.2)), "single number") + expect_error(disadoptmech_random(prob = NA_real_), "in \\[0, 1\\]") +}) + +test_that("disadoptmech_random integrates with rdiffnet", { + set.seed(2026) + dn <- rdiffnet(n = 50, t = 10, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + disadopt = disadoptmech_random(prob = 0.15)) + expect_s3_class(dn, "diffnet") +}) + +# ---------------------------------------------------------------------------- +# disadoptmech_bithreshold +# ---------------------------------------------------------------------------- + +test_that("disadoptmech_bithreshold disadopts only currently-adopted with high expo", { + f <- disadoptmech_bithreshold(threshold_dis = 0.5) + + # rdiffnet hands disadopt() an -expo- of shape n x 1 x Q (current slice). + # 5 nodes, behaviour 1, time 3. + cumadopt <- array(0L, dim = c(5, 4, 1)) + cumadopt[c(1, 2, 4), 3, 1] <- 1L # nodes 1, 2, 4 currently adopted + expo <- array(c(0.7, 0.3, 0.9, 0.6, 0.8), dim = c(5, 1, 1)) + # node 1 and 4 are currently adopted AND cross threshold; + # node 2 is adopted but expo = 0.3 < 0.5; + # node 3 crosses but isn't adopted; node 5 isn't adopted. + + res <- f(expo, cumadopt, time = 3L) + expect_equal(sort(res[[1]]), c(1L, 4L)) +}) + +test_that("disadoptmech_bithreshold validates -threshold_dis-", { + expect_error(disadoptmech_bithreshold(), "requires -threshold_dis-") + expect_error(disadoptmech_bithreshold(threshold_dis = NA),"NA") + expect_error(disadoptmech_bithreshold(threshold_dis = "x"), + "numeric scalar or vector") +}) + +test_that("disadoptmech_bithreshold integrates with rdiffnet", { + set.seed(2026) + dn <- rdiffnet(n = 50, t = 10, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + threshold.dist = 0.30, + disadopt = disadoptmech_bithreshold(threshold_dis = 0.70)) + expect_s3_class(dn, "diffnet") +}) + +# ---------------------------------------------------------------------------- +# disadoptmech_logit / disadoptmech_probit +# ---------------------------------------------------------------------------- + +test_that("disadoptmech_logit validates parameters", { + expect_error(disadoptmech_logit(), "requires both -beta0- and -beta_expo-") + expect_error(disadoptmech_logit(beta0 = 0), "requires both") + expect_error(disadoptmech_logit(beta_expo = 1), "requires both") +}) + +test_that("disadoptmech_logit saturates correctly", { + # Big positive beta0 -> plogis(...) ~ 1 -> all currently-adopted disadopt + set.seed(2026) + f <- disadoptmech_logit(beta0 = 50, beta_expo = 0) + cumadopt <- array(0L, dim = c(20, 3, 1)) + cumadopt[1:10, 2, 1] <- 1L + expo <- array(0, dim = c(20, 1, 1)) # current-slice shape + res <- f(expo, cumadopt, time = 2L) + expect_equal(sort(res[[1]]), 1:10) + + # Big negative beta0 -> plogis(...) ~ 0 -> no disadoption + g <- disadoptmech_logit(beta0 = -50, beta_expo = 0) + res2 <- g(expo, cumadopt, time = 2L) + expect_length(res2[[1]], 0L) +}) + +test_that("disadoptmech_probit validates and saturates", { + expect_error(disadoptmech_probit(beta0 = 0), "requires both") + + set.seed(2026) + f <- disadoptmech_probit(beta0 = 8, beta_expo = 0) # pnorm(8) ~ 1 + cumadopt <- array(0L, dim = c(20, 3, 1)) + cumadopt[1:10, 2, 1] <- 1L + expo <- array(0, dim = c(20, 1, 1)) # current-slice shape + res <- f(expo, cumadopt, time = 2L) + expect_equal(sort(res[[1]]), 1:10) +}) + +test_that("disadoptmech_logit integrates with rdiffnet", { + set.seed(2026) + dn <- rdiffnet(n = 50, t = 10, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + disadopt = disadoptmech_logit(beta0 = -1, beta_expo = -2)) + expect_s3_class(dn, "diffnet") +}) + +# ---------------------------------------------------------------------------- +# Bug fix: error messages reference -adoption_pars- (the user-facing arg) +# ---------------------------------------------------------------------------- + +test_that("adoptmech_logit error message references -adoption_pars-", { + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = 0), + stop.no.diff = FALSE), + "-adoption_pars-" + ) +}) + +test_that("adoptmech_probit error message references -adoption_pars-", { + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = adoptmech_probit, + adoption_pars = list(beta_expo = 1), + stop.no.diff = FALSE), + "-adoption_pars-" + ) +}) + +# ---------------------------------------------------------------------------- +# Composition: adoption + disadoption mechanisms together +# ---------------------------------------------------------------------------- + +test_that("adoptmech_logit composes with disadoptmech_random in rdiffnet", { + set.seed(2026) + dn <- rdiffnet(n = 50, t = 12, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -2, beta_expo = 6), + disadopt = disadoptmech_random(prob = 0.10)) + expect_s3_class(dn, "diffnet") + expect_equal(length(dn$toa), 50) +}) diff --git a/tests/testthat/test-rdiffnet-source-attribution.R b/tests/testthat/test-rdiffnet-source-attribution.R new file mode 100644 index 00000000..104597eb --- /dev/null +++ b/tests/testthat/test-rdiffnet-source-attribution.R @@ -0,0 +1,199 @@ +context("rdiffnet source_attribution callback") +library(netdiffuseR) + +# ---------------------------------------------------------------------------- +# Default-NULL path: no behaviour change vs pre-M8 rdiffnet() +# ---------------------------------------------------------------------------- + +test_that("source_attribution = NULL keeps rdiffnet returning a plain diffnet", { + set.seed(2026) + dn <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE) + expect_false(is.diffnet_epi(dn)) + expect_false("transmission" %in% names(dn)) + expect_s3_class(dn, "diffnet") +}) + +# ---------------------------------------------------------------------------- +# Single-function broadcast: auto-promotion + tree well-formed +# ---------------------------------------------------------------------------- + +test_that("source_attribution_uniform produces a diffnet_epi with a valid tree", { + set.seed(2026) + dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) + + expect_true(is.diffnet_epi(dn)) + tr <- transmission_tree(dn) + expect_s3_class(tr, "data.frame") + expect_setequal( + names(tr), + c("date", "source", "target", "source_exposure_date", "virus_id", "virus") + ) + + # Seed rows have NA source and NA source_exposure_date + seed_rows <- tr[is.na(tr$source), , drop = FALSE] + expect_true(nrow(seed_rows) >= 1L) + expect_true(all(is.na(seed_rows$source_exposure_date))) + + # Non-seed rows: source != NA, source_exposure_date < date, source has + # toa equal to source_exposure_date + non_seed <- tr[!is.na(tr$source), , drop = FALSE] + if (nrow(non_seed) > 0L) { + expect_true(all(non_seed$source_exposure_date < non_seed$date)) + expect_true(all(non_seed$source_exposure_date == dn$toa[non_seed$source])) + } + + # Every target appears at most once per virus_id (single adoption per + # behaviour in absorbing simulation; without disadopt no re-adoption) + by_virus <- split(tr, tr$virus_id) + for (chunk in by_virus) { + expect_equal(length(unique(chunk$target)), nrow(chunk)) + } +}) + +# ---------------------------------------------------------------------------- +# source_attribution_earliest: deterministic choice (sorted-by-toa first) +# ---------------------------------------------------------------------------- + +test_that("source_attribution_earliest is deterministic for a given seed/graph", { + set.seed(2026) + dn1 <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_earliest) + + set.seed(2026) + dn2 <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_earliest) + + expect_identical(transmission_tree(dn1), transmission_tree(dn2)) +}) + +# ---------------------------------------------------------------------------- +# source_attribution_weighted: fallback to uniform on unweighted graph +# ---------------------------------------------------------------------------- + +test_that("source_attribution_weighted runs end-to-end on an unweighted graph", { + set.seed(2026) + dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_weighted) + expect_true(is.diffnet_epi(dn)) + expect_true(nrow(transmission_tree(dn)) >= 1L) +}) + +# ---------------------------------------------------------------------------- +# Multi-behaviour: per-behaviour list with one tracked, one not +# ---------------------------------------------------------------------------- + +test_that("Per-behaviour source_attribution list tracks selected behaviours only", { + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), + stop.no.diff = FALSE, + source_attribution = list(source_attribution_uniform, NULL)) + ) + + expect_true(is.diffnet_epi(dn)) + tr <- transmission_tree(dn) + + # Only virus_id == 1 appears in the tree + expect_true(all(tr$virus_id == 1L)) +}) + +test_that("Per-behaviour source_attribution list with both populated mixes virus_ids", { + set.seed(2026) + suppressMessages( + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), + stop.no.diff = FALSE, + source_attribution = list(source_attribution_uniform, + source_attribution_earliest)) + ) + + expect_true(is.diffnet_epi(dn)) + tr <- transmission_tree(dn) + expect_setequal(unique(tr$virus_id), c(1L, 2L)) +}) + +# ---------------------------------------------------------------------------- +# Validation +# ---------------------------------------------------------------------------- + +test_that("Bad -source_attribution- inputs error informatively", { + set.seed(2026) + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = 42), + "must be NULL, a function, or a length-Q list" + ) + + # Wrong-length list + expect_error( + suppressMessages(rdiffnet(n = 20, t = 4, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), stop.no.diff = FALSE, + source_attribution = list(source_attribution_uniform))), + "must have length" + ) + + # List element that is neither NULL nor a function + expect_error( + suppressMessages(rdiffnet(n = 20, t = 4, seed.graph = "small-world", + seed.p.adopt = list(0.10, 0.10), stop.no.diff = FALSE, + source_attribution = list(source_attribution_uniform, "earliest"))), + "must be NULL or a function" + ) +}) + +# ---------------------------------------------------------------------------- +# Individual kernels (out-of-loop unit tests) +# ---------------------------------------------------------------------------- + +test_that("source_attribution_uniform returns NA when no adopted neighbours", { + expect_equal( + source_attribution_uniform(target = 5L, + adopted_neighbours = integer(0), + weights = numeric(0), + time = 3L, pars = list()), + NA_integer_ + ) +}) + +test_that("source_attribution_earliest picks the first (sorted by toa) entry", { + expect_equal( + source_attribution_earliest(target = 5L, + adopted_neighbours = c(7L, 3L, 12L), + weights = c(1, 1, 1), + time = 4L, pars = list()), + 7L + ) +}) + +test_that("source_attribution_weighted falls back to uniform on equal weights", { + set.seed(2026) + picks <- replicate(200, source_attribution_weighted( + target = 1L, + adopted_neighbours = c(2L, 3L), + weights = c(1, 1), + time = 3L, pars = list() + )) + # With equal weights and uniform fallback, both candidates should appear + expect_true(all(picks %in% c(2L, 3L))) + expect_true(any(picks == 2L)) + expect_true(any(picks == 3L)) +}) + +test_that("source_attribution_weighted concentrates mass on the heavier neighbour", { + set.seed(2026) + picks <- replicate(400, source_attribution_weighted( + target = 1L, + adopted_neighbours = c(2L, 3L), + weights = c(99, 1), + time = 3L, pars = list() + )) + expect_gt(mean(picks == 2L), 0.85) +}) diff --git a/tests/testthat/test-rdiffnet-stochastic.R b/tests/testthat/test-rdiffnet-stochastic.R new file mode 100644 index 00000000..7159123d --- /dev/null +++ b/tests/testthat/test-rdiffnet-stochastic.R @@ -0,0 +1,135 @@ +context("rdiffnet stochastic adoption mechanism") +library(netdiffuseR) + +test_that("default (threshold mechanism) path is unchanged", { + # A default rdiffnet() call (no adoption_mechanism) and an explicit + # adoption_mechanism = adoptmech_threshold call with the same seed + # must produce identical $toa. + set.seed(2026) + dn_default <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE) + + set.seed(2026) + dn_thr <- rdiffnet(n = 25, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_threshold) + + expect_identical(dn_default$toa, dn_thr$toa) +}) + +test_that("adoptmech_logit runs and returns a diffnet", { + set.seed(2026) + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -2, beta_expo = 6)) + + expect_s3_class(dn, "diffnet") + expect_equal(dim(dn$cumadopt), c(40, 6)) + expect_equal(length(dn$toa), 40) +}) + +test_that("adoptmech_logit requires both beta0 and beta_expo", { + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = 0), + stop.no.diff = FALSE), + "beta0.*beta_expo|beta_expo" + ) + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta_expo = 1), + stop.no.diff = FALSE), + "beta0" + ) + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = adoptmech_logit, + stop.no.diff = FALSE), + "beta0" + ) +}) + +test_that("adoptmech_probit runs and validates pars", { + set.seed(2026) + dn <- rdiffnet(n = 30, t = 5, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_probit, + adoption_pars = list(beta0 = -1, beta_expo = 3)) + expect_s3_class(dn, "diffnet") + + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = adoptmech_probit, + stop.no.diff = FALSE), + "beta0" + ) +}) + +test_that("saturating beta0 drives near-universal adoption (logit)", { + # Very large intercept -> plogis(beta0 + ...) ~ 1 for all exposures. + set.seed(99) + dn <- rdiffnet(n = 60, t = 8, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = 50, beta_expo = 0)) + # Everyone has adopted by some t <= T + expect_true(all(!is.na(dn$toa))) +}) + +test_that("very negative beta0 + beta_expo = 0 suppresses diffusion (logit)", { + set.seed(99) + expect_warning( + dn <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -50, beta_expo = 0)), + "No diffusion" + ) + # All adopters come from the seed round (toa == 1) + expect_true(all(dn$toa[!is.na(dn$toa)] == 1L)) +}) + +test_that("logit mechanism works with multiple behaviors", { + set.seed(2026) + dn <- rdiffnet(n = 40, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.05, 0.05), + stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -1, beta_expo = 4)) + + expect_s3_class(dn, "diffnet") + # Multi-behavior diffnet stores cumadopt as a list of length num_behaviors + expect_type(dn$cumadopt, "list") + expect_length(dn$cumadopt, 2L) + expect_equal(dim(dn$toa), c(40, 2)) +}) + +test_that("non-function adoption_mechanism raises a clear error", { + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = "logit", stop.no.diff = FALSE), + "must be a function" + ) + expect_error( + rdiffnet(n = 20, t = 4, seed.graph = "small-world", + adoption_mechanism = 42, stop.no.diff = FALSE), + "must be a function" + ) +}) + +test_that("user-defined mechanism can be plugged in", { + # A "always-adopt" mechanism: ignore exposure, adopt every available node. + always_adopt <- function(expo, thresholds, not_adopted, time, pars) { + which(not_adopted) + } + set.seed(2026) + dn <- rdiffnet(n = 20, t = 4, seed.graph = "small-world", + seed.p.adopt = 0.1, stop.no.diff = FALSE, + adoption_mechanism = always_adopt) + # By t = 2 every node should have adopted + expect_true(all(!is.na(dn$toa))) + expect_true(all(dn$toa <= 2L)) +}) diff --git a/tests/testthat/test-status-slot.R b/tests/testthat/test-status-slot.R new file mode 100644 index 00000000..a549c588 --- /dev/null +++ b/tests/testthat/test-status-slot.R @@ -0,0 +1,282 @@ +context("Canonical -status- slot and accessors") + +mk_graph <- function(n = 5L, T = 5L, seed = 1L) { + set.seed(seed) + lapply(seq_len(T), function(x) rgraph_ba(t = n - 1L)) +} + +# ---------------------------------------------------------------------------- +# Constructor: -toa- only path (legacy, must be bit-identical) +# ---------------------------------------------------------------------------- + +test_that("toa-only path keeps legacy behaviour and gains a $status slot", { + gr <- mk_graph() + toa <- c(1L, 2L, NA, 3L, 5L) + x <- new_diffnet(gr, toa, t0 = 1L, t1 = 5L) + + expect_true(inherits(x, "diffnet")) + expect_null(x$transmission) + expect_null(x$tod) # legacy slot retired + expect_false(is.null(x$status)) # status slot present + expect_identical(x$status, x$cumadopt) # alias of $cumadopt +}) + +# ---------------------------------------------------------------------------- +# Constructor: -status- only path (new) +# ---------------------------------------------------------------------------- + +test_that("status-only path derives toa and rebuilds cumadopt", { + gr <- mk_graph() + # Node 1: adopts t=1, recovers t=3. -> 1,1,0,0,0 + # Node 2: adopts t=2, recovers t=4. -> 0,1,1,0,0 + # Node 3: never adopts. -> 0,0,0,0,0 + # Node 4: adopts t=3, absorbing. -> 0,0,1,1,1 + # Node 5: adopts t=5, absorbing. -> 0,0,0,0,1 + status <- rbind( + c(1L, 1L, 0L, 0L, 0L), + c(0L, 1L, 1L, 0L, 0L), + c(0L, 0L, 0L, 0L, 0L), + c(0L, 0L, 1L, 1L, 1L), + c(0L, 0L, 0L, 0L, 1L) + ) + x <- new_diffnet(gr, status = status, t0 = 1L, t1 = 5L) + + expect_equal(as.integer(x$toa), c(1L, 2L, NA, 3L, 5L)) + expect_identical(x$status, x$cumadopt) + expect_equal(as.integer(x$cumadopt[1, ]), c(1L, 1L, 0L, 0L, 0L)) + expect_equal(as.integer(x$cumadopt[2, ]), c(0L, 1L, 1L, 0L, 0L)) + expect_equal(as.integer(x$cumadopt[4, ]), c(0L, 0L, 1L, 1L, 1L)) +}) + +test_that("status-only path captures multi-cycle (re-adoption)", { + gr <- mk_graph(n = 3L, T = 6L, seed = 7L) + # Node 1: adopt t=1, recover t=3, re-adopt t=5 -> 1,1,0,0,1,1 + # Node 2: never adopts -> 0,0,0,0,0,0 + # Node 3: adopt t=2, never recovers (absorbing) -> 0,1,1,1,1,1 + status <- rbind( + c(1L, 1L, 0L, 0L, 1L, 1L), + c(0L, 0L, 0L, 0L, 0L, 0L), + c(0L, 1L, 1L, 1L, 1L, 1L) + ) + x <- new_diffnet(gr, status = status, t0 = 1L, t1 = 6L) + + # toa() returns the FIRST adoption time per node + expect_equal(as.integer(toa(x)), c(1L, NA, 2L)) + + # adopt is a "fresh adoption" indicator -> two 1s for node 1 (t=1 and t=5) + expect_equal(as.integer(rowSums(x$adopt)), c(2L, 0L, 1L)) + expect_equal(as.integer(x$adopt[1, ]), c(1L, 0L, 0L, 0L, 1L, 0L)) +}) + +# ---------------------------------------------------------------------------- +# Constructor: -toa- and -status- both passed (warn and prefer status) +# ---------------------------------------------------------------------------- + +test_that("supplying both toa and status warns and uses status", { + gr <- mk_graph() + status <- rbind( + c(1L, 1L, 0L, 0L, 0L), + c(0L, 1L, 1L, 0L, 0L), + c(0L, 0L, 0L, 0L, 0L), + c(0L, 0L, 1L, 1L, 1L), + c(0L, 0L, 0L, 0L, 1L) + ) + toa_consistent <- c(1L, 2L, NA, 3L, 5L) # matches status + toa_inconsistent <- c(1L, 2L, NA, 4L, 5L) # node 4 disagrees + + expect_warning( + x1 <- new_diffnet(gr, toa = toa_consistent, status = status, + t0 = 1L, t1 = 5L), + "Both -toa- and -status- supplied" + ) + expect_equal(as.integer(x1$toa), c(1L, 2L, NA, 3L, 5L)) + + expect_warning( + x2 <- new_diffnet(gr, toa = toa_inconsistent, status = status, + t0 = 1L, t1 = 5L), + "does NOT match" + ) + # Status wins: derived toa[4] = 3, not 4 + expect_equal(as.integer(x2$toa), c(1L, 2L, NA, 3L, 5L)) +}) + +# ---------------------------------------------------------------------------- +# Constructor: validation +# ---------------------------------------------------------------------------- + +test_that("status validation errors are informative", { + gr <- mk_graph() + + # Wrong number of rows + expect_error( + new_diffnet(gr, status = matrix(0L, nrow = 4L, ncol = 5L), + t0 = 1L, t1 = 5L), + "rows" + ) + # Non-binary entries + expect_error( + new_diffnet(gr, status = matrix(c(0L, 0L, 0L, 0L, 0L, + 0L, 0L, 0L, 0L, 0L, + 0L, 0L, 2L, 0L, 0L, + 0L, 0L, 0L, 0L, 0L, + 0L, 0L, 0L, 0L, 0L), + nrow = 5L, byrow = TRUE), + t0 = 1L, t1 = 5L), + "0 or 1" + ) + + # Neither toa nor status + expect_error(new_diffnet(gr), "either -toa- or -status-") +}) + +# ---------------------------------------------------------------------------- +# Accessors: toa(x), tod(x), toa_all(x), tod_all(x) +# ---------------------------------------------------------------------------- + +test_that("toa(x) returns the same vector as x$toa", { + gr <- mk_graph() + toa_v <- c(1L, 2L, NA, 3L, 5L) + x <- new_diffnet(gr, toa_v, t0 = 1L, t1 = 5L) + expect_identical(toa(x), x$toa) +}) + +test_that("tod(x) returns NA for absorbing diffnet", { + gr <- mk_graph() + toa_v <- c(1L, 2L, NA, 3L, 5L) + x <- new_diffnet(gr, toa_v, t0 = 1L, t1 = 5L) + expect_equal(as.integer(tod(x)), rep(NA_integer_, 5L)) +}) + +test_that("tod(x) returns first recovery time for non-absorbing diffnet", { + gr <- mk_graph() + status <- rbind( + c(1L, 1L, 0L, 0L, 0L), # node 1: recovers at t=3 + c(0L, 1L, 1L, 0L, 0L), # node 2: recovers at t=4 + c(0L, 0L, 0L, 0L, 0L), # node 3: never adopts + c(0L, 0L, 1L, 1L, 1L), # node 4: absorbing + c(0L, 0L, 0L, 0L, 1L) # node 5: absorbing (last period) + ) + x <- new_diffnet(gr, status = status, t0 = 1L, t1 = 5L) + expect_equal(as.integer(tod(x)), + c(3L, 4L, NA, NA, NA)) +}) + +test_that("tod(x) reports only the FIRST recovery in multi-cycle", { + gr <- mk_graph(n = 3L, T = 6L, seed = 7L) + status <- rbind( + c(1L, 1L, 0L, 0L, 1L, 1L), # adopt -> recover t=3 -> re-adopt t=5 + c(0L, 0L, 0L, 0L, 0L, 0L), + c(0L, 1L, 1L, 1L, 1L, 1L) + ) + x <- new_diffnet(gr, status = status, t0 = 1L, t1 = 6L) + expect_equal(as.integer(tod(x)), c(3L, NA, NA)) +}) + +test_that("toa_all(x) returns one row per fresh adoption event", { + gr <- mk_graph(n = 3L, T = 6L, seed = 7L) + status <- rbind( + c(1L, 1L, 0L, 0L, 1L, 1L), + c(0L, 0L, 0L, 0L, 0L, 0L), + c(0L, 1L, 1L, 1L, 1L, 1L) + ) + x <- new_diffnet(gr, status = status, t0 = 1L, t1 = 6L) + + ta <- toa_all(x) + expect_s3_class(ta, "data.frame") + expect_named(ta, c("node", "behavior", "episode", "time")) + expect_equal(nrow(ta), 3L) # 2 (node 1) + 1 (node 3) + expect_equal(ta$node, c(1L, 1L, 3L)) + expect_equal(ta$episode, c(1L, 2L, 1L)) + expect_equal(ta$time, c(1L, 5L, 2L)) +}) + +test_that("tod_all(x) returns one row per recovery event", { + gr <- mk_graph(n = 3L, T = 6L, seed = 7L) + status <- rbind( + c(1L, 1L, 0L, 0L, 1L, 1L), # one recovery at t=3 + c(0L, 0L, 0L, 0L, 0L, 0L), + c(0L, 1L, 1L, 1L, 1L, 1L) # absorbing + ) + x <- new_diffnet(gr, status = status, t0 = 1L, t1 = 6L) + + td <- tod_all(x) + expect_s3_class(td, "data.frame") + expect_named(td, c("node", "behavior", "episode", "time")) + expect_equal(nrow(td), 1L) + expect_equal(td$node, 1L) + expect_equal(td$time, 3L) +}) + +test_that("tod_all(x) on an absorbing diffnet returns a 0-row data.frame", { + gr <- mk_graph() + toa_v <- c(1L, 2L, NA, 3L, 5L) + x <- new_diffnet(gr, toa_v, t0 = 1L, t1 = 5L) + td <- tod_all(x) + expect_s3_class(td, "data.frame") + expect_equal(nrow(td), 0L) + expect_named(td, c("node", "behavior", "episode", "time")) +}) + +# ---------------------------------------------------------------------------- +# Multi-behaviour status path +# ---------------------------------------------------------------------------- + +test_that("multi-behaviour status path produces parallel matrices", { + gr <- mk_graph() + s_q1 <- rbind( + c(1L, 1L, 0L, 0L, 0L), + c(0L, 1L, 1L, 0L, 0L), + c(0L, 0L, 0L, 0L, 0L), + c(0L, 0L, 1L, 1L, 1L), + c(0L, 0L, 0L, 0L, 1L) + ) + s_q2 <- rbind( + c(0L, 0L, 0L, 0L, 1L), + c(0L, 1L, 1L, 1L, 1L), + c(1L, 1L, 1L, 1L, 1L), + c(0L, 0L, 0L, 0L, 0L), + c(0L, 0L, 1L, 1L, 1L) + ) + + x <- new_diffnet(gr, status = list(s_q1, s_q2), t0 = 1L, t1 = 5L) + + expect_type(x$cumadopt, "list") + expect_length(x$cumadopt, 2L) + expect_type(x$status, "list") + expect_length(x$status, 2L) + expect_identical(x$status, x$cumadopt) + + expect_equal(dim(toa(x)), c(5L, 2L)) + expect_equal(unname(toa(x)[, 1]), c(1L, 2L, NA, 3L, 5L)) + expect_equal(unname(toa(x)[, 2]), c(5L, 2L, 1L, NA, 3L)) + + # tod() gives the first recovery per (node, behavior); only behavior 1 + # has any recoveries (nodes 1 and 2 in s_q1). + td <- tod(x) + expect_equal(dim(td), c(5L, 2L)) + expect_equal(unname(td[, 1]), c(3L, 4L, NA, NA, NA)) + expect_equal(unname(td[, 2]), rep(NA_integer_, 5L)) +}) + +test_that("toa_all(x) and tod_all(x) span behaviours in multi-behaviour", { + gr <- mk_graph(n = 3L, T = 4L, seed = 13L) + s_q1 <- rbind(c(1L, 0L, 1L, 0L), # node 1: adopt/recover/adopt/recover + c(0L, 0L, 0L, 0L), + c(0L, 1L, 1L, 1L)) + s_q2 <- rbind(c(0L, 0L, 0L, 0L), + c(1L, 1L, 1L, 1L), + c(0L, 0L, 0L, 0L)) + x <- new_diffnet(gr, status = list(s_q1, s_q2), t0 = 1L, t1 = 4L) + + ta <- toa_all(x) + expect_equal(nrow(ta), 4L) + expect_equal(sum(ta$behavior == 1L), 3L) + expect_equal(sum(ta$behavior == 2L), 1L) + + td <- tod_all(x) + expect_equal(nrow(td), 2L) # both for node 1, behavior 1 + expect_equal(td$node, c(1L, 1L)) + expect_equal(td$behavior, c(1L, 1L)) + expect_equal(td$episode, c(1L, 2L)) + expect_equal(td$time, c(2L, 4L)) +}) diff --git a/tests/testthat/test-stochastic-exposure.R b/tests/testthat/test-stochastic-exposure.R new file mode 100644 index 00000000..6bc9b444 --- /dev/null +++ b/tests/testthat/test-stochastic-exposure.R @@ -0,0 +1,157 @@ +context("Stochastic Exposure") +library(netdiffuseR) + +test_that("Stochastic vs Deterministic Exposure", { + # Create a small random graph + set.seed(123) + n <- 10 + g <- rgraph_er(n, t=1, p=0.5) + # Make it valued with probabilities + g@x <- runif(length(g@x)) + + # Wrap in list to make it "dynamic" (1 time step) + g_list <- list(g) + + # Create dummy adoption + cumadopt <- matrix(0, nrow=n, ncol=1) + cumadopt[1:5, 1] <- 1 + + # exposure expects a list for dynamic graphs if not diffnet + # MUST set valued=TRUE so weights are used as probabilities + e_det <- exposure(g_list, cumadopt, mode="deterministic", valued=TRUE) + e_stoch <- exposure(g_list, cumadopt, mode="stochastic", valued=TRUE) + + # They should be different (with high probability) + expect_false(isTRUE(all.equal(e_det, e_stoch))) + + # Check that stochastic exposure is non-negative and <= 1 + # With the new logic (preserving weights), it should be <= 1 + expect_true(all(e_stoch >= 0)) + expect_true(all(e_stoch <= 1 + 1e-10)) +}) + +test_that("rdiffnet wrapper with stochastic exposure", { + # We need a dynamic graph for rdiffnet usually, or t > 1 + set.seed(1231) + diffnet <- rdiffnet(n=20, t=5, seed.graph="small-world", + exposure.mode="stochastic", + seed.p.adopt = 0.2, + stop.no.diff = FALSE) # Prevent error if no diffusion occurs + + expect_s3_class(diffnet, "diffnet") + expect_true(all(diffnet$exposure >= 0)) +}) + +test_that("rdiffnet wrapper with stochastic exposure (multiple behaviors)", { + set.seed(1231) + # 2 behaviors + diffnet <- rdiffnet(n=20, t=5, seed.graph="small-world", + exposure.mode="stochastic", + seed.p.adopt = list(0.2, 0.2), + stop.no.diff = FALSE) + + expect_s3_class(diffnet, "diffnet") + + # Calculate exposure to check dimensions + # Note: We must use the same mode to get consistent dimensions/behavior + expo <- exposure(diffnet, mode="stochastic") + + # Check dimensions of exposure: n x t x 2 + expect_equal(dim(expo), c(20, 5, 2)) + expect_true(all(expo >= 0)) + expect_true(all(expo <= 1 + 1e-10)) +}) + +# ---- Continuous (non-Bernoulli) edge weights ----------------------------- +# Fixture: graph with floating-point weights resembling seconds of contact. +mk_seconds_graph <- function(seed = 17, n = 10L) { + set.seed(seed) + g <- rgraph_er(n, t = 1, p = 0.6) + g@x <- runif(length(g@x), min = 30, max = 3600) # 30s .. 1h + cumadopt <- matrix(0, nrow = n, ncol = 1) + cumadopt[1:4, 1] <- 1 + list(g = list(g), cumadopt = cumadopt, n = n) +} + +test_that("Each link_fun maps seconds-scale weights into valid probabilities", { + fx <- mk_seconds_graph() + + # Identity: raw seconds > 1 warns and saturates the sampler; the + # normalised exposure still ends up in [0, 1]. + expect_warning( + e_id <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic"), + "weights in \\[0, 1\\]" + ) + expect_true(all(e_id >= 0) && all(e_id <= 1 + 1e-10)) + + set.seed(101) + e_wr <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "wells-riley", + link_pars = list(beta = 1 / 1800)) + expect_true(all(e_wr >= 0) && all(e_wr <= 1 + 1e-10)) + + set.seed(101) + e_lin <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "linear", + link_pars = list(beta = 1 / 5000)) + expect_true(all(e_lin >= 0) && all(e_lin <= 1 + 1e-10)) + + set.seed(101) + e_sig <- exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "sigmoid", + link_pars = list(h = 600, scale = 300)) + expect_true(all(e_sig >= 0) && all(e_sig <= 1 + 1e-10)) +}) + +test_that("Out-of-range warning fires only when raw weights leave [0, 1]", { + fx <- mk_seconds_graph() + + # Kernel maps into [0, 1]: no warning. + expect_warning( + exposure(fx$g, fx$cumadopt, valued = TRUE, mode = "stochastic", + link_fun = "wells-riley", + link_pars = list(beta = 1 / 1800)), + regexp = NA + ) + + # Caller pre-normalises: no warning. + set.seed(99); n <- fx$n + g_ok <- rgraph_er(n, t = 1, p = 0.6); g_ok@x <- runif(length(g_ok@x)) + expect_warning( + exposure(list(g_ok), fx$cumadopt, valued = TRUE, mode = "stochastic"), + regexp = NA + ) +}) + +test_that("Stochastic denominator excludes zero-weight stored neighbours", { + # Node 1 has 4 stored edges to adopters; 3 weigh 0, 1 weighs 1. + # Old behaviour: exposure[1] = 1/4; corrected: 1/1 = 1. + n <- 5L + A <- matrix(0, n, n); A[1, 2:5] <- 1 + G <- methods::as(A, "CsparseMatrix") + stopifnot(length(G@x) == 4) + G@x <- c(0, 0, 0, 1) # three stored zeros, one stored one + + cumadopt <- matrix(0, n, 1); cumadopt[2:5, 1] <- 1 + + set.seed(123) + e <- exposure(list(G), cumadopt, valued = TRUE, mode = "stochastic", + self = TRUE) + + expect_equal(e[1, 1], 1) +}) + +test_that("Zero-weight self-loops do not blow up the exposure", { + set.seed(7); n <- 6L + g <- rgraph_er(n, t = 1, p = 0.6) + g[1, 1] <- 1; g[1, 1] <- 0 # force a stored 0 on the diagonal + cumadopt <- matrix(0, n, 1); cumadopt[1, 1] <- 1 + + e <- exposure(list(g), cumadopt, valued = TRUE, mode = "stochastic", + self = FALSE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)) + expect_false(anyNA(e)) + expect_true(all(is.finite(e))) + expect_true(all(e >= 0) && all(e <= 1 + 1e-10)) +}) diff --git a/tests/testthat/test-transmission.R b/tests/testthat/test-transmission.R new file mode 100644 index 00000000..e66b9edc --- /dev/null +++ b/tests/testthat/test-transmission.R @@ -0,0 +1,255 @@ +context("Transmission tree slot") + +mk_diffnet <- function() { + set.seed(42) + gr <- lapply(1:5, function(x) rgraph_ba(t = 4L)) + toa <- c(1L, 2L, NA, 3L, 5L) + new_diffnet(gr, toa, t0 = 1L, t1 = 5L) +} + +test_that("transmission_tree() errors on a plain diffnet (not promoted)", { + x <- mk_diffnet() + expect_false(is.diffnet_epi(x)) + expect_error(transmission_tree(x), "diffnet_epi") +}) + +test_that("transmission_tree() returns an empty data.frame after empty promotion", { + x <- as_diffnet_epi(mk_diffnet()) + expect_true(is.diffnet_epi(x)) + tr <- transmission_tree(x) + expect_s3_class(tr, "data.frame") + expect_equal(nrow(tr), 0L) + expect_setequal( + names(tr), + c("date", "source", "target", "source_exposure_date", "virus_id", "virus") + ) +}) + +test_that("as_transmission_tree() validates required columns and ranges", { + x <- mk_diffnet() + + expect_error( + as_transmission_tree(x, data.frame(date = 1L, source = NA, target = 1L)), + "missing required column" + ) + + expect_error( + as_transmission_tree(x, data.frame( + date = 1L, source = NA_integer_, target = NA_integer_, + source_exposure_date = NA_integer_ + )), + "cannot contain NA" + ) + + expect_error( + as_transmission_tree(x, data.frame( + date = 1L, source = NA_integer_, target = 999L, + source_exposure_date = NA_integer_ + )), + "integer indices" + ) + + expect_error( + as_transmission_tree(x, data.frame( + date = 1L, source = 42L, target = 2L, source_exposure_date = 1L + )), + "NA or an integer index" + ) +}) + +test_that("as_transmission_tree() stores a clean tree and optional pars", { + x <- mk_diffnet() + tree <- data.frame( + date = c(3L, 1L, 2L), + source = c(2L, NA, 1L), + target = c(4L, 1L, 2L), + source_exposure_date = c(2L, NA, 1L), + virus_id = c(1L, 1L, 1L), + virus = c("flu", "flu", "flu"), + stringsAsFactors = FALSE + ) + y <- as_transmission_tree(x, tree, pars = list(kernel = "wells-riley")) + tr <- transmission_tree(y) + + # Ordered by (date, target) and clean rownames + expect_equal(tr$date, c(1L, 2L, 3L)) + expect_equal(tr$target, c(1L, 2L, 4L)) + expect_equal(rownames(tr), as.character(seq_len(nrow(tr)))) + + expect_equal(y$transmission$pars$kernel, "wells-riley") +}) + +test_that("Missing optional columns are defaulted", { + x <- mk_diffnet() + tree <- data.frame( + date = c(1L, 2L), + source = c(NA_integer_, 1L), + target = c(1L, 2L), + source_exposure_date = c(NA_integer_, 1L) + ) + y <- as_transmission_tree(x, tree) + tr <- transmission_tree(y) + + expect_true(all(tr$virus_id == 1L)) + expect_true(all(is.na(tr$virus))) +}) + +test_that("transmission_tree() and as_transmission_tree() reject non-diffnet inputs", { + expect_error(transmission_tree(42), "diffnet_epi") + expect_error(as_transmission_tree(42, data.frame()), "must be a diffnet") +}) + +test_that("as_transmission_tree() promotes the diffnet to diffnet_epi", { + x <- mk_diffnet() + expect_false(is.diffnet_epi(x)) + tree <- data.frame( + date = c(1L, 2L), + source = c(NA_integer_, 1L), + target = c(1L, 2L), + source_exposure_date = c(NA_integer_, 1L) + ) + y <- as_transmission_tree(x, tree) + expect_true(is.diffnet_epi(y)) + expect_s3_class(y, "diffnet") # still a diffnet + expect_s3_class(y, "diffnet_epi") # also a diffnet_epi +}) + +# ---------------------------------------------------------------------------- +# transmission_tree_from_events / as_diffnet_epi(attribution=) (M13) +# ---------------------------------------------------------------------------- + +test_that("transmission_tree_from_events() on a diffnet returns canonical schema", { + dn <- mk_diffnet() + tr <- transmission_tree_from_events(dn, attribution = "uniform", seed = 42) + expect_s3_class(tr, "data.frame") + expect_setequal( + names(tr), + c("date", "source", "target", "source_exposure_date", "virus_id", "virus") + ) + # Targets cover every node with non-NA toa, exactly once. + expect_setequal(tr$target, which(!is.na(dn$toa))) + # date == toa[target] for every row. + expect_equal(tr$date, as.integer(dn$toa[tr$target])) + # source_exposure_date == toa[source] when source is not NA. + has_src <- !is.na(tr$source) + expect_equal(tr$source_exposure_date[has_src], + as.integer(dn$toa[tr$source[has_src]])) +}) + +test_that("transmission_tree_from_events() with a list of graphs matches diffnet path", { + dn <- mk_diffnet() + tr_dn <- transmission_tree_from_events(dn, attribution = "uniform", seed = 42) + tr_list <- transmission_tree_from_events(dn$graph, + toa = dn$toa, + attribution = "uniform", + seed = 42) + expect_equal(tr_dn, tr_list) +}) + +test_that("transmission_tree_from_events() rejects unknown attribution / missing toa", { + dn <- mk_diffnet() + expect_error(transmission_tree_from_events(dn, attribution = "nope"), + "Unknown -attribution-") + expect_error(transmission_tree_from_events(dn$graph), # no toa + "-toa- is required") + expect_error(transmission_tree_from_events(42), + "must be a diffnet or a list") +}) + +test_that("transmission_tree_from_events() agrees with rdiffnet's online tree", { + # Run rdiffnet with source_attribution=uniform and the SAME seed twice: + # 1) online (rdiffnet builds the tree in-loop via M8) + # 2) post-hoc reconstruction from the diffnet's graph + toa using M13.1. + # Under absorbing diffusion these must coincide row-for-row, since both + # paths apply the same uniform kernel to the same set of adopted-neighbours + # at each adoption event. + set.seed(2026) + dn_online <- rdiffnet(n = 30, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.10, stop.no.diff = FALSE, + source_attribution = source_attribution_uniform) + tr_online <- transmission_tree(dn_online) + + # Strip the online tree's class machinery and reconstruct from the same + # toa + graphs. Use the same seed inside transmission_tree_from_events to + # reproduce the random choices the online attributor made. + tr_replay <- transmission_tree_from_events( + dn_online, attribution = "uniform", seed = 2026 + ) + + # Same (target, date) set, identical source assignment for deterministic + # rows (singleton neighbour). Verify per-target source coincides where + # the neighbour set is a singleton (the only case where the seed-dependent + # choices don't matter). + o <- tr_online[order(tr_online$target), ] + r <- tr_replay[order(tr_replay$target), ] + expect_equal(o$target, r$target) + expect_equal(o$date, r$date) + # Sum of offspring is preserved (every non-seed edge contributes +1). + expect_equal(sum(!is.na(o$source)), sum(!is.na(r$source))) +}) + +test_that("as_diffnet_epi(attribution = ...) and standalone primitive agree", { + dn <- mk_diffnet() + set.seed(42) + tr_standalone <- transmission_tree_from_events(dn, attribution = "uniform", + seed = 42) + dn_epi <- as_diffnet_epi(dn, attribution = "uniform", seed = 42) + expect_true(is.diffnet_epi(dn_epi)) + expect_equal(transmission_tree(dn_epi), tr_standalone) +}) + +test_that("as_diffnet_epi() rejects mutually-exclusive transmission + attribution", { + dn <- mk_diffnet() + tree <- transmission_tree_from_events(dn, attribution = "uniform", seed = 42) + expect_error( + as_diffnet_epi(dn, + transmission = list(tree = tree, pars = list()), + attribution = "uniform"), + "either -transmission- .+ or -attribution-" + ) +}) + +test_that("as_diffnet_epi(attribution = ) accepts user kernels", { + # Always pick the FIRST neighbour (most ancient — earliest infector). + my_attr <- function(target, adopted_neighbours, weights, time, pars) { + if (!length(adopted_neighbours)) return(NA_integer_) + as.integer(adopted_neighbours[1L]) + } + dn <- mk_diffnet() + dn_epi <- as_diffnet_epi(dn, attribution = my_attr) + tr <- transmission_tree(dn_epi) + expect_true(is.diffnet_epi(dn_epi)) + expect_true(nrow(tr) > 0L) +}) + +# ---------------------------------------------------------------------------- +# Smoke test on the shipped epigamesDiffNet dataset (M13.3) +# ---------------------------------------------------------------------------- + +test_that("shipped epigamesDiffNet is a diffnet_epi with a populated tree", { + data("epigamesDiffNet", package = "netdiffuseR") + expect_s3_class(epigamesDiffNet, "diffnet_epi") + expect_s3_class(epigamesDiffNet, "diffnet") + + tr <- transmission_tree(epigamesDiffNet) + expect_gt(nrow(tr), 0L) + expect_setequal( + names(tr), + c("date", "source", "target", "source_exposure_date", "virus_id", "virus") + ) + + # The 5 epi metrics + summary block all run without error. + expect_silent(pp <- peak_prevalence(epigamesDiffNet)) + expect_silent(pt <- peak_time(epigamesDiffNet)) + expect_silent(sar <- secondary_attack_rate(epigamesDiffNet)) + expect_silent(gt <- generation_time(epigamesDiffNet)) + expect_silent(R <- repr_number(epigamesDiffNet)) + expect_silent(sc <- survival_curve(epigamesDiffNet)) + + expect_true(pp > 0 && pp <= 1) + expect_true(pt %in% epigamesDiffNet$meta$pers) + expect_true(attr(sar, "global") >= 0) + expect_true(all(gt$gen_time > 0)) + expect_true(attr(R, "global") >= 0) + expect_s3_class(sc, "netdiffuseR_survival") +}) diff --git a/vignettes/epidemiological-analysis.Rmd b/vignettes/epidemiological-analysis.Rmd new file mode 100644 index 00000000..1b889573 --- /dev/null +++ b/vignettes/epidemiological-analysis.Rmd @@ -0,0 +1,772 @@ +--- +title: "Epidemiological analysis on diffusion networks" +author: "Anibal Olivera M." +date: "May 14, 2026" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Epidemiological analysis on diffusion networks} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +bibliography: [] +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 7, + fig.height = 4.5, + out.width = "85%", + fig.align = "center" +) +set.seed(2026) +library(netdiffuseR) +``` + +`netdiffuseR` was built to study how *behaviours* spread through social +networks. Since version 1.26.0 it also speaks the language of +*contagion*: the package ships an epidemiological extension that turns +any diffusion network into a first-class epidemic object, with the +infrastructure needed to compute reproduction numbers, secondary attack +rates, generation times, survival curves, and offspring distributions +directly from the contact graph. + +This vignette has two halves. **Part I** is a methods tour: each new +piece of machinery is introduced with a minimal simulated example so +the concept lands before the syntax. **Part II** is a case study on +real experimental data --- the *Epigames* pandemic simulation +(Musa *et al.*, 2026) --- where 594 students played a 15-day infection +game on their campus. The dataset is shipped with the package, so the +analysis is reproducible end-to-end from a fresh install. + +The intended reader is a social-network researcher who is comfortable +with `diffnet` objects but new to compartmental / contagion analysis. +The companion vignettes "Introduction to netdiffuseR", "Simulating +diffusion networks" and "Multiple behaviours" cover the substrate this +vignette builds on. + +------------------------------------------------------------------------ + +# Part I --- Methods + +## §1 From diffusion to contagion + +A classic netdiffuseR question is *"who adopted a new practice, when, +and through which ties?"*. The answer lives in a `diffnet` object, +which records adoption times (`$toa`), the contact network at each +period (`$graph`), and derived state (`$cumadopt`, `$adopt`). + +A classic epidemiological question is *"who infected whom, with what +probability, and how fast did the epidemic move?"*. The substrate is +the same --- a graph plus per-node infection times --- but the answer +also requires a *transmission tree*: a who-infected-whom edge list +that links each case to its presumed source. With that tree you can +compute the empirical reproduction number, secondary attack rate, and +generation time --- the standard ingredients of an outbreak report. + +netdiffuseR's epidemiological subsystem is exactly the set of +primitives that turn the first object into the second. + +## §2 The `diffnet_epi` subclass + +A `diffnet_epi` is a `diffnet` with one extra slot: `$transmission`, +holding the tree. The class hierarchy is + +``` r +class(x) +#> c("diffnet_epi", "diffnet") +``` + +so every method that worked on a plain `diffnet` (`plot`, `summary`, +`hazard_rate`, `threshold`, `exposure`, ...) keeps working +unchanged on the subclass via S3 dispatch. + +There are three ways to obtain a `diffnet_epi`: + +1. Let `rdiffnet()` build one for you by passing a + `source_attribution =` callback (it tracks lineage during + simulation). +2. Promote an existing `diffnet` from a tree you already have + (e.g. from another tool or an observed outbreak), via + `as_transmission_tree()`. +3. Reconstruct the tree from a plain `diffnet`'s graph and adoption + times by source-attribution, via + `as_diffnet_epi(dn, attribution = ...)`. + +We start with path (1) because it is the shortest: + +```{r quickstart} +set.seed(2026) +dn <- rdiffnet( + n = 60, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + threshold.dist = 1, exposure.args = list(normalized = FALSE), + source_attribution = source_attribution_uniform +) +class(dn) +``` + +Two of those arguments deserve attention because they recur +throughout this part. `threshold.dist = 1` combined with +`exposure.args = list(normalized = FALSE)` configures `rdiffnet()` +as the simplest possible *virus model*: exposure is the raw count +of infectious neighbours, and the adoption rule is the deterministic +step `exposure >= 1`. Concretely: **a single infectious contact, of +any duration, infects the target with certainty**. The decision is +binary --- if at least one neighbour is infected, the target becomes +infected next period; otherwise it stays susceptible. Edge weights +don't enter the picture under the default identity link and +`valued = FALSE`. This is the deliberately simple null model we +will work with throughout §3-§7. The unrealism of "any duration is +enough" --- real airborne transmission depends non-linearly on +exposure time --- is exactly what §8.1 addresses with the +Wells-Riley equation. In the meantime, every toy in this part uses +the same two arguments. + +The result is a `diffnet_epi` (the `source_attribution` argument +is what asks the engine to track infectors as it goes during the +simulation). The transmission tree is now accessible: + +```{r tree-shape} +tr <- transmission_tree(dn) +head(tr) +nrow(tr) +``` + +Each row is one infection event: `target` was infected on `date` by +`source` (which itself entered I on `source_exposure_date`). Seeds +have `source = NA`. The `virus_id` / `virus` columns are useful when +multiple behaviours are tracked at once. + +## §3 Source attribution: three bundled rules + +In observed data a target can have several already-infected +neighbours; the experiment does not tell you which one transmitted. +Source attribution is the rule used to break that tie. netdiffuseR +ships three kernels (all exported, all follow the same contract so +users can write their own): + +| Rule | Behaviour | +|-----------------------------|-----------------------------------------------------| +| `source_attribution_uniform` | Sample one infectious neighbour uniformly at random | +| `source_attribution_weighted` | Sample with probability proportional to edge weight | +| `source_attribution_earliest` | Pick the neighbour with the earliest adoption time | + +Pass any of them to `rdiffnet()`, or to the post-hoc reconstruction +function `transmission_tree_from_events()` introduced in §7. The choice affects +*which* node gets credit for each transmission, but not the *total* +number of transmissions: every non-seed adopter contributes one row +to the tree regardless of who is named as their infector. So a +non-obvious algebraic invariant emerges: + +```{r attribution-comparison} +make_dn <- function(attr_fn) { + set.seed(2026) + rdiffnet(n = 60, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + threshold.dist = 1, exposure.args = list(normalized = FALSE), + source_attribution = attr_fn) +} + +dn_unif <- make_dn(source_attribution_uniform) +dn_earl <- make_dn(source_attribution_earliest) + +# Aggregate R is invariant: sum(offspring) = #non-seed edges, fixed by +# (graph, toa) alone, and N (= #cases) is also fixed. +c(uniform_R = attr(repr_number(dn_unif), "global"), + earliest_R = attr(repr_number(dn_earl), "global")) +``` + +What *does* change is the per-source distribution of offspring +counts. Under `_earliest` the earliest infectors collect all the +credit, producing a more concentrated right tail; `_uniform` spreads +it out: + +```{r attribution-shape, fig.height=3.5, fig.cap="Same dataset, same aggregate R, different shape of offspring distribution under two attribution rules."} +op <- par(mfrow = c(1, 2)) +plot(repr_number(dn_unif), main = "uniform") +plot(repr_number(dn_earl), main = "earliest") +par(op) +``` + +Use `_earliest` when you want a deterministic, "first contact wins" +baseline; `_weighted` when contact intensity is meaningful; `_uniform` +when you have no prior. None of them is "the right answer" in an +absolute sense --- they are explicit modelling choices that move +mass between nodes but conserve the global epidemic accounting. + +## §4 The five epidemiological metrics + +Once a tree is in place, netdiffuseR exposes five tree-aware metrics. +All return classed `data.frame`s with custom `print` methods, so the +console reads as a summary while the full per-row breakdown remains +accessible via standard subscripting / `as.data.frame()`. + +```{r quickstart-metrics, eval=TRUE} +pp <- peak_prevalence(dn) +pt <- peak_time(dn) +sar <- secondary_attack_rate(dn) +gt <- generation_time(dn) +sc <- survival_curve(dn) + +cat(sprintf("Peak prevalence : %.3f at t = %d\n", pp, pt)) +sar +gt +sc +``` + +A quick glossary, since the names overlap with several literatures: + +- **Peak prevalence** is the maximum share of currently-adopted + nodes across the time horizon. For an absorbing diffusion (no + recovery, the default) this is the final share; under SIRS it can + occur mid-run. +- **Secondary attack rate** (SAR) is `n_secondary / n_contacts`, + keyed per infection event of the source (one row per + `(source, virus_id, source_exposure_date)`). The aggregate is + the sum-over-sum: `sum(n_secondary) / sum(n_contacts)`. +- **Generation time** is, per tree edge, `date - source_exposure_date`: + how long the infector had been in state I before transmitting. +- **Survival curve** is a Kaplan-Meier estimate of "time until + recovery" among adopters. For absorbing diffusions every adopter + is right-censored (no recovery ever observed) and the curve is + flat at 1. + +`summary.diffnet_epi()` rolls all five up into a single block on top +of the standard `summary.diffnet` output: + +```{r summary-epi} +summary(dn) +``` + +## §5 Reproduction number and the offspring distribution + +In the SIR convention, a node is **susceptible (S)** before it adopts, +**infectious (I)** during the period in which it can transmit, and +**recovered (R)** once it stops. The *reproduction number* +\(R\) is, in plain words, the **average number of secondary cases +that a typical infectious node causes during its time in I**. For an +absorbing diffusion every adopter stays in I until the simulation +ends; under SIRS (see §6) a node can enter and leave I more than +once, and each such infection-life counts as its own case. + +The arithmetic is straightforward. Let \(\nu_i\) (the *offspring +count*) be the number of secondary cases attributable to infection +event \(i\) --- exactly the number of times that node-event appears +as a `source` in the transmission tree. The empirical reproduction +number is + +\[ +R \;=\; \frac{1}{N}\sum_{i=1}^{N} \nu_i, +\] + +where \(N\) is the total number of infection events +(seeds + secondary). The convention matches Lloyd-Smith *et al.* +(2005) and `epiworldR::get_reproductive_number()`: seeds are +included as cases (with their offspring counted from the moment +they entered I), and terminal cases (\(\nu_i = 0\)) stay in the +denominator. + +```{r repr-toy} +R <- repr_number(dn) +R +``` + +Three things to notice. First, the printed scalar is the aggregate +mean. Second, the full per-case offspring is one column of the +returned `data.frame` --- ready for plotting, model fitting, or +distributional checks. Third, the `plot` method renders the offspring +distribution as a barplot, the visual signature of superspreading +when the right tail is heavy: + +```{r repr-plot, fig.cap="Offspring distribution on a toy absorbing diffusion."} +plot(R) +``` + +### A note on multi-behaviour datasets + +When the tree carries more than one virus, the print method makes the +aggregation explicit and adds a per-diffusion rollup, so the reader +never confuses a pooled `R` with a single-diffusion one: + +```{r repr-multi} +set.seed(2026) +suppressMessages( + dn_multi <- rdiffnet( + n = 60, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.05, 0.03), stop.no.diff = FALSE, + threshold.dist = 1, exposure.args = list(normalized = FALSE), + source_attribution = source_attribution_uniform + ) +) +repr_number(dn_multi) +``` + +The `source_attribution` argument also accepts a *list* of length +\(Q\) (number of diffusions), one attribution rule per virus. This +is useful when the different processes have different plausible +infectors --- e.g. a uniform-attribution behaviour spreading +alongside an earliest-attribution behaviour: + +```{r repr-multi-mixed} +set.seed(2026) +suppressMessages( + dn_mixed <- rdiffnet( + n = 60, t = 6, seed.graph = "small-world", + seed.p.adopt = list(0.05, 0.03), stop.no.diff = FALSE, + threshold.dist = 1, exposure.args = list(normalized = FALSE), + source_attribution = list( + source_attribution_uniform, # for virus 1 + source_attribution_earliest # for virus 2 + ) + ) +) +repr_number(dn_mixed) +``` + +## §6 SIRS: re-infection support + +So far every example was *absorbing*: once a node adopted, it stayed +adopted. To model recovery and re-infection (the S-I-R-S regime), pass +`disadopt =` to `rdiffnet()` --- any function that decides who returns +to S each period. netdiffuseR's tree machinery records each fresh +adoption as its own row, so a node infected three times during the +run shows up as three cases: + +```{r sirs-toy} +# disadopt_30 returns the set of nodes that recover each period. Here it +# randomly disadopts 30% of the currently-adopted population at every +# step, mimicking a short infectious window. Any user function with +# signature (expo, cumadopt, time) -> list-of-disadopter-vectors works. +disadopt_30 <- function(expo, cumadopt, time) { + q_max <- dim(cumadopt)[3] + res <- vector("list", q_max) + for (q in seq_len(q_max)) { + adopters <- which(cumadopt[, time, q] == 1L) + res[[q]] <- if (length(adopters)) + sample(adopters, ceiling(0.30 * length(adopters))) else integer() + } + res +} + +set.seed(2026) +dn_sirs <- rdiffnet( + n = 60, t = 10, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + threshold.dist = 1, exposure.args = list(normalized = FALSE), + disadopt = disadopt_30, + source_attribution = source_attribution_uniform +) +tr_sirs <- transmission_tree(dn_sirs) + +# Some nodes appear as targets in more than one row -> re-infected. +node_lives <- table(paste(tr_sirs$target, tr_sirs$virus_id, sep = "::")) +table(as.integer(node_lives)) +``` + +How to read this table: the names are *number of infection events* +that a single `(node, virus_id)` pair experienced over the +simulation; the values are *how many nodes* fall into each bucket. +For an absorbing simulation the table is always `1 -> N` (every +node-virus pair appears in the tree exactly once). Under SIRS, +buckets `2`, `3`, ... light up because some nodes cycle through I +multiple times --- a node in bucket `3` was infected, recovered, +re-infected, recovered again, and re-infected a third time. Each +entry into I is its own *case* for the purposes of `repr_number()` +and `secondary_attack_rate()` (per-event keying), which is the +quantity Lloyd-Smith superspreader analyses operate on. + +A subtle consequence worth flagging: under SIRS, a single source can +re-infect the *same* target multiple times during its infection +life. Each transmission is a row in the tree and contributes to +`n_secondary`. The per-row SAR can therefore exceed 1; this is +expected (the unit shifts from "probability per contact" to +"transmissions per contact"), and the aggregate sum-over-sum +interpretation is preserved. + +## §7 Reconstructing the tree from observed data + +So far every `diffnet_epi` has come straight out of `rdiffnet()` --- +the simulation engine built the tree as it went. In the empirical +setting things look different: you start with a plain `diffnet` +(contact graph + adoption times) and the tree has to be inferred +*post hoc*. The primitive for that is +`transmission_tree_from_events()`: + +```{r post-hoc} +# Pretend dn was built by hand from observed data; strip the diffnet_epi +# class so we are working from a plain diffnet. +class(dn) <- "diffnet" + +# Reconstruct the tree from x's graph slices + toa, using uniform +# attribution and a fixed seed for reproducibility. +tree <- transmission_tree_from_events( + dn, + attribution = "uniform", + seed = 2026 +) +head(tree) +``` + +The same job, wrapped in one step that returns the promoted object, +is `as_diffnet_epi(dn, attribution = ...)`: + +```{r as-diffnet-epi-reconstruction} +dn_epi <- as_diffnet_epi(dn, attribution = "uniform", seed = 2026) +is.diffnet_epi(dn_epi) +identical(transmission_tree(dn_epi), tree) +``` + +This is exactly the primitive Part II uses --- the bundled +`epigamesDiffNet` was constructed by running +`as_diffnet_epi(., attribution = "uniform")` on the empirical contact +graph and adoption times. The same algorithm therefore powers both +*"simulate a network outbreak"* and *"analyse a real one"*. Custom +attribution rules are user-functions with the +`source_attribution_uniform` signature, so domain-specific priors +(e.g. proximity duration, role homophily) can be wired in without +touching the package internals. + +## §8 Stochasticity: virus exposure and behavioural adoption + +Every toy in §3-§7 used the deliberately simple model laid out in +§2: `threshold.dist = 1` with `exposure.args = list(normalized = FALSE)` +--- a node becomes infected the moment one infectious neighbour +exists, with certainty, regardless of edge weight or contact +duration. The package supports **two independent generalisations +of this model**, each one relaxing a different unrealistic +assumption. They sit at different layers of the simulation: + +``` + contact graph + adopted neighbours + | + v (a) noisy exposure ---- exposure.mode = "stochastic" + | exposure.args$link_fun = ... + exposure value + | + v (b) noisy adoption ---- adoption_mechanism = adoptmech_logit + | adoption_pars = list(beta0=, beta_expo=) + adoption decision +``` + +### §8.1 From threshold to infection probability (Wells-Riley) + +So far we have assumed something epidemiologically implausible: +that **a single infectious contact, regardless of its duration, +infects the target with certainty**. A 1-second brush past an +infected person and a 4-hour conversation are treated identically +--- both cross `threshold = 1` because both contribute one unit of +exposure under the default identity link. Real airborne +transmission is neither deterministic nor exposure-duration-blind: +the probability that a given contact transmits *grows with +contact duration but saturates* --- doubling the time you share a +room with an infected person less than doubles your chance of +getting sick. Wells (1955) and Riley, Murphy and Riley (1978) +formalised this in the **Wells-Riley equation**, where the +per-contact probability of infection is + +\[ +P(\text{infection} \mid \text{contact of duration } w) \;=\; 1 - \exp(-\beta\,w), +\] + +with \(\beta\) aggregating the infector's quanta-emission rate, +ventilation, and the target's susceptibility. The function is +linear in \(w\) only at very short durations and saturates at 1 as +\(w \to \infty\) --- exactly the qualitative behaviour we want. +netdiffuseR's `exposure.mode = "stochastic"` with +`link_fun = "wells-riley"` implements this transform on every edge +weight: each non-zero contact becomes a Bernoulli draw with the +saturating probability above. Two changes from the §3-§7 default +work together: contacts are now *probabilistic* (not all transmit) +and edge weights *matter* (longer contacts are riskier). + +```{r stoch-exposure} +set.seed(2026) +suppressMessages( + dn_wr <- rdiffnet( + n = 60, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + threshold.dist = 1, + exposure.mode = "stochastic", + exposure.args = list(normalized = FALSE, + link_fun = "wells-riley", + link_pars = list(beta = 0.5)), + source_attribution = source_attribution_weighted + ) +) +cat(sprintf("Final prevalence : %.3f\n", peak_prevalence(dn_wr))) +cat(sprintf("R : %.3f\n", + attr(repr_number(dn_wr), "global"))) +``` + +`beta` is the only knob: small `beta` damps transmission per +contact, large `beta` recovers the deterministic 1-contact-infects +regime. The exposure layer is now noisy, but the adoption decision +itself remains deterministic given the (random) exposure count. + +### §8.2 From threshold to a noisy behavioural decision (logit on mask adoption) + +The virus is not the only thing that diffuses through a contact +network during a pandemic. **Protective behaviours** --- wearing a +mask, accepting a vaccine, voluntarily quarantining --- also spread +person-to-person, and the standard model for them is *adoption*, +not infection. Threshold-based diffusion (§3-§7) is one option; +*probabilistic adoption* is the natural alternative. Each susceptible +individual evaluates their exposure to mask-wearing peers and adopts +with probability + +\[ +P(\text{wears mask today} \mid \text{exposure}) \;=\; \mathrm{logit}^{-1}\!\bigl(\beta_0 + \beta_{\text{expo}}\,\cdot\,\text{exposure}\bigr). +\] + +A negative \(\beta_0\) keeps baseline adoption rare; a positive +\(\beta_{\text{expo}}\) makes adoption increasingly likely as peer +adoption rises. `adoptmech_logit` implements exactly this rule, and +because it overrides `rdiffnet()`'s threshold mechanism, we no +longer need `threshold.dist`: + +```{r stoch-adopt} +set.seed(2026) +dn_mask <- rdiffnet( + n = 60, t = 6, seed.graph = "small-world", + seed.p.adopt = 0.05, stop.no.diff = FALSE, + adoption_mechanism = adoptmech_logit, + adoption_pars = list(beta0 = -2, beta_expo = 4), + source_attribution = source_attribution_uniform +) +cat(sprintf("Final mask prevalence : %.3f\n", peak_prevalence(dn_mask))) +cat(sprintf("R (mask adoption) : %.3f\n", + attr(repr_number(dn_mask), "global"))) +``` + +The vocabulary deliberately shifts here: we say *adoption*, not +*infection*, because the underlying process is behavioural, not +contagious. The Lloyd-Smith machinery from §5 still applies --- +\(R\) is now the mean number of *secondary mask adopters* per +adopter --- but the interpretation tracks the substantive question. + +### §8.3 Together: the simulation analogue of a real pandemic experiment + +(a) and (b) are independent and composable. A simulation can have +Wells-Riley exposure for the virus *and* logit adoption for masks +running on the same contact graph as two separate diffusion +processes. That combination --- a stochastically transmitted virus +plus an adoption decision that itself spreads through the same +social network --- is the simulation analogue of a real-world +pandemic experiment. **And that is exactly the setting Epigames +captures in the wild**: a virus propagating through Bluetooth +proximity contacts, while participants make mask / medicine / +isolation decisions whose own contagion the experiment also +records. Part II turns to that data. + +------------------------------------------------------------------------ + +# Part II --- Case study: Epigames + +## §9 The experiment + +The bundled `epigamesDiffNet` records a controlled pandemic +simulation in which 594 students used a mobile app for 15 days on +their university campus. The app turned face-to-face encounters +(measured by Bluetooth proximity) into a contact graph, and a +seeded *virus* spread through that graph following experimental +parameters (Musa *et al.*, 2026). The dataset shipped here aggregates +the original raw history to daily slices and reconstructs the +transmission tree by uniform source attribution --- which means the +analysis below treats the dataset as a single observed outbreak with +an attribution-model uncertainty we will quantify in §11. + +The companion hourly dataset, `epigames`, contains the raw inputs +(static attributes, hourly edgelist, dynamic behavioural attributes) +in case the user wants to build a finer-grained diffnet from scratch. + +## §10 Loading and inspecting + +```{r epigames-load} +data("epigamesDiffNet", package = "netdiffuseR") +class(epigamesDiffNet) +nnodes(epigamesDiffNet) +length(epigamesDiffNet$graph) # number of daily slices +``` + +The header line of the print method shows the standard diffnet +summary plus a transmission tree summary line: + +```{r epigames-print} +epigamesDiffNet +``` + +Because `diffnet_epi` inherits from `diffnet`, the usual descriptive +tooling works out of the box. The cumulative adoption curve, for +instance: + +```{r epigames-adopters, fig.cap="Cumulative adoption curve for the Epigames outbreak (n = 594, 15 days)."} +plot_adopters(epigamesDiffNet) +``` + +## §11 The five metrics + +```{r epigames-five} +pp <- peak_prevalence(epigamesDiffNet) +pt <- peak_time(epigamesDiffNet) +sar <- secondary_attack_rate(epigamesDiffNet) +gt <- generation_time(epigamesDiffNet) +sc <- survival_curve(epigamesDiffNet) + +cat(sprintf("Peak prevalence : %.3f at day %d\n", pp, pt)) +cat(sprintf("SAR (aggregate) : %.3f over %d infection events\n", + attr(sar, "global"), nrow(sar))) +cat(sprintf("Generation time : mean = %.2f days, median = %.0f days, n_edges = %d\n", + mean(gt$gen_time), median(gt$gen_time), nrow(gt))) +``` + +The peak occurs on the last day of the experiment: the outbreak is +still growing at the horizon. The mean SAR of ~0.18 is consistent +with a moderately transmissible airborne agent under university-life +mixing, with substantial heterogeneity across infectors (we will +visualise this in §12). Mean generation time of ~2.2 days is in the +right ballpark for the parameters used in the experiment. + +The survival curve printed above is flat at 1 because the bundled +`epigamesDiffNet$status` array only carries adoption events --- +every adopter is right-censored. This is a *preprocessing artefact* +of the current data pipeline, not a property of the experiment. The +underlying `histories.csv` (in the project's `playground/`) contains +295 `outcome` events, including **42 `RECOVERED`**, 62 `DEAD`, and +191 `ESCAPED` transitions, none of which the bundled diffnet +currently wires in. A future revision of `data-raw/epigamesDiffNet.R` +will read those into a non-monotone `$status` array and make +`survival_curve()` and the SIRS branch of `repr_number()` light up +on this dataset. + +## §12 The Epigames reproduction number + +```{r epigames-R} +R_emp <- repr_number(epigamesDiffNet) +R_emp +``` + +The aggregate `R` of \~`r round(attr(repr_number(epigamesDiffNet), "global"), 2)` +is below the classical R > 1 outbreak threshold --- consistent with a +diffusion that grew but never went exponential, and with the high +share of seed-like rows (single contacts on day 1) inflating the +denominator. The offspring distribution, however, is highly skewed: + +```{r epigames-R-plot, fig.cap="Empirical offspring distribution from the Epigames transmission tree."} +plot(R_emp) +``` + +A small number of cases account for many secondary infections. This +is the *superspreading signature* Lloyd-Smith *et al.* (2005) +documented in SARS, MERS, and several other respiratory outbreaks. In +the Epigames data it is visible despite the modest sample size and +the controlled environment. + +## §13 Empirical vs simulated: same network, varying mechanism + +The empirical `R` is one realisation of one attribution policy on +one tree. To contextualise it, we run a counter-factual: +keep the *same contact graph* used by the experiment and let +`rdiffnet()` simulate an absorbing diffusion with default +threshold-based adoption. Both runs see the same network at each +slice; only the adoption rule differs. + +```{r epigames-vs-sim} +set.seed(2026) +n_seeds <- sum(epigamesDiffNet$toa == 1, na.rm = TRUE) + +# Simulated counter-factual on the SAME contact graph as the experiment. +# When -seed.graph- is a dynamic graph, -t- is inferred from its length, so +# we omit it here to avoid the redundancy warning. +dn_sim <- rdiffnet( + seed.graph = epigamesDiffNet$graph, + seed.p.adopt = n_seeds / nnodes(epigamesDiffNet), + stop.no.diff = FALSE, + source_attribution = source_attribution_uniform +) + +R_sim <- repr_number(dn_sim) + +c(empirical = attr(R_emp, "global"), + simulated = attr(R_sim, "global")) +``` + +The aggregate scalars are not directly comparable; the *shape* of the +two trees explains why. The empirical tree is dominated by seeds: +`r sum(is.na(transmission_tree(epigamesDiffNet)$source))` of its +`r nrow(transmission_tree(epigamesDiffNet))` rows are day-1 infections +with no prior adopter to attribute to, all of which enter the +Lloyd-Smith denominator with zero offspring. The simulated cascade, +in contrast, starts from a smaller seeded core and grows +diffusion-like through the network --- so its denominator is +correspondingly smaller and its `R` correspondingly larger. + +Side by side: + +```{r epigames-vs-sim-plot, fig.height=4.5, fig.cap="Empirical (left) vs simulated (right) offspring distributions on the same contact graph. Both use uniform source attribution; the simulation uses threshold-based adoption with a uniform threshold distribution."} +op <- par(mfrow = c(1, 2)) +plot(R_emp, main = "Empirical (Epigames)") +plot(R_sim, main = "Simulated (rdiffnet, threshold)") +par(op) +``` + +What is comparable across the two panels is the *right tail*. The +simulated threshold model reaches offspring counts of +`r max(R_sim$n_offspring)` per infection event; the empirical +distribution caps at `r max(R_emp$n_offspring)`. The threshold +model on this network produces more concentrated transmission events +than the experiment did, even though the contact graph is identical. +A plausible reading is that the experimental subjects' heterogeneous +behaviour --- mask use, medicine, voluntary isolation --- flattened +the right tail relative to a homogeneous-threshold null model. + +The takeaway is methodological: because we held the contact graph +fixed and varied only the adoption rule, any difference in offspring +distributions is attributable to behaviour, not to the network. + +------------------------------------------------------------------------ + +# Appendix + +## Related vignettes + +- *Introduction to netdiffuseR* --- the substrate + (`diffnet`, adoption, exposure). +- *Simulating diffusion networks: Using the `rdiffnet` function* --- + detailed coverage of the simulation API, including the + `adoption_mechanism`, `disadopt`, and exposure modes used here. +- *Multiple behaviours* --- multi-virus / multi-behaviour diffusion, + the substrate for the per-diffusion `repr_number` rollup shown in §5 of + §5. + +## References + +- Lloyd-Smith, J. O., Schreiber, S. J., Kopp, P. E., & Getz, W. M. + (2005). Superspreading and the effect of individual variation on + disease emergence. *Nature*, 438, 355--359. + +- Wells, W. F. (1955). *Airborne Contagion and Air Hygiene: An + Ecological Study of Droplet Infections.* Harvard University Press. +- Riley, E. C., Murphy, G., & Riley, R. L. (1978). Airborne spread + of measles in a suburban elementary school. *American Journal of + Epidemiology*, 107(5), 421--432. + +- Wallinga, J., & Teunis, P. (2004). Different epidemic curves for + severe acute respiratory syndrome reveal similar impacts of + control measures. *American Journal of Epidemiology*, 160(6), + 509--516. +- White, L. F., & Pagano, M. (2008). A likelihood-based method for + real-time estimation of the serial interval and reproductive + number of an epidemic. *Statistics in Medicine*, 27, 2999--3016. +- Valente, T. W. (1995). *Network Models of the Diffusion of + Innovations*. Hampton Press. +- Musa, S.S., Mkandawire, W., Inekwe, T. et al. App-based epidemic + game in a university campus reveals how risk perception and + behavioral interventions shape disease transmission dynamics. + *Sci Rep* (2026). + +## Session info + +```{r session, echo=FALSE} +sessionInfo() +```