From raw data to model with tidyILD

This vignette walks through the full tidyILD pipeline: prepare data, inspect structure, apply within-between decomposition and lags, fit a mixed-effects model, and run diagnostics and plots.

For which temporal assumptions (lags in the mean, residual AR, time-varying effects, state-space) fit your question, see vignette("temporal-dynamics-model-choice", package = "tidyILD").

Simulate and prepare

library(tidyILD)
# Simulate simple ILD
d <- ild_simulate(n_id = 10, n_obs_per = 12, irregular = TRUE, seed = 42)
# Prepare: encode time structure and add .ild_* columns
x <- ild_prepare(d, id = "id", time = "time", gap_threshold = 7200)

Inspect

ild_summary(x)
#> $summary
#> # A tibble: 1 × 7
#>    n_id n_obs time_min time_max prop_gap median_dt_sec iqr_dt_sec
#>   <int> <int>    <dbl>    <dbl>    <dbl>         <dbl>      <dbl>
#> 1    10   120        0   40136.        0         3612.       640.
#> 
#> $n_units
#> [1] 10
#> 
#> $n_obs
#> [1] 120
#> 
#> $time_range
#> [1]     0 40136
#> 
#> $spacing
#> $spacing$median_dt
#> [1] 3612.244
#> 
#> $spacing$iqr_dt
#> [1] 640.2958
#> 
#> $spacing$n_intervals
#> [1] 110
#> 
#> $spacing$pct_gap
#> [1] 0
#> 
#> $spacing$by_id
#> # A tibble: 10 × 5
#>       id median_dt iqr_dt n_intervals pct_gap
#>    <int>     <dbl>  <dbl>       <int>   <dbl>
#>  1     1     3627.   549.          11       0
#>  2     2     3702.   854.          11       0
#>  3     3     3617.   740.          11       0
#>  4     4     3321.   702.          11       0
#>  5     5     3616.   588.          11       0
#>  6     6     3438.   481.          11       0
#>  7     7     3767.   816.          11       0
#>  8     8     3591.   499.          11       0
#>  9     9     3609.   194.          11       0
#> 10    10     3700.   521.          11       0
#> 
#> 
#> $n_gaps
#> [1] 0
#> 
#> $pct_gap
#> [1] 0
ild_spacing_class(x)
#> [1] "regular-ish"

Within-person centering and lags

x <- ild_center(x, y)
x <- ild_lag(x, y, mode = "gap_aware", max_gap = 7200)

Fit a model

Without residual autocorrelation (lmer):

fit0 <- ild_lme(y ~ 1 + (1 | id), data = x, ar1 = FALSE, warn_no_ar1 = FALSE)

With AR1 residual correlation (nlme):

fit1 <- ild_lme(y ~ 1, data = x, ar1 = TRUE, correlation_class = "CAR1")

Diagnostics and plots

diag <- ild_diagnostics(fit1, data = x)
names(diag)  # meta, data, stats
#> [1] "meta"  "data"  "stats"
names(plot_ild_diagnostics(diag))  # plot names for requested types
#> [1] "residual_acf"        "residuals_vs_fitted" "residuals_vs_time"  
#> [4] "qq"
# Pooled residual ACF (tibble)
head(diag$stats$acf$pooled)
#> # A tibble: 6 × 2
#>     lag      acf
#>   <dbl>    <dbl>
#> 1     0  1      
#> 2     1  0.121  
#> 3     2 -0.00832
#> 4     3 -0.113  
#> 5     4 -0.0886 
#> 6     5 -0.147
# By-id ACF when by_id = TRUE: one tibble per person
names(diag$stats$acf$by_id)
#>  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
head(diag$stats$acf$by_id[[1]])
#> # A tibble: 6 × 2
#>     lag     acf
#>   <dbl>   <dbl>
#> 1     0  1     
#> 2     1  0.361 
#> 3     2 -0.0851
#> 4     3  0.149 
#> 5     4  0.196 
#> 6     5  0.0752
ild_plot(x, type = "trajectory", var = "y", max_ids = 5)

Trajectory plot

ild_plot(fit1, type = "fitted")

Fitted vs observed

MSM-style weights (IPTW and IPCW) — optional

For a marginal structural model–style sensitivity analysis, you can build inverse probability of treatment weights: either pooled (ild_iptw_weights()) or sequential MSM for time-varying A_t (ild_iptw_msm_weights() after building history with ild_lag()), then inverse probability of censoring weights for monotone dropout (ild_ipcw_weights()), multiply into a joint analysis weight with ild_joint_msm_weights(), and refit with ild_ipw_refit(). This path targets treatment assignment and loss-to-follow-up; it is distinct from outcome missingness IPW (ild_missing_model() + ild_ipw_weights()). Assumptions (positivity, correct models) are still yours; for uncertainty with estimated weights, use ild_msm_bootstrap() (see ?ild_msm_inference) instead of trusting default lmer SEs alone—weight_policy = "reestimate_weights" with a weights_fn that rebuilds IPW on each bootstrap resample is often the appropriate choice when you want first-stage uncertainty reflected; fixed_weights resamples clusters but keeps the weights attached to resampled rows (faster, approximate).

# Example skeleton (not run in the vignette build)
x2 <- ild_simulate(n_id = 12, n_obs_per = 10, seed = 1)
x2$stress <- rnorm(nrow(x2))
x2$trt <- rbinom(nrow(x2), 1L, 0.45)
x2 <- ild_prepare(x2, id = "id", time = "time")
x2 <- ild_center(x2, y)
x2 <- ild_iptw_weights(x2, treatment = "trt", predictors = "stress")
# Sequential A_t: x2 <- ild_lag(x2, stress); x2 <- ild_lag(x2, trt); ...
# x2 <- ild_iptw_msm_weights(x2, treatment = "trt", history = ~ stress_lag1 + trt_lag1)
x2 <- ild_ipcw_weights(x2, predictors = "stress")
x2 <- ild_joint_msm_weights(x2)
fit_msm <- ild_lme(y ~ y_bp + y_wp + stress + (1 | id), data = x2,
  ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE)
fit_msm_w <- ild_ipw_refit(fit_msm, data = x2)

Balance and overlap: After weights are attached, use ild_msm_balance() (weighted SMDs), ild_ipw_ess() (Kish effective sample size), and ild_msm_overlap_plot() (propensity densities by treatment—pooled IPTW or sequential MSM via attr(x, "ild_iptw_msm_fits")). Optional: ild_diagnose(..., balance = TRUE, balance_treatment = "trt", balance_covariates = c("stress")) fills causal$balance and may trigger guardrails GR_MSM_BALANCE_SMD_HIGH / GR_MSM_ESS_LOW. ild_autoplot(bundle, section = "causal", type = "overlap", treatment = "trt") draws overlap when ggplot2 is available.

For a full assumptions-oriented walkthrough (exchangeability, positivity/overlap, consistency, and weight-model correctness) and simulation-based recovery checks, see vignette("msm-identification-and-recovery", package = "tidyILD"). When using ild_msm_fit(), check fit_obj$inference$status/reason for degraded paths, or set strict_inference = TRUE to fail fast.

A minimal bootstrap illustration (small n_boot for speed; increase in real analyses):

set.seed(3)
xb <- ild_simulate(n_id = 10, n_obs_per = 5, seed = 3)
xb$stress <- rnorm(nrow(xb))
xb <- ild_prepare(xb, id = "id", time = "time")
xb <- ild_center(xb, y)
xb$.ipw <- runif(nrow(xb), 0.85, 1.15)
fb <- ild_lme(y ~ y_bp + y_wp + stress + (1 | id), data = xb,
  ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE)
#> boundary (singular) fit: see help('isSingular')
fwb <- ild_ipw_refit(fb, data = xb, weights = ".ipw")
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 3.18194 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
bs_fixed <- ild_msm_bootstrap(fwb, n_boot = 20L, weight_policy = "fixed_weights", seed = 3)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 3.71574 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.350427 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.58548 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.233155 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.466258 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.45904 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.54162 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.622247 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> unable to evaluate scaled gradient
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.497571 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> unable to evaluate scaled gradient
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.0159191 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.0728087 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.93999 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.79758 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 3.10373 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.78778 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
tidy_ild_msm_bootstrap(bs_fixed)
#> # A tibble: 4 × 18
#>   term  component effect_level  estimate std_error  conf_low conf_high statistic
#>   <chr> <chr>     <chr>            <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
#> 1 (Int… fixed     population    7.83e-18  1.35e-16 -2.38e-16  2.54e-16  5.82e- 2
#> 2 y_bp  fixed     between       1   e+ 0  2.02e-16  1   e+ 0  1   e+ 0  4.95e+15
#> 3 y_wp  fixed     within        1   e+ 0  1.55e-16  1   e+ 0  1   e+ 0  6.45e+15
#> 4 stre… fixed     unknown      -1.71e-17  1.11e-16 -2.18e-16  1.47e-16 -1.54e- 1
#> # ℹ 10 more variables: p_value <dbl>, interval_type <chr>, engine <chr>,
#> #   model_class <chr>, rhat <dbl>, ess_bulk <dbl>, ess_tail <dbl>, pd <dbl>,
#> #   rope_low <dbl>, rope_high <dbl>
# reestimate_weights: weights_fn must return ILD with the weight column, e.g. re-run IPTW pipeline:
bs_re <- ild_msm_bootstrap(fwb, n_boot = 12L, weight_policy = "reestimate_weights",
  seed = 4, weights_fn = function(d) { d$.ipw <- runif(nrow(d), 0.85, 1.15); d })
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.0115 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.993267 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.121577 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.83943 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 2.14776 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.0111 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 1.33871 (tol = 0.002, component 1)
#>   See ?lme4::convergence and ?lme4::troubleshooting.
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> boundary (singular) fit: see help('isSingular')
#> boundary (singular) fit: see help('isSingular')

Reproducibility

Use a fixed seed when simulating or fitting models so results can be recreated. The pipeline is deterministic for a given seed and data. When saving results (e.g. after [ild_lme()] or [ild_diagnostics()]), you can attach a reproducibility manifest and save a single bundle with [ild_manifest()] and [ild_bundle()]:

# Optional: build a manifest with scenario and seed, then bundle the fit for saving
manifest <- ild_manifest(seed = 42, scenario = ild_summary(x), include_session = FALSE)
bundle <- ild_bundle(fit1, manifest = manifest, label = "model_ar1")
# saveRDS(bundle, "run.rds")  # one file with result + manifest + label
names(bundle)
#> [1] "result"   "manifest" "label"

For simulation-based recovery and power under the bundled ild_simulate() DGP, see vignette("benchmark-simulation-recovery", package = "tidyILD").