Running Validation
Checking consistency before submission
Overview
Validation ensures your manuscript, code, and configuration stay aligned throughout development.
Quick Start
# Full validation
make validate-consistency
# Quick check (skip slow verifications)
make validate-quick
# Specific checks
Rscript scripts/validate_consistency.R --config-only
Rscript scripts/validate_consistency.R --methods-only
Validation Levels
Level 1: Config Validation
Checks that manuscript values match globals.yml:
# Example failures
# - Manuscript says "10,000 replications" but config has 5000
# - Manuscript says "alpha = 0.05" but config has 0.10
Level 2: Method Validation
Verifies described methods exist in code:
# Example failures
# - Manuscript describes "Sobol sequences" but code uses "Latin hypercube"
# - Manuscript says "Cox model" but code uses "Weibull"
Level 3: Provenance Validation
Confirms data files and figures are current:
# Example failures
# - Figure file older than its data source
# - Referenced CSV doesn't exist
# - Targets pipeline has outdated objects
Level 4: Cross-Reference Validation
Ensures internal consistency:
# Example failures
# - Same value reported differently in different sections
# - Table and figure show conflicting numbers
The Validation Script
#!/usr/bin/env Rscript
# scripts/validate_consistency.R
library(yaml)
library(stringr)
library(cli)
#' Run the consistency validation levels and print a summary
#'
#' Reads the claim registry, runs the selected validation levels, reports each
#' result through cli, and prints a final summary of failures.
#'
#' @param registry_path Path to the YAML claim registry.
#' @param globals_path Path forwarded to `load_globals()`.
#' @param quick If `TRUE`, skip the provenance level.
#' @param config_only If `TRUE`, run only the config level (the
#'   `--config-only` mode referenced by the Makefile and CI workflow).
#' @param methods_only If `TRUE`, run only the method level (the
#'   `--methods-only` mode).
#' @return Invisibly, the list of all check results. If any check failed and
#'   the session is non-interactive, the process exits with status 1 instead
#'   of returning.
validate_all <- function(
  registry_path = "config/consistency_registry.yml",
  globals_path = "config/globals.yml",
  quick = FALSE,
  config_only = FALSE,
  methods_only = FALSE
) {
  if (config_only && methods_only) {
    stop(
      "`config_only` and `methods_only` are mutually exclusive",
      call. = FALSE
    )
  }
  run_config <- !methods_only
  run_methods <- !config_only
  run_provenance <- !quick && !config_only && !methods_only

  cli_h1("Consistency Validation")

  registry <- yaml::read_yaml(registry_path)
  all_results <- list()

  # Level 1: Config
  if (run_config) {
    # The globals loader is only needed for config claims; R/globals_loader.R
    # is expected to define load_globals().
    source("R/globals_loader.R")
    cfg <- load_globals(globals_path = globals_path)

    cli_h2("Config Claims")
    config_results <- validate_config_claims(registry, cfg)
    all_results <- c(all_results, config_results)
    report_results(config_results)
  }

  # Level 2: Methods
  if (run_methods) {
    cli_h2("Method Claims")
    method_results <- validate_method_claims(registry)
    all_results <- c(all_results, method_results)
    report_results(method_results)
  }

  # Level 3: Provenance (the slow level, skipped by `quick`)
  if (run_provenance) {
    cli_h2("Data Provenance")
    prov_results <- validate_provenance()
    all_results <- c(all_results, prov_results)
    report_results(prov_results)
  }

  # Summary
  cli_h1("Summary")
  # Anything that is not exactly TRUE counts as a failure; isTRUE() also keeps
  # the predicate scalar so Filter() cannot misalign on NULL/NA `valid`.
  failures <- Filter(function(x) !isTRUE(x$valid), all_results)

  if (length(failures) > 0) {
    cli_alert_danger("{length(failures)} validation failure(s)")
    for (r in failures) {
      cli_alert_warning("{r$id}: {r$message}")
    }
    # Non-zero exit status for make/CI; never terminate an interactive session.
    if (!interactive()) {
      quit(save = "no", status = 1)
    }
  } else {
    cli_alert_success("All {length(all_results)} checks passed")
  }

  invisible(all_results)
}
#' Check registry claims against values in the loaded config
#'
#' Selects claims whose category is "config", "simulation" or "analysis",
#' looks up each claim's dot-separated `config_key` in `cfg`, and compares the
#' value found with the claim's `expected_value`.
#'
#' @param registry Parsed registry; `registry$claims` is a list of claims.
#' @param cfg Nested list of config values.
#' @return A list with one `list(id, valid, message)` entry per selected
#'   claim. `message` is `NULL` when the claim is valid.
validate_config_claims <- function(registry, cfg) {
  config_categories <- c("config", "simulation", "analysis")

  # isTRUE() keeps the predicate scalar: a claim without `category` would
  # otherwise yield logical(0) and shift Filter()'s selection.
  claims <- Filter(
    function(claim) isTRUE(claim[["category"]] %in% config_categories),
    registry$claims
  )

  lapply(claims, function(claim) {
    fail <- function(msg) list(id = claim$id, valid = FALSE, message = msg)

    config_key <- claim[["config_key"]]
    if (!is.character(config_key) || length(config_key) != 1L) {
      return(fail("Claim has no config_key"))
    }

    # Navigate to the config value, failing cleanly on a missing key
    keys <- strsplit(config_key, ".", fixed = TRUE)[[1]]
    value <- cfg
    for (key in keys) {
      if (!is.list(value) || is.null(value[[key]])) {
        return(fail(sprintf("Config key '%s' not found", config_key)))
      }
      value <- value[[key]]
    }

    expected <- claim[["expected_value"]]
    if (is.null(expected)) {
      return(fail("Claim has no expected_value"))
    }

    # Explicit NULL check instead of `%||%`, which base R only has from 4.4.
    tolerance <- if (is.null(claim[["tolerance"]])) {
      0.0001
    } else {
      claim[["tolerance"]]
    }

    if (is.numeric(value) && is.numeric(expected)) {
      valid <- length(value) == length(expected) &&
        isTRUE(all(abs(value - expected) <= tolerance))
    } else {
      # Non-numeric settings (e.g. a method name) are compared as text.
      valid <- identical(as.character(value), as.character(expected))
    }

    list(
      id = claim$id,
      valid = valid,
      message = if (!valid) {
        sprintf(
          "Config has %s, expected %s",
          paste(value, collapse = ", "),
          paste(expected, collapse = ", ")
        )
      } else {
        NULL
      }
    )
  })
}
#' Check that methods described in the registry appear in the code
#'
#' Selects claims whose category is "method" and, for each one that names a
#' verification file, tests whether the claim's `code_pattern` (a Perl-style
#' regular expression) matches the file's contents.
#'
#' @param registry Parsed registry; `registry$claims` is a list of claims.
#' @return A list with one `list(id, valid, message)` entry per method claim.
#'   Claims without a verification file are reported as valid; a missing
#'   file, a missing pattern, or an unmatched pattern is reported as invalid.
validate_method_claims <- function(registry) {
  # identical() is always a scalar TRUE/FALSE, so a claim without `category`
  # cannot shift Filter()'s selection the way `==` on NULL would.
  claims <- Filter(
    function(claim) identical(claim[["category"]], "method"),
    registry$claims
  )

  lapply(claims, function(claim) {
    target <- claim$verification$file

    # Nothing to verify against: treated as passing.
    if (is.null(target)) {
      return(list(id = claim$id, valid = TRUE, message = NULL))
    }

    if (!file.exists(target)) {
      return(list(
        id = claim$id,
        valid = FALSE,
        message = sprintf("Verification file not found: %s", target)
      ))
    }

    pattern <- claim[["code_pattern"]]
    if (!is.character(pattern) || length(pattern) != 1L || is.na(pattern)) {
      return(list(
        id = claim$id,
        valid = FALSE,
        message = "No code_pattern defined for claim"
      ))
    }

    # warn = FALSE: a source file without a trailing newline is not an issue.
    code <- paste(readLines(target, warn = FALSE), collapse = "\n")
    valid <- grepl(pattern, code, perl = TRUE)

    list(
      id = claim$id,
      valid = valid,
      message = if (!valid) {
        sprintf("Pattern '%s' not found in %s", pattern, target)
      } else {
        NULL
      }
    )
  })
}
#' Validate data provenance (placeholder in this listing)
#'
#' @param provenance_path Path to the provenance document; defaults to
#'   "docs/DATA_PROVENANCE.md".
#' @return NOTE(review): the body is elided here, so as written this returns
#'   `NULL`. The caller concatenates the result with the other check results
#'   and passes it to `report_results()`, so the real implementation
#'   presumably returns a list of `list(id, valid, message)` entries --
#'   confirm against data-provenance.qmd.
validate_provenance <- function(provenance_path = "docs/DATA_PROVENANCE.md") {
  # ... implementation from data-provenance.qmd
}
#' Print one cli status line per validation result
#'
#' @param results List of results, each with `id`, `valid` and (for failed
#'   checks) `message`.
#' @return `NULL`, invisibly; called for its console output.
report_results <- function(results) {
  for (r in results) {
    # Failures first: print the danger line and move on to the next result.
    if (!r$valid) {
      cli_alert_danger("{r$id}: {r$message}")
      next
    }
    cli_alert_success("{r$id}")
  }
}
# CLI handling: when run non-interactively (e.g. via Rscript), translate
# command-line flags into validate_all() arguments.
if (!interactive()) {
  args <- commandArgs(trailingOnly = TRUE)

  # Command-line flag -> validate_all() argument name.
  flag_map <- c(
    "--quick" = "quick",
    "--config-only" = "config_only",
    "--methods-only" = "methods_only"
  )

  # Surface typos instead of silently running a different validation.
  unknown <- setdiff(args, names(flag_map))
  if (length(unknown) > 0) {
    unknown_txt <- paste(unknown, collapse = ", ")
    cli_alert_warning("Ignoring unrecognised argument(s): {unknown_txt}")
  }

  requested <- flag_map[names(flag_map) %in% args]

  # Forward only options that validate_all() actually declares, so a
  # recognised flag never turns into an "unused argument" error.
  supported <- requested %in% names(formals(validate_all))
  if (any(!supported)) {
    unsupported_txt <- paste(names(requested)[!supported], collapse = ", ")
    cli_alert_warning(
      "validate_all() has no option for {unsupported_txt}; flag ignored"
    )
  }

  opts <- as.list(rep(TRUE, sum(supported)))
  names(opts) <- unname(requested[supported])
  do.call(validate_all, opts)
}
Makefile Integration
# Validation targets
# Every target here is a command, not a file, so all of them are phony.
.PHONY: validate validate-consistency validate-quick validate-config \
	validate-methods validate-provenance submit

# Full validation: consistency checks plus the standalone provenance script.
validate: validate-consistency validate-provenance
	@echo "All validations passed"

validate-consistency:
	Rscript scripts/validate_consistency.R

validate-quick:
	Rscript scripts/validate_consistency.R --quick

validate-config:
	Rscript scripts/validate_consistency.R --config-only

validate-methods:
	Rscript scripts/validate_consistency.R --methods-only

validate-provenance:
	Rscript scripts/validate_provenance.R

# Pre-submission checklist: render the manuscript only after validation passes.
submit: validate
	@echo "Running pre-submission checks..."
	quarto render manuscript/paper.qmd
	@echo "Ready for submission!"
CI Integration
# .github/workflows/validate.yml
# Runs the consistency validation on pushes and pull requests to main.
name: Validate Consistency

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.3.0'
      # Packages loaded by scripts/validate_consistency.R
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          packages: |
            yaml
            cli
            stringr
      - name: Validate Config Claims
        run: Rscript scripts/validate_consistency.R --config-only
      - name: Validate Method Claims
        run: Rscript scripts/validate_consistency.R --methods-only
      - name: Full Validation
        run: Rscript scripts/validate_consistency.R
When to Validate
| Trigger | Validation Level |
|---|---|
| After config changes | Full |
| After code changes | Methods + Provenance |
| After manuscript edits | Config + Methods |
| Before PR merge | Full |
| Before submission | Full + Manual review |
Handling Failures
Config Mismatch
FAIL: n_replications
Config has 5000, expected 10000
Fix options:
- Update globals.yml to match manuscript
- Update manuscript to match config
- If intentional difference, document in registry
Method Mismatch
FAIL: sampling_method
Pattern 'randomLHS' not found in R/sampling.R
Fix options:
- Update code to use described method
- Update manuscript to describe actual method
- Add correct pattern to registry
Provenance Issue
FAIL: fig1_simulation
Output older than data source (needs regeneration)
Fix:
# Regenerate specific figure
Rscript -e "targets::tar_make(fig1_simulation)"
# Or regenerate all
make figures
Best Practices
- Validate early, validate often - Don’t wait until submission
- Fix immediately - Don’t let failures accumulate
- Use CI - Automated checks catch issues early
- Document exceptions - If something can’t be validated, explain why
- Review before submission - Automated checks + human review