WorldHealthOrganization · dirkschumacher · Feb 13, 2026 · Feb 13, 2026
diff --git a/R/classifications.R b/R/classifications.R
@@ -8,7 +8,8 @@
 #' .           If missing, prevalence will not be calculated for any biomarker because micronutrient status cut offs are sex-specific.
 #' @param age A vector of ages in years for the individuals in the dataset. It can also be a \code{lubridate::duration} object which is then converted to years. For example: if you have age in months, you can create a lubridate duration object like this \code{lubridate::duration(age_in_months, units = "months")}.
 #' @param pregnancy_status (Optional) A vector indicating pregnancy status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2"), Unknown ("unk" or "3" or blank).
-#'                         When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant"
+#'                         When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant".
+#'                         Note that 'pregnancy_status' should not be missing when 'pregnancyweeks' or 'pregnancymonths' contain valid, non-missing values. Missing 'pregnancy_status' in these cases will be considered as 'not pregnant'.
 #' @param lactating_status (Optional) A vector indicating lactation status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2").
 #' @param pregnancyweeks (Optional) A numeric vector indicating the number of weeks of pregnancy.
 #' @param pregnancymonths (Optional) A numeric vector indicating the number of months of pregnancy.
@@ -136,7 +137,7 @@ classify_data_internal <- function(
     malaria = malaria
   )
   cols <- names(concept_list$values)
-
+  validate_concepts(concept_list)
   values <- lapply(indicators, function(x) {
     value_concept <- indicator_value_concept(x)
     if (is.null(value_concept)) {
@@ -153,10 +154,49 @@ classify_data_internal <- function(
   }
   colnames(df) <- paste0("indicator_", colnames(df))
   concept_df <- dplyr::bind_cols(concept_list$values[concept_list$non_nulls])
+  # age is part of the output in years and months
+  concept_df$age_years <- as.numeric(concept_df$age, "years")
+  concept_df$age_months <- as.numeric(concept_df$age, "months")
+  concept_df$age <- NULL
+  concept_df <- concept_df[, c(
+    c("age_years", "age_months"),
+    setdiff(colnames(concept_df), c("age_years", "age_months"))
+  )]
   colnames(concept_df) <- paste0("input_", colnames(concept_df))
   dplyr::bind_cols(concept_df, df)
 }
 
+# validate_concepts does some general prechecks independent of
+# the actual indicators being used.
+validate_concepts <- function(concepts) {
+  is_smoker <- concepts$values$is_smoker
+  smokes_cigarettes_per_day <- concepts$values$smokes_cigarettes_per_day
+  if (!is.null(is_smoker) && !is.null(smokes_cigarettes_per_day)) {
+    if (any(is.na(is_smoker) & !is.na(smokes_cigarettes_per_day))) {
+      warning(
+        "Missing `is_smoker`: a non NA value for `is_smoker` is required when `smokes_cigarettes_per_day` is not NA."
+      )
+    }
+  }
+  pregnancy_status <- concepts$values$pregnancy_status
+  pregnancyweeks <- concepts$values$pregnancyweeks
+  pregnancymonths <- concepts$values$pregnancymonths
+  if (!is.null(pregnancy_status) && !is.null(pregnancyweeks)) {
+    if (any(is.na(pregnancy_status) & !is.na(pregnancyweeks))) {
+      warning(
+        "Missing `pregnancy_status`: a non NA value for `pregnancy_status` is required when `pregnancyweeks` is not NA."
+      )
+    }
+  }
+  if (!is.null(pregnancy_status) && !is.null(pregnancymonths)) {
+    if (any(is.na(pregnancy_status) & !is.na(pregnancymonths))) {
+      warning(
+        "Missing `pregnancy_status`: a non NA value for `pregnancy_status` is required when `pregnancymonths` is not NA."
+      )
+    }
+  }
+}
+
 validate_indicators <- function(indicators) {
   stopifnot(is.list(indicators))
   stopifnot(all(vapply(

diff --git a/R/indicators-anaemia.R b/R/indicators-anaemia.R
@@ -20,7 +20,6 @@ anaemia_adjustment <- function(
     length(altitude) == length(is_smoker),
     length(is_smoker) == length(smokes_cigarettes_per_day)
   )
-
   altitude <- as.numeric(altitude)
   altitude[is.na(altitude)] <- 0
   altitude_adjustments <- (0.0056384 * altitude) + (0.0000003 * altitude^2)
@@ -275,10 +274,5 @@ anaemia_indicator <- indicator(
   reorder_columns = list(
     short = NULL,
     long = NULL
-  ),
-  plot_settings = list(
-    dot_plot = list(
-      show_ci = TRUE
-    )
   )
 )
diff --git a/R/indicators-iodine.R b/R/indicators-iodine.R
@@ -131,10 +131,5 @@ iodine_indicator <- indicator(
   prevalence_reports = list(
     long = FALSE,
     short = TRUE
-  ),
-  plot_settings = list(
-    dot_plot = list(
-      show_ci = FALSE
-    )
   )
 )
diff --git a/R/indicators.R b/R/indicators.R
@@ -15,7 +15,6 @@ indicator <- function(
   drop_columns = NULL,
   rename_columns = NULL,
   reorder_columns = NULL,
-  plot_settings = NULL,
   prevalence_reports = list(
     long = TRUE,
     short = TRUE
@@ -52,7 +51,6 @@ indicator <- function(
       drop_columns = drop_columns,
       rename_columns = rename_columns,
       reorder_columns = reorder_columns,
-      plot_settings = plot_settings,
       parent_env = envir
     ),
     class = "indicator"
@@ -105,12 +103,6 @@ indicator_prevalence_names <- function(indicator) {
   indicator$prevalence_category_names
 }
 
-
-indicator_plot_settings <- function(indicator) {
-  stopifnot(is_indicator(indicator))
-  indicator$plot_settings
-}
-
 prevalence_report_long <- function(indicator) {
   isTRUE(indicator$prevalence_report$long)
 }

diff --git a/man/individual_classification.Rd b/man/individual_classification.Rd
diff --git a/man/micronutrients_stats.Rd b/man/micronutrients_stats.Rd
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
@@ -6,10 +6,10 @@ random_datset <- function(n) {
     age_years = pmax(0, rnorm(n, mean = 20, sd = 10)),
     sex = sample(c(1, 2), size = n, replace = TRUE, prob = c(0.1, 0.9)),
     pregnancy_status = sample(
-      c(1L, 2L, NA_integer_),
+      c(1L, 2L),
       size = n,
       replace = TRUE,
-      prob = c(0.1, 0.89, 0.01)
+      prob = c(0.1, 0.9)
     ),
     pregnancyweeks = sample(1:(9 * 4), size = n, replace = TRUE),
     pregnancymonths = sample(1:9, size = n, replace = TRUE),
@@ -25,10 +25,10 @@ random_datset <- function(n) {
     agp_measurement = pmax(0.1, rnorm(n, mean = 0.5, sd = 0.5)),
     haemoglobin_measurement = pmax(0.1, rnorm(n, mean = 100, sd = 50)),
     is_smoker = sample(
-      c(1L, 2L, NA_integer_),
+      c(1L, 2L),
       size = n,
       replace = TRUE,
-      prob = c(0.1, 0.89, 0.01)
+      prob = c(0.1, 0.90)
     ),
     smokes_cigarettes_per_day = rpois(n, 3),
     altitude = pmax(0, rnorm(n, mean = 500, sd = 1000)),

diff --git a/tests/testthat/test-classification.R b/tests/testthat/test-classification.R
@@ -31,7 +31,8 @@ test_that("row level classification works", {
   expect_equal(
     colnames(res),
     c(
-      "input_age",
+      "input_age_years",
+      "input_age_months",
       "input_sex",
       "input_pregnancy_status",
       "input_pregnancyweeks",
@@ -82,7 +83,6 @@ test_that("optional values have their prototype values", {
   )))
   expect_true(all(
     c(
-      "input_age",
       "input_sex",
       "input_ferritin",
       "input_iodine",
@@ -185,5 +185,64 @@ test_that("you can use lubridate to define the age", {
     AGP = testdata$agp_measurement
   )
   expect_true(is.data.frame(res))
-  expect_true(is.duration(res$input_age))
+  expect_equal(res$input_age_years, as.numeric(age, "years"))
+  expect_equal(res$input_age_months, as.numeric(age, "months"))
+})
+
+test_that("it warns if `is_smoker` is NULL but `smokes_cigarattes_per_day` is not", {
+  testdata <- random_datset(100)
+  expect_warning(
+    individual_classification(
+      indicators = list(
+        indicator_anaemia()
+      ),
+      age = testdata$age_years,
+      sex = testdata$sex,
+      iodine = testdata$iodine,
+      haemoglobin = testdata$haemoglobin_measurement,
+      altitude = testdata$altitude,
+      smokes_cigarettes_per_day = testdata$smokes_cigarettes_per_day
+    ),
+    regexp = "is_smoker"
+  )
+})
+
+test_that("it warns if `pregnancy_status` is NULL but other preganancy related variables have values", {
+  testdata <- random_datset(100)
+  expect_warning(
+    expect_warning(
+      individual_classification(
+        indicators = list(
+          indicator_anaemia()
+        ),
+        age = testdata$age_years,
+        sex = testdata$sex,
+        iodine = testdata$iodine,
+        haemoglobin = testdata$haemoglobin_measurement,
+        altitude = testdata$altitude,
+        pregnancyweeks = testdata$pregnancyweeks,
+        pregnancymonths = testdata$pregnancymonths
+      ),
+      regexp = "pregnancyweeks"
+    ),
+    regexp = "pregnancymonths"
+  )
+})
+
+test_that("age is in years and months in the output", {
+  testdata <- random_datset(100)
+  res <- individual_classification(
+    indicators = list(
+      indicator_anaemia()
+    ),
+    age = testdata$age_years,
+    sex = testdata$sex,
+    ferritin = testdata$ferritin_measurement,
+    haemoglobin = testdata$haemoglobin_measurement,
+    iodine = testdata$iodine,
+    altitude = testdata$altitude
+  )
+  expect_contains(colnames(res), c("input_age_years", "input_age_months"))
+  expect_null(res[["input_age"]])
+  expect_true(all(res$input_age_months >= res$input_age_years))
 })