diff --git a/R/classifications.R b/R/classifications.R index 8387c01..24d7cf4 100644 --- a/R/classifications.R +++ b/R/classifications.R @@ -8,7 +8,8 @@ #' . If missing, prevalence will not be calculated for any biomarker because micronutrient status cut offs are sex-specific. #' @param age A vector of ages in years for the individuals in the dataset. It can also be a \code{lubridate::duration} object which is then converted to years. For example: if you have age in months, you can create a lubridate duration object like this \code{lubridate::duration(age_in_months, units = "months")}. #' @param pregnancy_status (Optional) A vector indicating pregnancy status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2"), Unknown ("unk" or "3" or blank). -#' When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant" +#' When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant". +#' Note that 'pregnancy_status' should not be missing when 'pregnancyweeks' or 'pregnancymonths' contain valid, non-missing values. Missing 'pregnancy_status' in these cases will be considered as 'not pregnant'. #' @param lactating_status (Optional) A vector indicating lactation status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2"). #' @param pregnancyweeks (Optional) A numeric vector indicating the number of weeks of pregnancy. #' @param pregnancymonths (Optional) A numeric vector indicating the number of months of pregnancy. @@ -136,7 +137,7 @@ classify_data_internal <- function( malaria = malaria ) cols <- names(concept_list$values) - + validate_concepts(concept_list) values <- lapply(indicators, function(x) { value_concept <- indicator_value_concept(x) if (is.null(value_concept)) { @@ -153,10 +154,49 @@ classify_data_internal <- function( } colnames(df) <- paste0("indicator_", colnames(df)) concept_df <- dplyr::bind_cols(concept_list$values[concept_list$non_nulls]) + # age is part of the output in years and months + concept_df$age_years <- as.numeric(concept_df$age, "years") + concept_df$age_months <- as.numeric(concept_df$age, "months") + concept_df$age <- NULL + concept_df <- concept_df[, c( + c("age_years", "age_months"), + setdiff(colnames(concept_df), c("age_years", "age_months")) + )] colnames(concept_df) <- paste0("input_", colnames(concept_df)) dplyr::bind_cols(concept_df, df) } +# validate_concepts does some general prechecks independent of +# the actual indicators being used. +validate_concepts <- function(concepts) { + is_smoker <- concepts$values$is_smoker + smokes_cigarettes_per_day <- concepts$values$smokes_cigarettes_per_day + if (!is.null(is_smoker) && !is.null(smokes_cigarettes_per_day)) { + if (any(is.na(is_smoker) & !is.na(smokes_cigarettes_per_day))) { + warning( + "Missing `is_smoker`: a non NA value for `is_smoker` is required when `smokes_cigarettes_per_day` is not NA." + ) + } + } + pregnancy_status <- concepts$values$pregnancy_status + pregnancyweeks <- concepts$values$pregnancyweeks + pregnancymonths <- concepts$values$pregnancymonths + if (!is.null(pregnancy_status) && !is.null(pregnancyweeks)) { + if (any(is.na(pregnancy_status) & !is.na(pregnancyweeks))) { + warning( + "Missing `pregnancy_status`: a non NA value for `pregnancy_status` is required when `pregnancyweeks` is not NA." + ) + } + } + if (!is.null(pregnancy_status) && !is.null(pregnancymonths)) { + if (any(is.na(pregnancy_status) & !is.na(pregnancymonths))) { + warning( + "Missing `pregnancy_status`: a non NA value for `pregnancy_status` is required when `pregnancymonths` is not NA." + ) + } + } +} + validate_indicators <- function(indicators) { stopifnot(is.list(indicators)) stopifnot(all(vapply( diff --git a/R/indicators-anaemia.R b/R/indicators-anaemia.R index 442d957..f2248f7 100644 --- a/R/indicators-anaemia.R +++ b/R/indicators-anaemia.R @@ -20,7 +20,6 @@ anaemia_adjustment <- function( length(altitude) == length(is_smoker), length(is_smoker) == length(smokes_cigarettes_per_day) ) - altitude <- as.numeric(altitude) altitude[is.na(altitude)] <- 0 altitude_adjustments <- (0.0056384 * altitude) + (0.0000003 * altitude^2) @@ -275,10 +274,5 @@ anaemia_indicator <- indicator( reorder_columns = list( short = NULL, long = NULL - ), - plot_settings = list( - dot_plot = list( - show_ci = TRUE - ) ) ) diff --git a/R/indicators-iodine.R b/R/indicators-iodine.R index c0799a2..74b6db0 100644 --- a/R/indicators-iodine.R +++ b/R/indicators-iodine.R @@ -131,10 +131,5 @@ iodine_indicator <- indicator( prevalence_reports = list( long = FALSE, short = TRUE - ), - plot_settings = list( - dot_plot = list( - show_ci = FALSE - ) ) ) diff --git a/R/indicators.R b/R/indicators.R index 68f0991..2c13730 100644 --- a/R/indicators.R +++ b/R/indicators.R @@ -15,7 +15,6 @@ indicator <- function( drop_columns = NULL, rename_columns = NULL, reorder_columns = NULL, - plot_settings = NULL, prevalence_reports = list( long = TRUE, short = TRUE @@ -52,7 +51,6 @@ indicator <- function( drop_columns = drop_columns, rename_columns = rename_columns, reorder_columns = reorder_columns, - plot_settings = plot_settings, parent_env = envir ), class = "indicator" @@ -105,12 +103,6 @@ indicator_prevalence_names <- function(indicator) { indicator$prevalence_category_names } - -indicator_plot_settings <- function(indicator) { - stopifnot(is_indicator(indicator)) - indicator$plot_settings -} - prevalence_report_long <- function(indicator) { isTRUE(indicator$prevalence_report$long) } diff --git a/man/individual_classification.Rd b/man/individual_classification.Rd index 01f4214..3e9c555 100644 --- a/man/individual_classification.Rd +++ b/man/individual_classification.Rd @@ -32,7 +32,8 @@ individual_classification( \item{age}{A vector of ages in years for the individuals in the dataset. It can also be a \code{lubridate::duration} object which is then converted to years. For example: if you have age in months, you can create a lubridate duration object like this \code{lubridate::duration(age_in_months, units = "months")}.} \item{pregnancy_status}{(Optional) A vector indicating pregnancy status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2"), Unknown ("unk" or "3" or blank). -When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant"} +When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant". +Note that 'pregnancy_status' should not be missing when 'pregnancyweeks' or 'pregnancymonths' contain valid, non-missing values. Missing 'pregnancy_status' in these cases will be considered as 'not pregnant'.} \item{pregnancyweeks}{(Optional) A numeric vector indicating the number of weeks of pregnancy.} diff --git a/man/micronutrients_stats.Rd b/man/micronutrients_stats.Rd index da75cd1..c7a60d8 100644 --- a/man/micronutrients_stats.Rd +++ b/man/micronutrients_stats.Rd @@ -40,7 +40,8 @@ micronutrients_stats( \item{age}{A vector of ages in years for the individuals in the dataset. It can also be a \code{lubridate::duration} object which is then converted to years. For example: if you have age in months, you can create a lubridate duration object like this \code{lubridate::duration(age_in_months, units = "months")}.} \item{pregnancy_status}{(Optional) A vector indicating pregnancy status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2"), Unknown ("unk" or "3" or blank). -When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant"} +When Unknown ("unk" or "3" or blank), it will be categorized as "not pregnant". +Note that 'pregnancy_status' should not be missing when 'pregnancyweeks' or 'pregnancymonths' contain valid, non-missing values. Missing 'pregnancy_status' in these cases will be considered as 'not pregnant'.} \item{lactating_status}{(Optional) A vector indicating lactation status. Accepted values: For Yes ("Y", "y", or "1"), No ("N", "n", or "2").} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index d5d99d9..2457d86 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -6,10 +6,10 @@ random_datset <- function(n) { age_years = pmax(0, rnorm(n, mean = 20, sd = 10)), sex = sample(c(1, 2), size = n, replace = TRUE, prob = c(0.1, 0.9)), pregnancy_status = sample( - c(1L, 2L, NA_integer_), + c(1L, 2L), size = n, replace = TRUE, - prob = c(0.1, 0.89, 0.01) + prob = c(0.1, 0.9) ), pregnancyweeks = sample(1:(9 * 4), size = n, replace = TRUE), pregnancymonths = sample(1:9, size = n, replace = TRUE), @@ -25,10 +25,10 @@ random_datset <- function(n) { agp_measurement = pmax(0.1, rnorm(n, mean = 0.5, sd = 0.5)), haemoglobin_measurement = pmax(0.1, rnorm(n, mean = 100, sd = 50)), is_smoker = sample( - c(1L, 2L, NA_integer_), + c(1L, 2L), size = n, replace = TRUE, - prob = c(0.1, 0.89, 0.01) + prob = c(0.1, 0.90) ), smokes_cigarettes_per_day = rpois(n, 3), altitude = pmax(0, rnorm(n, mean = 500, sd = 1000)), diff --git a/tests/testthat/test-classification.R b/tests/testthat/test-classification.R index 006893d..197faf9 100644 --- a/tests/testthat/test-classification.R +++ b/tests/testthat/test-classification.R @@ -31,7 +31,8 @@ test_that("row level classification works", { expect_equal( colnames(res), c( - "input_age", + "input_age_years", + "input_age_months", "input_sex", "input_pregnancy_status", "input_pregnancyweeks", @@ -82,7 +83,6 @@ test_that("optional values have their prototype values", { ))) expect_true(all( c( - "input_age", "input_sex", "input_ferritin", "input_iodine", @@ -185,5 +185,64 @@ test_that("you can use lubridate to define the age", { AGP = testdata$agp_measurement ) expect_true(is.data.frame(res)) - expect_true(is.duration(res$input_age)) + expect_equal(res$input_age_years, as.numeric(age, "years")) + expect_equal(res$input_age_months, as.numeric(age, "months")) +}) + +test_that("it warns if `is_smoker` is NULL but `smokes_cigarattes_per_day` is not", { + testdata <- random_datset(100) + expect_warning( + individual_classification( + indicators = list( + indicator_anaemia() + ), + age = testdata$age_years, + sex = testdata$sex, + iodine = testdata$iodine, + haemoglobin = testdata$haemoglobin_measurement, + altitude = testdata$altitude, + smokes_cigarettes_per_day = testdata$smokes_cigarettes_per_day + ), + regexp = "is_smoker" + ) +}) + +test_that("it warns if `pregnancy_status` is NULL but other preganancy related variables have values", { + testdata <- random_datset(100) + expect_warning( + expect_warning( + individual_classification( + indicators = list( + indicator_anaemia() + ), + age = testdata$age_years, + sex = testdata$sex, + iodine = testdata$iodine, + haemoglobin = testdata$haemoglobin_measurement, + altitude = testdata$altitude, + pregnancyweeks = testdata$pregnancyweeks, + pregnancymonths = testdata$pregnancymonths + ), + regexp = "pregnancyweeks" + ), + regexp = "pregnancymonths" + ) +}) + +test_that("age is in years and months in the output", { + testdata <- random_datset(100) + res <- individual_classification( + indicators = list( + indicator_anaemia() + ), + age = testdata$age_years, + sex = testdata$sex, + ferritin = testdata$ferritin_measurement, + haemoglobin = testdata$haemoglobin_measurement, + iodine = testdata$iodine, + altitude = testdata$altitude + ) + expect_contains(colnames(res), c("input_age_years", "input_age_months")) + expect_null(res[["input_age"]]) + expect_true(all(res$input_age_months >= res$input_age_years)) })