Add glmnet support (#165)

EmilHvitfeldt · web-flow · commit dd586ca5cda0 · 2025-11-06T16:43:44.000-08:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -33,6 +33,7 @@ Suggests:
     DBI,
     dbplyr,
     earth (>= 5.1.2),
+    glmnet,
     methods,
     mlbench,
     modeldata,
@@ -53,5 +54,5 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.3
-Remotes:  
+Remotes: 
     topepo/Cubist
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,6 +7,7 @@ S3method(knit_print,tidypredict_test)
 S3method(parse_model,cubist)
 S3method(parse_model,earth)
 S3method(parse_model,glm)
+S3method(parse_model,glmnet)
 S3method(parse_model,lm)
 S3method(parse_model,model_fit)
 S3method(parse_model,party)
@@ -19,6 +20,7 @@ S3method(tidypredict_fit,"_xgb.Booster")
 S3method(tidypredict_fit,cubist)
 S3method(tidypredict_fit,earth)
 S3method(tidypredict_fit,glm)
+S3method(tidypredict_fit,glmnet)
 S3method(tidypredict_fit,lm)
 S3method(tidypredict_fit,model_fit)
 S3method(tidypredict_fit,party)
@@ -34,6 +36,7 @@ S3method(tidypredict_interval,list)
 S3method(tidypredict_interval,lm)
 S3method(tidypredict_test,"_xgb.Booster")
 S3method(tidypredict_test,default)
+S3method(tidypredict_test,glmnet)
 S3method(tidypredict_test,model_fit)
 S3method(tidypredict_test,party)
 S3method(tidypredict_test,xgb.Booster)
diff --git a/NEWS.md b/NEWS.md
@@ -24,6 +24,8 @@
 
 - Tree-based models now use the `.default` argument in the produced `case_when()` code when applicable. (#153)
 
+- Added support for glmnet models. (#165)
+
 # tidypredict 0.5.1
 
 - Exported a number of internal functions to be used in {orbital} package
diff --git a/R/model-glmnet.R b/R/model-glmnet.R
@@ -0,0 +1,69 @@
+# Predict ---------------------------------------
+
+#' @export
+tidypredict_fit.glmnet <- function(model) {
+  # Two steps: parse the glmnet fit into a model spec, then hand that spec
+  # to the shared formula builder.
+  spec <- parse_model(model)
+  build_fit_formula(spec)
+}
+
+# Parse model --------------------------------------
+
+#' @export
+parse_model.glmnet <- function(model) {
+  # The helper is invoked directly from this frame, so its default
+  # `call = rlang::caller_env()` resolves to this method's environment.
+  parsed <- parse_model_glmnet(model)
+  parsed
+}
+
+# Build a parsed-model spec from a glmnet fit that carries exactly one
+# penalty value.
+#
+# model: a glmnet fit; its `lambda`, `a0` and `beta` elements are read.
+# call:  environment reported in error messages; defaults to the caller's.
+#
+# Returns the result of `as_parsed_model()` applied to a list holding the
+# `general` settings and the non-zero `terms`. Aborts when the number of
+# penalties is not 1, or when the model does not inherit from "elnet",
+# "lognet" or "fishnet".
+parse_model_glmnet <- function(model, call = rlang::caller_env()) {
+  if (length(model$lambda) != 1) {
+    cli::cli_abort(
+      "{.fn tidypredict_fit} requires that there are only 1 penalty selected,
+      {length(model$lambda)} were provided.",
+      call = call
+    )
+  }
+  # A sparse coefficient matrix is flattened to a named numeric vector so it
+  # can be combined with the intercept below.
+  if (inherits(model$beta, "dgCMatrix")) {
+    model$beta <- setNames(as.numeric(model$beta), rownames(model$beta))
+  }
+  coefs <- c("(Intercept)" = unname(model$a0), model$beta)
+
+  names <- names(coefs)
+  values <- as.vector(coefs)
+
+  terms <- map2(values, names, \(value, name) {
+    # Coefficients that are exactly zero are left out of the spec; the NULL
+    # placeholders are discarded right after the map.
+    if (value == 0) {
+      return(NULL)
+    }
+    list(
+      label = name,
+      coef = value,
+      is_intercept = as.integer(name == "(Intercept)"),
+      fields = list(list(type = "ordinary", col = name))
+    )
+  })
+
+  terms <- purrr::discard(terms, is.null)
+
+  pm <- list()
+  # The second class entry is recorded as the model name.
+  pm$general$model <- class(model)[[2]]
+  pm$general$version <- 1
+  pm$general$type <- "regression"
+  pm$general$is_glm <- 1
+  pm$terms <- terms
+
+  # Family and link are derived from the glmnet subclass of the fit.
+  if (inherits(model, "elnet")) {
+    pm$general$family <- "gaussian"
+    pm$general$link <- "identity"
+  } else if (inherits(model, "lognet")) {
+    pm$general$family <- "binomial"
+    pm$general$link <- "logit"
+  } else if (inherits(model, "fishnet")) {
+    pm$general$family <- "poisson"
+    pm$general$link <- "log"
+  } else {
+    # Pass `call` so this error is attributed to the same frame as the
+    # penalty error above instead of to this internal helper.
+    cli::cli_abort(
+      "Model fit with this {.arg family} is not supported.",
+      call = call
+    )
+  }
+
+  as_parsed_model(pm)
+}
diff --git a/R/tidymodels.R b/R/tidymodels.R
@@ -7,14 +7,37 @@ tidypredict_fit._xgb.Booster <- function(model) {
 
 #' @export
 tidypredict_fit.model_fit <- function(model) {
+  model <- glmnet_set_lambda(model) # glmnet fits only: pin a0/beta/lambda to the spec's penalty
   tidypredict_fit(model$fit)
 }
 
 #' @export
 parse_model.model_fit <- function(model) {
+  model <- glmnet_set_lambda(model) # glmnet fits only: pin a0/beta/lambda to the spec's penalty
   parse_model(model$fit)
 }
 
+# glmnet adjustment ------------------------------------------------------
+
+# For a fit whose `$fit` inherits from "glmnet", overwrite `a0`, `beta` and
+# `lambda` on that fit with the coefficients predicted at the penalty stored
+# in `model$spec$args$penalty`. Any other model is returned untouched.
+glmnet_set_lambda <- function(model) {
+  if (!inherits(model$fit, "glmnet")) {
+    return(model)
+  }
+
+  lambda <- model$spec$args$penalty
+  estimates <- glmnet::predict.glmnet(
+    model$fit,
+    s = lambda,
+    type = "coefficients"
+  )
+
+  # Split the intercept row off from the remaining coefficients when present.
+  if ("(Intercept)" %in% rownames(estimates)) {
+    keep <- "(Intercept)" != rownames(estimates)
+    model$fit$a0 <- estimates["(Intercept)", ]
+    estimates <- estimates[keep, ]
+  }
+
+  model$fit$lambda <- lambda
+  model$fit$beta <- estimates
+  model
+}
+
 # broom ------------------------------------------------------------------
 
 #' @export
diff --git a/R/tidypredict_test.R b/R/tidypredict_test.R
@@ -186,6 +186,121 @@ tidypredict_test_default <- function(
   structure(results, class = c("tidypredict_test", "list"))
 }
 
+# Compare the model's own `predict()` output with the values produced by
+# `tidypredict_to_column()` on the same rows, and flag rows whose absolute
+# difference exceeds `threshold`.
+#
+# model:             a glmnet fit.
+# df:                data to predict on; converted with `as.matrix()`.
+#                    NOTE(review): the default reads `model$model` - confirm
+#                    glmnet fits carry that element.
+# threshold:         largest absolute difference that is still accepted.
+# include_intervals: also compare the `lwr`/`upr` columns.
+#                    NOTE(review): `interval` is forwarded to `predict()`;
+#                    confirm glmnet honours it before relying on this path.
+# max_rows:          when numeric, only the first `max_rows` rows are used.
+# xg_df:             not used by this method.
+#
+# Returns a list of class "tidypredict_test" with `model_call`,
+# `raw_results`, `message` and `alert`.
+#' @export
+tidypredict_test.glmnet <- function(
+  model,
+  df = model$model,
+  threshold = 0.000000000001,
+  include_intervals = FALSE,
+  max_rows = NULL,
+  xg_df = NULL
+) {
+  offset <- model$call$offset
+  ismodels <- paste0(colnames(model$model), collapse = " ") ==
+    paste0(colnames(df), collapse = " ")
+
+  # When `df` has the same columns as `model$model`, rename the "(offset)"
+  # column to the offset expression from the model call.
+  if (!is.null(offset) && ismodels) {
+    index <- colnames(df) == "(offset)"
+    colnames(df) <- replace(colnames(df), index, as.character(offset))
+  }
+
+  interval <- "none"
+  if (include_intervals) {
+    interval <- "prediction"
+  }
+
+  if (is.numeric(max_rows)) {
+    df <- head(df, max_rows)
+  }
+
+  preds <- predict(model, as.matrix(df), interval = interval, type = "response")
+
+  if (!include_intervals) {
+    base <- data.frame(fit = as.vector(preds), row.names = NULL)
+  } else {
+    base <- as.data.frame(preds)
+  }
+
+  te <- tidypredict_to_column(
+    df,
+    model,
+    add_interval = include_intervals,
+    vars = c("fit_te", "upr_te", "lwr_te")
+  )
+  if (include_intervals) {
+    te <- te[, c("fit_te", "upr_te", "lwr_te")]
+  } else {
+    te <- data.frame(fit_te = te[, "fit_te"])
+  }
+
+  raw_results <- cbind(base, te)
+  # fit_diff is kept signed in the results; the threshold test is absolute.
+  raw_results$fit_diff <- raw_results$fit - raw_results$fit_te
+  raw_results$fit_threshold <- abs(raw_results$fit_diff) > threshold
+
+  if (include_intervals) {
+    raw_results$lwr_diff <- abs(raw_results$lwr - raw_results$lwr_te)
+    raw_results$upr_diff <- abs(raw_results$upr - raw_results$upr_te)
+    raw_results$lwr_threshold <- raw_results$lwr_diff > threshold
+    raw_results$upr_threshold <- raw_results$upr_diff > threshold
+  }
+
+  rowid <- seq_len(nrow(raw_results))
+  raw_results <- cbind(data.frame(rowid), raw_results)
+
+  # Number of rows above the threshold, per compared column.
+  threshold_df <- data.frame(fit_threshold = sum(raw_results$fit_threshold))
+  if (include_intervals) {
+    threshold_df$lwr_threshold <- sum(raw_results$lwr_threshold)
+    threshold_df$upr_threshold <- sum(raw_results$upr_threshold)
+  }
+
+  alert <- any(threshold_df > 0)
+
+  message <- paste0(
+    "tidypredict test results\n",
+    "Difference threshold: ",
+    threshold,
+    "\n"
+  )
+
+  if (alert) {
+    # Report the largest absolute fit deviation so the figure is comparable
+    # with `threshold`; the signed maximum could be negative or understated.
+    difference <- data.frame(fit_diff = max(abs(raw_results$fit_diff)))
+    if (include_intervals) {
+      difference$lwr_diff <- max(raw_results$lwr_diff)
+      difference$upr_diff <- max(raw_results$upr_diff)
+    }
+    # Each label is paired with its own statistic. Interval values are NULL
+    # without intervals, and paste0() drops zero-length arguments.
+    message <- paste0(
+      message,
+      "\nFitted records above the threshold: ",
+      threshold_df$fit_threshold,
+      if (!is.null(threshold_df$lwr_threshold)) {
+        "\nLower interval records above the threshold: "
+      },
+      threshold_df$lwr_threshold,
+      if (!is.null(threshold_df$upr_threshold)) {
+        "\nUpper interval records above the threshold: "
+      },
+      threshold_df$upr_threshold,
+      "\n\nFit max  difference:",
+      difference$fit_diff,
+      "\nLower max difference:",
+      difference$lwr_diff,
+      "\nUpper max difference:",
+      difference$upr_diff
+    )
+  } else {
+    message <- paste0(
+      message,
+      "\n All results are within the difference threshold"
+    )
+  }
+  results <- list()
+  results$model_call <- model$call
+  results$raw_results <- raw_results
+  results$message <- message
+  results$alert <- alert
+  structure(results, class = c("tidypredict_test", "list"))
+}
+
 #' @export
 tidypredict_test.xgb.Booster <- function(
   model,
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -21,6 +21,8 @@ navbar:
       href: articles/lm.html 
     - text: Generalized Regression - glm()
       href: articles/glm.html
+    - text: Regularized Regression - glmnet()
+      href: articles/glmnet.html
     - text: Random Forest - Ranger - ranger()
       href: articles/ranger.html
     - text: Random Forest - randomForest()
diff --git a/tests/testthat/_snaps/model-glmnet.md b/tests/testthat/_snaps/model-glmnet.md
@@ -0,0 +1,63 @@
+# returns the right output
+
+    Code
+      rlang::expr_text(tf)
+    Output
+      [1] "35.3137765116027 + (cyl * -0.871451193824228) + (hp * -0.0101173960249783) + \n    (wt * -2.59443677687505)"
+
+# formulas produces correct predictions
+
+    Code
+      tidypredict_test(glmnet::glmnet(mtcars[, -1], mtcars$mpg, family = "gaussian",
+      lambda = 1), mtcars[, -1])
+    Output
+      tidypredict test results
+      Difference threshold: 1e-12
+      
+       All results are within the difference threshold
+
+---
+
+    Code
+      tidypredict_test(glmnet::glmnet(mtcars[, -8], mtcars$vs, family = "binomial",
+      lambda = 1), mtcars[, -1])
+    Output
+      tidypredict test results
+      Difference threshold: 1e-12
+      
+       All results are within the difference threshold
+
+---
+
+    Code
+      tidypredict_test(glmnet::glmnet(mtcars[, -8], mtcars$vs, family = "poisson",
+      lambda = 1), mtcars[, -1])
+    Output
+      tidypredict test results
+      Difference threshold: 1e-12
+      
+       All results are within the difference threshold
+
+# errors if more than 1 penalty is selected
+
+    Code
+      tidypredict_fit(model)
+    Condition
+      Error in `parse_model()`:
+      ! `tidypredict_fit()` requires that there are only 1 penalty selected, 79 were provided.
+
+---
+
+    Code
+      tidypredict_fit(model)
+    Condition
+      Error in `parse_model()`:
+      ! `tidypredict_fit()` requires that there are only 1 penalty selected, 2 were provided.
+
+# glmnet are handeld neatly with parsnip
+
+    Code
+      rlang::expr_text(tf)
+    Output
+      [1] "35.3140536966127 + (cyl * -0.871623418095165) + (hp * -0.0101157918502673) + \n    (wt * -2.59426484734253)"
+
diff --git a/tests/testthat/test-model-glmnet.R b/tests/testthat/test-model-glmnet.R
diff --git a/vignettes/glmnet.Rmd b/vignettes/glmnet.Rmd