From 67b6e5263c8a0e8985f9613958063e30e2b1cc8d Mon Sep 17 00:00:00 2001 From: Harsh Agrawal Date: Fri, 22 Aug 2025 01:11:12 +0530 Subject: [PATCH 1/7] added changed to read values from csv file --- base/db/R/get.trait.data.R | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index b307da08c0b..e3300a6ec16 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -42,6 +42,41 @@ get.trait.data <- PEcAn.logger::logger.severe('At least one pft in settings is missing its "outdir"') } + #check for flatfile path if present use it + use_flatfile <- !is.null(pfts$file_path) && file.exists(pfts$file_path) + + if (use_flatfile) { + PEcAn.logger::logger.info("Using flat file for trait data instead of database") + + # Load flat file as data.frame + trait_data_flat <- read.csv(pfts$file_path, stringsAsFactors = FALSE) + + # Build trait.names from flat file if not already provided + if (is.null(trait.names)) { + pft_names <- vapply(pfts, "[[", character(1), "name") + pft_ids <- unique(trait_data_flat$pft_id[ + trait_data_flat$pft_name %in% pft_names & + trait_data_flat$pft_type == modeltype + ]) + trait.names <- unique(trait_data_flat$trait_name[ + trait_data_flat$pft_id %in% pft_ids + ]) + } + + # Call get.trait.data.pft with trait_data instead of dbcon + result <- lapply(pfts, get.trait.data.pft, + modeltype = modeltype, + dbfiles = dbfiles, + dbcon = NULL, + trait_data = trait_data_flat, + write = write, + forceupdate = forceupdate, + trait.names = trait.names) + + return(invisible(result)) + } + + dbcon <- db.open(database) on.exit(db.close(dbcon), add = TRUE) From 35b8fb6bfba7a49e563d1543050080faa52c3cd4 Mon Sep 17 00:00:00 2001 From: Harsh Agrawal Date: Thu, 28 Aug 2025 18:35:39 +0530 Subject: [PATCH 2/7] updated get.trait.data to accept input_file for file_path --- base/db/R/get.trait.data.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index e3300a6ec16..6aa1151c843 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -31,7 +31,8 @@ get.trait.data <- database, forceupdate, write = FALSE, - trait.names = NULL) { + trait.names = NULL, + input_file= NULL) { if (!is.list(pfts)) { PEcAn.logger::logger.severe('pfts must be a list') @@ -43,14 +44,18 @@ get.trait.data <- } #check for flatfile path if present use it - use_flatfile <- !is.null(pfts$file_path) && file.exists(pfts$file_path) - + file_path <- input_file + if(is.null(file_path)){ + file.path <- pfts$file_path + } + use_flatfile <- !is.null(file_path) && file.exists(file_path) + if (use_flatfile) { PEcAn.logger::logger.info("Using flat file for trait data instead of database") - + # Load flat file as data.frame trait_data_flat <- read.csv(pfts$file_path, stringsAsFactors = FALSE) - + # Build trait.names from flat file if not already provided if (is.null(trait.names)) { pft_names <- vapply(pfts, "[[", character(1), "name") @@ -62,7 +67,7 @@ get.trait.data <- trait_data_flat$pft_id %in% pft_ids ]) } - + # Call get.trait.data.pft with trait_data instead of dbcon result <- lapply(pfts, get.trait.data.pft, modeltype = modeltype, From 3d0956d9a80c63d1678e58ec7139fbf3a3360d90 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Mon, 22 Sep 2025 12:06:43 -0700 Subject: [PATCH 3/7] space --- base/db/R/get.trait.data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index 6aa1151c843..66d9cfa9025 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -32,7 +32,7 @@ get.trait.data <- forceupdate, write = FALSE, trait.names = NULL, - input_file= NULL) { + input_file = NULL) { if (!is.list(pfts)) { PEcAn.logger::logger.severe('pfts must be a list') From fe4ecef941500b921bddaeee61c08ae14556a3a9 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Thu, 6 Nov 2025 09:54:00 -0800 Subject: [PATCH 4/7] Apply suggestions from code review --- base/db/R/get.trait.data.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index 66d9cfa9025..b8312705479 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -43,18 +43,16 @@ get.trait.data <- PEcAn.logger::logger.severe('At least one pft in settings is missing its "outdir"') } - #check for flatfile path if present use it - file_path <- input_file - if(is.null(file_path)){ - file.path <- pfts$file_path - } - use_flatfile <- !is.null(file_path) && file.exists(file_path) - - if (use_flatfile) { + #check for flatfile path, if present use it + file_path <- input_file %||% pfts$file_path + if (!is.null(file_path) { + if (!file.exists(file_path) { + PEcAn.logger::logger.error("trait data file not found at specified path", sQuote(file_path)) + } PEcAn.logger::logger.info("Using flat file for trait data instead of database") # Load flat file as data.frame - trait_data_flat <- read.csv(pfts$file_path, stringsAsFactors = FALSE) + trait_data_flat <- read.csv(file_path, stringsAsFactors = FALSE) # Build trait.names from flat file if not already provided if (is.null(trait.names)) { From 0a88a28f96c7cca6f97f82956d8a6dfaa8d2ea9c Mon Sep 17 00:00:00 2001 From: Chris Black Date: Thu, 6 Nov 2025 09:56:34 -0800 Subject: [PATCH 5/7] typo --- base/db/R/get.trait.data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index b8312705479..c5c0d4ad1d2 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -45,7 +45,7 @@ get.trait.data <- #check for flatfile path, if present use it file_path <- input_file %||% pfts$file_path - if (!is.null(file_path) { + if (!is.null(file_path)) { if (!file.exists(file_path) { PEcAn.logger::logger.error("trait data file not found at specified path", sQuote(file_path)) } From f0aa21c954b118b2427f9602ba17cc9b200ad1aa Mon Sep 17 00:00:00 2001 From: Chris Black Date: Thu, 6 Nov 2025 10:33:09 -0800 Subject: [PATCH 6/7] =?UTF-8?q?typo=20again=20=F0=9F=A4=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/db/R/get.trait.data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index c5c0d4ad1d2..ee75a9f469b 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -46,7 +46,7 @@ get.trait.data <- #check for flatfile path, if present use it file_path <- input_file %||% pfts$file_path if (!is.null(file_path)) { - if (!file.exists(file_path) { + if (!file.exists(file_path)) { PEcAn.logger::logger.error("trait data file not found at specified path", sQuote(file_path)) } PEcAn.logger::logger.info("Using flat file for trait data instead of database") From 0f607fe9306b4a2ba430d78812aa98eec8795ced Mon Sep 17 00:00:00 2001 From: Chris Black Date: Sun, 9 Nov 2025 01:56:25 -0800 Subject: [PATCH 7/7] doc, ::, whitespace --- base/db/NAMESPACE | 1 + base/db/R/get.trait.data.R | 62 +++++++++++++++++++---------------- base/db/man/get.trait.data.Rd | 12 ++++++- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/base/db/NAMESPACE b/base/db/NAMESPACE index 938b1cfe2d4..22594703c20 100644 --- a/base/db/NAMESPACE +++ b/base/db/NAMESPACE @@ -66,5 +66,6 @@ export(workflows) importFrom(magrittr,"%>%") importFrom(rlang,"!!!") importFrom(rlang,"!!") +importFrom(rlang,"%||%") importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/base/db/R/get.trait.data.R b/base/db/R/get.trait.data.R index ee75a9f469b..377761d931f 100644 --- a/base/db/R/get.trait.data.R +++ b/base/db/R/get.trait.data.R @@ -6,7 +6,12 @@ ##' - `settings$database$bety` ##' - `settings$database$dbfiles` ##' - `settings$meta.analysis$update` -##' +##' +##' If either `input_file` or `settings$pfts$file_path` is provided, +##' it should be a valid path to a CSV (with at least columns +##' `name`, `distn`, `parama`, `paramb`, `n`) and will be used instead of +##' `database` for trait lookup. +##' ##' @param pfts the list of pfts to get traits for ##' @param modeltype type of model that is used, this is is used to distinguish ##' between different PFTs with the same name. @@ -21,39 +26,40 @@ ##' @param trait.names Character vector of trait names to search. If ##' `NULL` (default), use all traits that have a prior for at least ##' one of the `pfts`. +##' @param input_file Path to a CSV file containing prior information. +##' If specified, `database` is not used. ##' @return list of PFTs with update posteriorids ##' @author David LeBauer, Shawn Serbin, Alexey Shiklomanov +##' @importFrom rlang %||% ##' @export -get.trait.data <- - function(pfts, - modeltype, - dbfiles, - database, - forceupdate, - write = FALSE, - trait.names = NULL, - input_file = NULL) { - +get.trait.data <- function(pfts, + modeltype, + dbfiles, + database, + forceupdate, + write = FALSE, + trait.names = NULL, + input_file = NULL) { if (!is.list(pfts)) { - PEcAn.logger::logger.severe('pfts must be a list') + PEcAn.logger::logger.severe("pfts must be a list") } # Check that all PFTs have associated outdir entries - pft_outdirs <- lapply(pfts, '[[', 'outdir') + pft_outdirs <- lapply(pfts, "[[", "outdir") if (any(sapply(pft_outdirs, is.null))) { - PEcAn.logger::logger.severe('At least one pft in settings is missing its "outdir"') + PEcAn.logger::logger.severe("At least one pft in settings is missing its `outdir`") } - - #check for flatfile path, if present use it + + #check for flatfile path, if present use it file_path <- input_file %||% pfts$file_path if (!is.null(file_path)) { if (!file.exists(file_path)) { PEcAn.logger::logger.error("trait data file not found at specified path", sQuote(file_path)) } PEcAn.logger::logger.info("Using flat file for trait data instead of database") - + # Load flat file as data.frame - trait_data_flat <- read.csv(file_path, stringsAsFactors = FALSE) - + trait_data_flat <- utils::read.csv(file_path, stringsAsFactors = FALSE) + # Build trait.names from flat file if not already provided if (is.null(trait.names)) { pft_names <- vapply(pfts, "[[", character(1), "name") @@ -65,24 +71,24 @@ get.trait.data <- trait_data_flat$pft_id %in% pft_ids ]) } - + # Call get.trait.data.pft with trait_data instead of dbcon result <- lapply(pfts, get.trait.data.pft, modeltype = modeltype, dbfiles = dbfiles, - dbcon = NULL, - trait_data = trait_data_flat, + dbcon = NULL, + trait_data = trait_data_flat, write = write, forceupdate = forceupdate, trait.names = trait.names) - return(invisible(result)) + return(invisible(result)) } dbcon <- db.open(database) on.exit(db.close(dbcon), add = TRUE) - + if (is.null(trait.names)) { PEcAn.logger::logger.debug(paste0( "`trait.names` is NULL, so retrieving all traits ", @@ -93,7 +99,7 @@ get.trait.data <- # NOTE: Use `format` here to avoid implicit (incorrect) coercion # to double by `lapply`. This works fine if we switch to # `query_priors`, but haven't done so yet because that requires - # prepared statements and therefore requires the Postgres driver. + # prepared statements and therefore requires the Postgres driver. all_priors_list <- lapply(format(pft_ids, scientific = FALSE), query.priors, con = dbcon, trstr = trait.names) trait.names <- unique(unlist(lapply(all_priors_list, rownames))) @@ -101,7 +107,7 @@ get.trait.data <- # all_priors <- query_priors(pfts, params = database) # trait.names <- unique(all_priors[["name"]]) } - + # process all pfts result <- lapply(pfts, get.trait.data.pft, modeltype = modeltype, @@ -110,6 +116,6 @@ get.trait.data <- write = write, forceupdate = forceupdate, trait.names = trait.names) - + invisible(result) -} \ No newline at end of file +} diff --git a/base/db/man/get.trait.data.Rd b/base/db/man/get.trait.data.Rd index 5ff68a92a4a..151a9195504 100644 --- a/base/db/man/get.trait.data.Rd +++ b/base/db/man/get.trait.data.Rd @@ -11,7 +11,8 @@ get.trait.data( database, forceupdate, write = FALSE, - trait.names = NULL + trait.names = NULL, + input_file = NULL ) } \arguments{ @@ -35,6 +36,9 @@ BETYdb. Defaults to FALSE.} \item{trait.names}{Character vector of trait names to search. If `NULL` (default), use all traits that have a prior for at least one of the `pfts`.} + +\item{input_file}{Path to a CSV file containing prior information. +If specified, `database` is not used.} } \value{ list of PFTs with update posteriorids @@ -47,6 +51,12 @@ This will use the following items from settings: - `settings$database$dbfiles` - `settings$meta.analysis$update` } +\details{ +If either `input_file` or `settings$pfts$file_path` is provided, + it should be a valid path to a CSV (with at least columns + `name`, `distn`, `parama`, `paramb`, `n`) and will be used instead of + `database` for trait lookup. +} \author{ David LeBauer, Shawn Serbin, Alexey Shiklomanov }