diff --git a/DESCRIPTION b/DESCRIPTION index ff6d9e69..53015db3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,22 +1,24 @@ Package: neotoma2 Title: Working with the Neotoma Paleoecology Database -Date: 2025-12-12 -Version: 1.0.11 +Date: 2026-04-23 +Version: 1.0.12 Authors@R: c(person(given = "Dominguez Vidana", family = "Socorro", role = c("aut", "cre"), email = "dominguezvid@wisc.edu", - comment = structure("0000-0002-7926-4935", .Names = "ORCID")), + comment = c(ORCID="0000-0002-7926-4935")), person(given = "Simon", family = "Goring", role = c("aut"), email = "goring@wisc.edu", - comment = structure("0000-0002-2700-4605", .Names = "ORCID"))) + comment = c(ORCID="0000-0002-2700-4605"))) URL: https://github.com/NeotomaDB/neotoma2 BugReports: https://github.com/NeotomaDB/neotoma2/issues Description: Access and manipulation of data using the Neotoma Paleoecology Database. . + Examples in functions that require API access are not executed during CRAN checks. + Vignettes do not execute as to avoid API calls during CRAN checks. License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/NEWS.md b/NEWS.md index 624a86f5..c21c623c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,16 @@ # News neotoma2 R package +## neotoma2 1.0.12 +Updated description file to match CRAN's comments. +Now follows: + comment = c(ORCID = .......) + + ## neotoma2 1.0.11 -Added flag error=True to all vignette codechunks to not run when API is down in case API comes down in between a knitting. +Set examples to \dontrun{} to avoid violating CRAN's policy that allows access to internet. +Set vignettes to `eval=FALSE` as to avoid calling APIs that require access to internet. +Set all tests that require API call to `skip on CRAN`. 
## neotoma2 1.0.10 diff --git a/R/01_classDefinitions.R b/R/01_classDefinitions.R index 3b7cb16c..f0c94a21 100644 --- a/R/01_classDefinitions.R +++ b/R/01_classDefinitions.R @@ -7,8 +7,9 @@ setClassUnion("id", c("character", "integer", "numeric")) #' @name contacts_classes #' @description An unordered list of individual S4 `contact` objects. #' @export -#' @examples +#' @examples { #' new("contact", familyname = "Goring", givennames = "Simon J.") +#' } #' @returns object of class `contact` #' @aliases contact-class #' @md diff --git a/R/clean.R b/R/clean.R index 4bcd5235..d9942da1 100644 --- a/R/clean.R +++ b/R/clean.R @@ -16,7 +16,7 @@ #' * After: \{site: 1, dataset: \[1, 2\]\} #' So the site is gathered, and the datasets are now part of an #' array of datasets. -#' @examples \donttest{ +#' @examples \dontrun{ #' tryCatch({ #' alex <- get_sites(sitename = "Alex%") #' alex2 <- get_sites(24) diff --git a/R/filter.R b/R/filter.R index bdd005fa..71bbf764 100644 --- a/R/filter.R +++ b/R/filter.R @@ -49,7 +49,7 @@ #' @param .by (only used for filtering `data.frame` objects) #' @param .preserve (only used for filtering `data.frame` objects) #' @returns filtered `sites` object -#' @examples \donttest{ +#' @examples \dontrun{ #' # Download 10 sites, but only keep the sites that are close to sea level. #' tryCatch({ #' some_sites <- get_sites(sitename = "Lake%", limit = 3) diff --git a/R/get_datasets.R b/R/get_datasets.R index 56b5b459..3f670028 100644 --- a/R/get_datasets.R +++ b/R/get_datasets.R @@ -60,7 +60,7 @@ #' record. #' * `all_data` The API only downloads the first 25 records of the query. 
#' For the complete records, use `all_data=TRUE` -#' @examples \donttest{ +#' @examples \dontrun{ #' tryCatch({ #' random_sites <- get_sites(1) #' allds <- get_datasets(random_sites, limit=3) diff --git a/R/get_documentation.R b/R/get_documentation.R index 321bca61..9828d356 100644 --- a/R/get_documentation.R +++ b/R/get_documentation.R @@ -4,7 +4,7 @@ #' @importFrom utils browseURL #' @importFrom rlang is_interactive #' @returns NULL -#' @examples \donttest{ +#' @examples \dontrun{ #' if (interactive()) { #' get_documentation() #' } diff --git a/R/get_downloads.R b/R/get_downloads.R index e4d79823..46b32020 100644 --- a/R/get_downloads.R +++ b/R/get_downloads.R @@ -48,7 +48,7 @@ #' \item{ \code{pi list} }{P.I. info} #' \item{ \code{analyst} }{analyst info} #' \item{ \code{metadata} }{dataset metadata} -#' @examples \donttest{ +#' @examples \dontrun{ #' # To find the downloads object of dataset 24: #' tryCatch({ #' downloads24 <- get_downloads(24) diff --git a/R/get_manual.R b/R/get_manual.R index 15400d6c..95bdb4ac 100644 --- a/R/get_manual.R +++ b/R/get_manual.R @@ -3,7 +3,7 @@ #' @description Open up the Neotoma manual homepage. #' @importFrom utils browseURL #' @importFrom rlang is_interactive -#' @examples { +#' @examples \dontrun{ #' # This call does not work from `source()` calls or in testing. #' # interactive() just lets us know you are interacting with the console: #' if (interactive()) { diff --git a/R/get_publications.R b/R/get_publications.R index be2a0f49..0eb7ed4a 100644 --- a/R/get_publications.R +++ b/R/get_publications.R @@ -17,7 +17,7 @@ #' `year` The year the publication was released. #' `search` A plain text search string used to search the citation. #' @returns `publications` object -#' @examples \donttest{ +#' @examples \dontrun{ #' # How old are the papers in Neotoma that include the term "mammut"? 
#' tryCatch({ #' mammoth_papers <- get_publications(search="mammut") %>% diff --git a/R/get_sites.R b/R/get_sites.R index bee10f6a..3575445b 100644 --- a/R/get_sites.R +++ b/R/get_sites.R @@ -62,21 +62,11 @@ #' * `loc` An `sf` object that describes site's location. #' * `description` #' * `collunits` limited information on collunits -#' @examples -#' \donttest{ +#' @examples \dontrun{ #' ## Find sites with a min altitude of 12m and a max altitude of 25m -#' tryCatch({ -#' sites_12to25 <- get_sites(altmin=12, altmax=25) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) -#' ## Return all sites, using a minimum altitude of 2500m (returns >500 sites): -#' tryCatch({ -#' sites_2500 <- get_sites(altmin=2500, all_data = TRUE) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) -#' ## To find sites in Brazil +#' sites_12to25 <- get_sites(altmin=12, altmax=25) +#' sites_2500 <- get_sites(altmin=2500, all_data = TRUE) +#' ## To find sites in Brazil #' brazil <- '{"type": "Polygon", #' "coordinates": [[ #' [-73.125, -9.102096738726443], @@ -84,20 +74,7 @@ #' [-36.5625,-7.710991655433217], #' [-68.203125,13.923403897723347], #' [-73.125,-9.102096738726443]]]}' -#' tryCatch({ #' brazil_sites <- get_sites(loc = brazil[1]) -#' # Finding all sites with Liliaceae pollen in 1000 year bins: -#' lilysites <- c() -#' for (i in seq(0, 10000, by = 1000)) { -#' lily <- get_sites(taxa=c("Liliaceae"), -#' ageyoung = i - 500, -#' ageold = i + 500, -#' all_data = TRUE) -#' lilysites <- c(lilysites, length(lily)) -#' } -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) #' } #' @md #' @export diff --git a/R/get_speleothems.R b/R/get_speleothems.R index a0aebf14..d4f41818 100644 --- a/R/get_speleothems.R +++ b/R/get_speleothems.R @@ -58,13 +58,9 @@ speleo_helper <- function(sites) { #' * `x` The unique dataset ID (integer) in Neotoma. 
Can be passed as a #' vector of dataset IDs. #' * `sites` A `sites` R object. -#' @examples { +#' @examples \dontrun{ #' ## Find speleothems by numeric datasetid: -#' tryCatch({ -#' speleo <- get_speleothems(c(2,5)) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) +#' speleo <- get_speleothems(c(2,5)) #' } #' @md #' @export diff --git a/R/get_stats.R b/R/get_stats.R index 58f26e75..3c747c15 100644 --- a/R/get_stats.R +++ b/R/get_stats.R @@ -21,7 +21,7 @@ #' added per month), and \code{dstypemonth} (the number of datasets added #' per dataset type per month). Default is \code{dsdbmonth}. #' @returns `data.frame` with summary statistics -#' @examples \donttest{ +#' @examples \dontrun{ #' tryCatch({ #' last_month <- get_stats(start = 0, end = 1, type = "dsdbmonth") #' }, error = function(e) { diff --git a/R/get_table.R b/R/get_table.R index 43a60472..ca546552 100644 --- a/R/get_table.R +++ b/R/get_table.R @@ -6,13 +6,9 @@ #' @param limit Default 25 records #' @param offset Default 0. #' @returns selected `table` values from the Database. -#' @examples { +#' @examples \dontrun{ #' # Returns only the first 25 specimen records. -#' tryCatch({ #' someSpec <- get_table('specimens') -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) #' } #' @importFrom dplyr bind_rows #' @importFrom purrr map diff --git a/R/getids.r b/R/getids.r index 8e6d47a9..05a33270 100644 --- a/R/getids.r +++ b/R/getids.r @@ -9,13 +9,9 @@ #' @param x A Neotoma2 \code{sites} or \code{collunits} object. #' @param order sort items by `siteid`, `collunitid`, `datasetid` #' @returns `data.frame` containing `siteid`, `datasetid`, and `collunitid` -#' @examples \donttest{ -#' tryCatch({ +#' @examples \dontrun{ #' marion <- get_sites(sitename = "Marion Lake") #' collunitids <- getids(collunits(marion)) -#' }, error = function(e) { -#' message("Neotoma server not responding. 
Try again later.") -#' }) #' } #' @md #' @export diff --git a/R/pingNeotoma.r b/R/pingNeotoma.r index bfa6b093..9cada986 100644 --- a/R/pingNeotoma.r +++ b/R/pingNeotoma.r @@ -8,7 +8,7 @@ #' numeric port), \code{neotoma} or \code{dev}. #' @returns A valid HTTP status code or returns an error if a connection #' is refused. -#' @examples { +#' @examples \dontrun{ #' test_connection <- pingNeotoma("neotoma") #' } #' @export diff --git a/R/plotLeaflet.R b/R/plotLeaflet.R index 33e25e1e..2b180a7e 100644 --- a/R/plotLeaflet.R +++ b/R/plotLeaflet.R @@ -4,7 +4,7 @@ #' @importFrom leaflet leaflet addTiles addCircleMarkers #' @importFrom leaflet markerOptions markerClusterOptions #' @param object Sites object to plot -#' @examples \donttest{ +#' @examples \dontrun{ #' # Note that by default the limit for queries is 25 records: #' tryCatch({ #' modernSites <- get_sites(keyword = "Modern") diff --git a/R/samples.R b/R/samples.R index ee1e0e5d..927bb91e 100644 --- a/R/samples.R +++ b/R/samples.R @@ -3,13 +3,10 @@ #' @author Simon Goring \email{goring@wisc.edu} #' @param x sites object #' @description Obtain all samples within a sites object -#' @examples { -#' tryCatch({ +#' @examples \dontrun{ +#' # Get full data download from API and create a long table with samples data. #' dw <- get_downloads(1) #' pollen <- samples(dw) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) #' } #' @importFrom dplyr bind_rows left_join rename mutate #' @importFrom purrr map diff --git a/R/set_server.R b/R/set_server.R index 4d9bdbc1..ff0e2a1f 100644 --- a/R/set_server.R +++ b/R/set_server.R @@ -3,7 +3,7 @@ #' @importFrom assertthat assert_that #' @param server One of \code{local} (when the API is running locally on #' port 3005), \code{neotoma} or \code{dev}. 
-#' @examples \donttest{ +#' @examples \dontrun{ #' # The user is running the API locally using the node/express API #' # cloned from github: https://github.com/NeotomaDB/api_nodetest #' set_server(server = "local") diff --git a/R/site-methods.R b/R/site-methods.R index e7138c39..5a05f9be 100644 --- a/R/site-methods.R +++ b/R/site-methods.R @@ -48,13 +48,9 @@ setMethod(f = "show", #' @description Obtain one of the elements within a `sites`, #' `collectionunits`, `datasets`, etc... Neotoma objects. #' @returns sliced `site` object -#' @examples \donttest{ -#' tryCatch({ -#' some_site <- get_sites(sitename = "Site%", limit=3) -#' some_site[[2]] -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) +#' @examples \dontrun{ +#' some_site <- get_sites(sitename = "Site%", limit=3) +#' some_site[[2]] #' } #' @aliases [[,sites,numeric-method #' @exportMethod [[ @@ -407,13 +403,10 @@ setMethod(f = "summary", #' @importFrom dplyr bind_rows full_join select arrange filter #' @importFrom dplyr mutate group_by row_number #' @returns `data.frame` object with DOIs information. -#' @examples { -#' tryCatch({ +#' @examples \dontrun{ +#' # Get datasets metadata from API and retrieve DOIs #' ds <- get_datasets(1) #' doi(ds) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) #' } #' @aliases doi,sites-method #' @exportMethod doi @@ -460,13 +453,9 @@ setMethod(f = "doi", #' @importFrom purrr map #' @importFrom dplyr bind_rows full_join select arrange filter #' @returns `data.frame` object with citation information. -#' @examples { -#' tryCatch({ +#' @examples \dontrun{ +#' # Get datasets metadata from API #' ds <- get_datasets(1) -#' cite_data(ds) -#' }, error = function(e) { -#' message("Neotoma server not responding. 
Try again later.") -#' }) #' } #' @aliases cite_data,sites-method #' @exportMethod cite_data diff --git a/R/speleothemdetails.R b/R/speleothemdetails.R index 065a94a9..5d76f3eb 100644 --- a/R/speleothemdetails.R +++ b/R/speleothemdetails.R @@ -7,16 +7,12 @@ #' @returns `data.frame` with speleothem records #' @description Obtain elements on the speleothems level #' Experimental function: API and behavior may change. -#' @examples \donttest{ -#' tryCatch({ +#' @examples \dontrun{ #' kesang <- get_sites(sitename = "Kesang cave") %>% #' get_datasets() %>% #' filter(datasettype == "pollen") %>% #' get_speleothems() #' sp <- speleothemdetails(kesang) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) #' } #' @md #' @export diff --git a/R/speleothems.R b/R/speleothems.R index 3a262ac5..b2d03488 100644 --- a/R/speleothems.R +++ b/R/speleothems.R @@ -7,13 +7,9 @@ #' @returns `data.frame` with sample records #' @description Obtain all speleothems within a sites object #' Experimental function: API and behavior may change. -#' @examples { -#' tryCatch({ +#' @examples \dontrun{ #' ds <- get_datasets(37302) #' sp <- speleothems(ds) -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) #' } #' @md #' @export diff --git a/R/taxa.R b/R/taxa.R index 98fef5c6..12d803ff 100644 --- a/R/taxa.R +++ b/R/taxa.R @@ -6,15 +6,11 @@ #' @returns A \code{data.frame} reporting the taxa/data objects, units, #' elements and other features within a set of records. #' @description Extract taxonomic data from a set of sites. -#' @examples \donttest{ -#' tryCatch({ +#' @examples \dontrun{ #' somesites <- get_sites(datasettype = "diatom") %>% #' get_downloads() #' diatomtaxa <- taxa(somesites) -#' }, error = function(e) { -#' message("Neotoma server not responding. 
Try again later.") -#' }) -#' } +#' } #' @md #' @export setMethod(f = "taxa", diff --git a/R/toWide.R b/R/toWide.R index c68ac1c7..6c35d3dd 100644 --- a/R/toWide.R +++ b/R/toWide.R @@ -14,9 +14,7 @@ #' @param operation label or vector of operations to be chosen from: #' 'prop', 'sum', 'presence'. #' @returns wide `data.frame` obtained from long `samples` `data.frame` -#' @examples -#' \donttest{ -#' tryCatch({ +#' @examples \dontrun{ #' fc_sites <- neotoma2::get_datasets(limit=5, datasettype = "vertebrate fauna") #' fc_ds <- fc_sites %>% #' neotoma2::get_downloads() @@ -24,10 +22,7 @@ #' fc_smp <- samples(fc_dl1) #' toWide(fc_smp, ecologicalgroups=c('AVES', 'RODE'), #' elementtypes='bone/tooth', unit='present/absent') -#' }, error = function(e) { -#' message("Neotoma server not responding. Try again later.") -#' }) -#'} +#' } #' @description Obtain a wide table with information regarding of #' samples grouped by variablename and depth/age. #' @export diff --git a/codemeta.json b/codemeta.json index 452816b8..a462260d 100644 --- a/codemeta.json +++ b/codemeta.json @@ -2,12 +2,12 @@ "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "@type": "SoftwareSourceCode", "identifier": "neotoma2", - "description": "Access and manipulation of data using the Neotoma Paleoecology Database. .", + "description": "Access and manipulation of data using the Neotoma Paleoecology Database. . Examples in functions that require API access are not executed during CRAN checks. 
Vignettes do not execute as to avoid API calls during CRAN checks.", "name": "neotoma2: Working with the Neotoma Paleoecology Database", "codeRepository": "https://github.com/NeotomaDB/neotoma2", "issueTracker": "https://github.com/NeotomaDB/neotoma2/issues", "license": "https://spdx.org/licenses/MIT", - "version": "1.0.11", + "version": "1.0.12", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -324,7 +324,7 @@ }, "SystemRequirements": null }, - "fileSize": "9142.596KB", + "fileSize": "6728.991KB", "citation": [ { "@type": "SoftwareSourceCode", diff --git a/cran-comments.md b/cran-comments.md index fff265b9..ed4ca891 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -7,9 +7,26 @@ ----- +## DESCRIPTION modification neotoma2 1.0.12 + +DESCRIPTION Authors@R field. + +Complaints seem to be about mis-writing the ORCID info. This should +be of the form + + comment = c(ORCID = .......) + +and not + + comment(ORCID = .......) + ORCID = ........ + ## Bugfix neotoma2 1.0.11 -Added flag error=True to all vignette codechunks to not run when API is down in case API comes down in between a knitting. +Set examples to \dontrun{} to avoid violating CRAN's policy that allows access to internet. +Set vignettes to `eval=FALSE` as to avoid calling APIs that require access to internet. +Set all tests that require API call to `skip on CRAN`. + ## Bugfix neotoma2 1.0.10 diff --git a/inst/doc/neotoma2-package.html b/inst/doc/neotoma2-package.html deleted file mode 100644 index 2c939b7d..00000000 --- a/inst/doc/neotoma2-package.html +++ /dev/null @@ -1,5198 +0,0 @@ - - - - - - - - - - - - - - -The neotoma2 R Package - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

The neotoma2 R Package

- - - -
-closeup of several Neotoma sites in the Caribbean. -
closeup of several Neotoma sites in the -Caribbean.
-
-
-

Neotoma Resources

-

The Neotoma Paleoecology -Database is a domain-specific data resource containing millions of -fossil records from around the globe, covering the last 5.4 million -years. The neotoma2 R package simplifies some of the data -structures and concepts to facilitate statistical analysis and -visualization. Users may wish to gain a deeper understanding of the -resource itself, or build more complex data objects and relationships. -For those users a partial list is provided here, including a table of -code examples focusing on different geographic regions, languages and -dataset types.

-
-

Resources

- -
-
-
-

Neotoma Data Structure

-
-Three panels showing context for Neotoma’s geographic representation of sites. In panel a a site is defined by the boundaries of a lake. The site also has a bounding box, and the core location is defined by a collection unit within the site that is defined with precise coordinates. In panel b a site is defined as a single point, for example, from a textual reference indicating the site is at the intersection of two roads. Here the site and collection unit share the unique point location. In panel c we show how that site location may be obfuscated using a bounding box as the site delimiter. In this case the collection unit would not be defined (but is represented as the triangle for illustration). Figure obtained from the Neotoma Database Manual. -
Three panels showing context for Neotoma’s -geographic representation of sites. In panel a a site is defined by the -boundaries of a lake. The site also has a bounding box, and the core -location is defined by a collection unit within the site that is defined -with precise coordinates. In panel b a site is defined as a single -point, for example, from a textual reference indicating the site is at -the intersection of two roads. Here the site and collection unit share -the unique point location. In panel c we show how that site location may -be obfuscated using a bounding box as the site delimiter. In this case -the collection unit would not be defined (but is represented as the -triangle for illustration). Figure obtained from the Neotoma Database -Manual.
-
-

Data in Neotoma is associated with sites, specific locations with -lat/long coordinates. Within a site, there may be one or more collection -units – locations at which samples are physically collected -within the site. For example, an archaeological site -may have one or more collection units, pits within a -broader dig site; a pollen sampling site on a lake may -have multiple collection units – core sites within the -lake basin. Collection units may have higher resolution GPS locations, -but are considered to be part of the broader site. Within a -collection unit data is collected at various -analysis units from which samples -are obtained.

-

Because Neotoma is made up of a number of constituent databases -(e.g., the Indo-Pacific Pollen Database, NANODe, FAUNMAP), a set of -samples associated with a collection -unit are assigned to a single dataset -associated with a particular dataset type (e.g., -pollen, diatom, vertebrate fauna) and constituent -database.

-
-Figure. The structure of sites, collection units and datasets within Neotoma. A site contains one or more collection units. Chronologies are associated with collection units. Data of a common type (pollen, diatoms, vertebrate fauna) are assigned to a dataset. -
Figure. The structure of -sites, collection units and datasets within Neotoma. A site contains one -or more collection units. Chronologies are associated with collection -units. Data of a common type (pollen, diatoms, vertebrate fauna) are -assigned to a dataset.
-
-

Researchers often begin by searching for sites within a particular -study area, whether that is defined by geographic or political -boundaries. From there they interrogate the available datasets for their -particular dataset type of interest. When they find records of interest, -they will then often call for the data and associated chronologies.

-

The neotoma2 R package is intended to act as the -intermediary to support these research activities using the Neotoma -Paleoecology Database. Because R is not a relational database, we needed -to modify the data structures of the objects. To do this the package -uses a set of S4 objects to represent different elements within the -database.

-
-A diagram showing the different major classes within the neotoma2 R package, and the way the elements are related to one another. Individual boxes represent the major classes (sites, site, collectionunits, etc.). Each box then has a list of the specific metadata contained within the class, and the variable type (e.g., siteid: integer). Below these are the functions that can be applied to the object (e.g., [[<-). -
A diagram showing the different major classes -within the neotoma2 R package, and the way the elements are -related to one another. Individual boxes represent the major classes -(sites, site, collectionunits, etc.). Each box then has a list of the -specific metadata contained within the class, and the variable type -(e.g., siteid: integer). Below these are the functions that can -be applied to the object (e.g., [[<-).
-
-

It is important to note, here and elsewhere: Almost -everything you will interact with is a sites -object. A sites object is the general currency of -this package. sites may have more or less metadata -associated with them, but they are the primary object, and, as you can -see in the diagram above, they have the most functions associated with -them.

-
-

Package Requirements

-

The earlier neotoma package tried to use base R as much -as possible. The neotoma2 package now draws primarily on -dplyr and purrr packages from the -tidyverse, and on the sf spatial data package. -The choice to integrate tidyverse packages was made largely -because of the current ubiquity of the tidyverse in R -education.

-
-
-
-

Site Searches

-

The highest level object in Neotoma is the site. -Sites have spatial coordinates and, in many cases, additional metadata -related to lake parameters, or other site-specific properties.

-

Sites can be searched using the get_sites() function, -or, can be created using the set_site() function. A single -site object is a special object in R, that can be combined -with other sites into a sites object. A sites -object is effectively a list() of site objects -with special methods for printing, plotting and exporting -information.

-
-

Finding Sites

-

All sites in Neotoma have a unique numeric identifier. With the -neotoma2 package you can search for a site using the -get_sites() function by its unique site id -(siteid), by name (sitename), by altitude -(altmin, altmax), by geopolitical name -(gpid), location (loc) or age bounds.

-

If we’re looking for a site and we know its specific identifier, we -can use the simplest implementation of get_sites(). Here we -are searching for a site (Alexander Lake), where we know that the siteid -for the record in Neotoma is 24. We can get these siteids -using the Neotoma -Explorer web application, or if we have some familiarity with the -site records already.

-
# Search for site by a single numeric ID:
-alex <- get_sites(24)
-alex
-#>  siteid       sitename      lat      long altitude
-#>      24 Alexander Lake 53.33333 -60.58333       73
-
-# Search for sites with multiple IDs using c():
-multiple_sites <- get_sites(c(24, 47))
-multiple_sites
-#>  siteid       sitename      lat      long altitude
-#>      24 Alexander Lake 53.33333 -60.58333       73
-#>      47        Liberty 43.52000 -90.78000      353
-

Once you search for a site, the neotoma2 R package makes -a call to the Neotoma Database, and returns a structured -sites object that contains metadata about the sites, and -some additional metadata about collection units and datasets at those -sites. This limited metadata helps speed up further searches, but is not -complete, for the purposes of analysis.

-
-The result of a hypothetical get_sites() call, is a sites object containing two individual site objects. Each site object contains a collunits object with some limited metadata. The top site appears to have two collection units, while the lower site has only a single collection unit. Each of the top two collection units appear to contain two datasets, while the bottom site has only the one collection unit with only one dataset. -
The result of a hypothetical -get_sites() call, is a sites object containing -two individual site objects. Each site object -contains a collunits object with some limited metadata. The -top site appears to have two collection units, while the -lower site has only a single collection unit. Each of the top two -collection units appear to contain two datasets, while the bottom site -has only the one collection unit with only one dataset.
-
-
-

Searching for Sites by Name

-

Often we do not know the particular siteid. If we’re -looking for a site and we know its name or a part of its name, we can -search using the function with the sitename argument, -get_sites(sitename = 'XXX'), where 'XXX' is the -site name. This does not support multiple text strings (i.e., you can’t -use c()).

-
alex <- get_sites(sitename = "Alexander Lake")
-alex
-#>  siteid       sitename      lat      long altitude
-#>      24 Alexander Lake 53.33333 -60.58333       73
-

Neotoma uses a Postgres Database to manage data. Postgres uses the -% sign as a general wildcard, so we can use the -% in the sitename argument to help us -find sites when we’re not sure of the exact name. Note that the search is -case insensitive so a search for alex% or -Alex% will return the same results.

-
alex <- get_sites(sitename = 'Alex%')
-alex
-#>  siteid           sitename      lat      long altitude
-#>      24     Alexander Lake 53.33333 -60.58333       73
-#>      25        Alexis Lake 52.51667 -57.03333      193
-#>    4478 Alexander [3CN117] 35.25000 -92.61667      180
-#>   26226     Alexandra Lake 43.29030 -74.16966      351
-

Since this new sites object has 4 elements of class -site, we may want to access only one of the objects, or -sets of metadata from our variable alex.

-
-
-

Searching for Sites by Age

-

There are several ways of searching for sites using age parameters. -These are represented below:

-
-Site searches using age parameters including ageof, ageyoung, ageold, maxage and minage. -
Site searches using age parameters including -ageof, ageyoung, ageold, -maxage and minage.
-
-

We offer several methods of searching because different users have -different requirements. A user might be only interested in one specific -point in time in the past, for example the 8.2 ka event. In this instance -they would search get_sites(ageof = 8200). They may want -sites with records that completely span a time period, for example the -Atlantic chronozone of the Holocene: -get_sites(ageyoung = 5000, ageold = 8000). These sites -would have samples both within and outside the defined age range, so -that the user could track change into and out of the time period. A user -may also be interested in any record within a time bin, regardless of -whether the site spans that time period or not. They would query -get_sites(minage = 5000, maxage = 8000).

-

We can see how these age bounds differ:

-
# Note, we are using the `all_data = TRUE` flag here to avoid the default limit of 25 records, discussed below.
-# Because these queries are searching through every record they are slow and are not
-# run in knitting this vignette.
-get_sites(ageof = 8200, all_data = TRUE) %>% length()
-get_sites(ageyoung = 5000, ageold = 8000, all_data = TRUE) %>% length()
-get_sites(minage = 5000, maxage = 8000, all_data = TRUE) %>% length()
-

It is possible to pass all parameters (ageof, -minage, maxage, ageyoung, . . . -), but it is likely that these will conflict and result in an empty set -of records. To avoid this, be aware of the relationships among these -search parameters, and how they might affect your search window.

-
-
-
-

Accessing sites metadata

-

Although the sites are structured using S4 objects (see -Hadley Wickham’s S4 -documentation), we’ve added helper functions to make accessing -elements easier for users.

-

The alex object is composed of several smaller objects -of class site. We can call any individual site using -[[ ]], placing the index of the desired object between the -brackets. Then we can also call the particular variable we want using -the $ symbol.

-
alex <- get_sites(sitename = "Alexander Lake")
-alex[[1]]$siteid
-#> [1] 24
-

The elements within a site are the same as the defined -columns within the Neotoma ndb.sites -table, with the exception of the collunits slot, which -contains the collection units and associated datasets that are found -within a site. You can see all the site slots using the -names() function. You can select individual elements of a -site, and you can assign values to these parameters:

-
names(alex[[1]])
-#> [1] "siteid"       "sitename"     "geography"    "altitude"     "geopolitical"
-#> [6] "area"         "notes"        "description"  "collunits"
-
-# Modify a value using $<- assignment:
-alex[[1]]$area
-#> [1] NA
-alex[[1]]$area <- 100
-alex[[1]]$area
-#> [1] 100
-
-# Modify a value using [<- assignment:
-alex[[1]]["area"] <- 30
-alex[[1]]$area
-#> [1] 30
-# alex[[1]][7] <- 30  This fails because the `Notes` field expects a character string.
-

Using assignment, we can add information programmatically, for -example, by working interactively with a digital elevation model or -hydrographic data to obtain lake area measurements. Although not -currently implemented, the goal is to support direct upload of updated -information by users.

-
-
-

Creating a Site

-

As explained above, a site is the fundamental unit of -the Neotoma Database. If you are working with your own data, you might -want to create a site object to allow it to interact with -other data within Neotoma. You can create a site with the -set_site() function. It will ask you to provide important -information such as sitename, lat, and -long attributes.

-
my_site <- set_site(sitename = "My Lake", 
-                    geography = st_sf(a = 3, st_sfc(st_point(1:2))), 
-                    description = "my lake", 
-                    altitude = 30)
-my_site
-#>    siteid sitename lat long altitude
-#>  35960541  My Lake   2    1       30
-

If we have a set of sites that we are analyzing, we can add the new -site to the set of sites, either by appending it to the end, using -c(), or by replacing a particular element using -[[<-.

-

This method allows us to begin modifying site information for -existing sites if we have updated knowledge about site properties.

-
# Add a new site that's been edited using set_site()
-longer_alex <- c(alex, my_site)
-# Or replace an element within the existing list of sites
-# with the newly created site.
-longer_alex[[2]] <- my_site
-
-# Or append to the `sites` list with assignment:
-longer_alex[[3]] <- my_site
-

We can also use set_site() as a tool to update the -metadata associated with an existing site object:

-
# Update a value within an existing `sites` object:
-longer_alex[[3]] <- set_site(longer_alex[[3]],
-altitude = 3000)
-longer_alex
-
-
-
-

Datasets

-

If you need to get to a deeper level of the sites object, you may -want to look at the get_datasets() function. You can use -get_datasets() using search parameters, or you can use it -on an existing sites object, such as our prior -alex dataset.

-

get_datasets() adds additional metadata to the -site objects, letting us know which -datasettypes are associated with a site, and the dataset -sample locations at the site.

-
-Using get_datasets() provides more complete metadata about a record, including the addition of chronological information, and more complete metadata about the datasets, compared to the get_sites() call, shown above. The objects here are the same as above, but now have chronology metadata, and contact metadata for the records. Note that there is still no sample or taxonomic information about these records. This comes from the get_downloads() function. -
Using get_datasets() provides more -complete metadata about a record, including the addition of -chronological information, and more complete metadata about the -datasets, compared to the get_sites() call, shown above. -The objects here are the same as above, but now have chronology -metadata, and contact metadata for the records. Note that there is still -no sample or taxonomic information about these records. This comes from -the get_downloads() function.
-
-

Getting the datasets by ID is the easiest call; you can also pass a -vector of IDs or, if you already have a sites object, you -can pass that sites object.

-
# Getting datasets by ID
-my_datasets <- get_datasets(c(5, 10, 15, 20))
-my_datasets
-#>  siteid                   sitename       lat      long altitude
-#>       5                       17/2  55.25000 -74.93333      300
-#>      10 Site 1 (Cohen unpublished)  30.83000 -82.33000       36
-#>      15                    Aguilar -23.83333 -65.75000     3828
-#>      20                   Akuvaara  69.12326  27.67406      159
-

You can also retrieve datasets by type directly from the API.

-
# Getting datasets by type
-my_pollen_datasets <- get_datasets(datasettype = "pollen", limit = 25)
-my_pollen_datasets
-#>  siteid                            sitename       lat       long altitude
-#>       7                     Three Pines Bog  47.00000  -80.11667      329
-#>       8                 Abalone Rocks Marsh  33.95639 -119.97667        9
-#>       9                              Adange  43.30556   41.33333     2065
-#>      11        Konus Exposure, Adycha River  67.75000  135.58333      137
-#>      12                       Ageröds Mosse  55.93329   13.42559       47
-#>      13                     Aguas Calientes -23.08333  -67.40000     4233
-#>      14                   Aguas Calientes 2 -23.50000  -67.58333     4198
-#>      15                             Aguilar -23.83333  -65.75000     3828
-#>      16                           Ahlenmoor  53.69908    8.74688        5
-#>      17                               Ajata -18.25000  -69.20000     4773
-#>      18                    South Soefje Bog  29.60000  -97.51694      100
-#>      19             Akulinin Exposure P1282  47.11667  138.55000      367
-#>      20                            Akuvaara  69.12326   27.67406      159
-#>      21 Alazeya River Exposure, 8 m Terrace  68.50000  154.50000       50
-#>      22 Alazeya River Exposure, 9 m Terrace  64.33333  154.50000      125
-#>      24                      Alexander Lake  53.33333  -60.58333       73
-#>      25                         Alexis Lake  52.51667  -57.03333      193
-#>      27                          Aliuk Pond  54.58333  -57.36667        9
-#>      29                          Lake Allie  44.80156  -94.55982      320
-#>      30                         Almora Lake  46.20611  -95.29361      437
-#>      31                           Alut Lake  60.13667  152.31278      488
-#>      32                             Amarete -15.23333  -68.98333     3755
-#>      33             Amba River Exposure 596  43.31667  131.81667        0
-#>      68     Amguema River Valley Exposure 1  67.75000  178.70000      493
-#>      69     Amguema River Valley Exposure 2  67.66667  178.60000      376
-

It can be computationally intensive to obtain the full set of records -for sites or datasets. By default the -limit for all queries is 25. The default -offset is 0. To capture all results we can use -the all_data = TRUE flag in our calls. -However, this is hard on the Neotoma servers. We tend -to prefer that users use all_data = TRUE once their -analytic workflow is mostly complete.

-

We can use that all_data = TRUE in R in the following -way:

-
allSites_dt <- get_sites(datasettype = "diatom")
-allSites_dt_all <- get_sites(datasettype = "diatom", all_data = TRUE)
-
-# Because we used the `all_data = TRUE` flag, there will be more sites
-# in allSites_dt_all, because it represents all sites containing diatom datasets.
-length(allSites_dt_all) > length(allSites_dt)
-
-

Spatial Searches

-

You can get the coordinates to create a GeoJson bounding box from here, or you can use -pre-existing objects within R, for example, country-level data within -the spData package:

-

Accessing datasets by bounding box:

-
brazil <- '{"type": "Polygon", 
-            "coordinates": [[
-                [-73.125, -9.102],
-                [-56.953, -33.138],
-                [-36.563, -7.711],
-                [-68.203, 13.923],
-                [-73.125, -9.102]
-              ]]}'
-
-# We can make the geojson a spatial object if we want to use the
-# functionality of the `sf` package.
-brazil_sf <- geojsonsf::geojson_sf(brazil)
-
-
-brazil_datasets <- get_datasets(loc = brazil_sf)
-

Now we have an object called brazil_datasets that -contains 19 sites.

-

You can plot these findings!

-
plotLeaflet(brazil_datasets)
-
- -
-
-
-

Filtering Records

-

Sometimes we take a large number of records, do some analysis, and -then choose to select a subset. For example, we may want to select all -sites in a region, and then subset those by dataset type. If we want to -look at only the geochronological datasets from Brazil, we can start -with the set of records returned from our get_datasets() -query, and then use the filter function in -neotoma2 to select only those datasets that are -geochronologic:

-

-brazil_dates <- neotoma2::filter(brazil_datasets,
-                                   datasettype == "geochronologic")
-
-# or:
-brazil_dates <- brazil_datasets %>%
-    neotoma2::filter(datasettype == "geochronologic")
-
-# With boolean operators:
-brazil_space <- brazil_datasets %>% neotoma2::filter(lat > -18 & lat < -16)
-

The filter() function takes as the first argument, a -datasets object, followed by the criteria we want to use to filter. -Currently supported criteria include:

-
    -
  • lat
  • -
  • long
  • -
  • elev
  • -
  • datasettype
  • -
-

You also need to make sure that you accompany any of these terms with -the following boolean operators: <, > or -==, !=. datasettype has to be of -type string, while the other terms must be numeric. If you need to -filter by the same argument, let’s say, you need to filter -“geochronologic” and “pollen” data types, then you will also make use of -& and | operators.

-
-
-

Sample and Taxonomic data

-

Once we have the set of records we wish to examine, we then want to -recover the actual sample data. This will provide us with information -about the kinds of elements found at the site, within the dataset, their -sample ages, and their counts or measurements. To do this we use the -get_downloads() call. Note, as before, we are returning a -sites object, but this time with the most complete -metadata.

-
-Using get_downloads() returns a sites object, but one that contains dataset objects with filled samples slots. The samples slot is often very large relative to the other metadata associated with sites, and so it is commonly held back until a direct request is provided. Helper functions at the sites level can pull out sample data once get_downloads() has been called. -
Using get_downloads() returns a -sites object, but one that contains dataset objects with -filled samples slots. The samples slot is -often very large relative to the other metadata associated with -sites, and so it is commonly held back until a direct -request is provided. Helper functions at the sites level -can pull out sample data once get_downloads() -has been called.
-
-

Assuming we continue with our example from Brazil, we want to extract -records from the country, filter to only pollen records with samples -covering the last 10,000 years, and then look at the relative frequency -of taxa across sites. We might do something like this:

-
brazil <- '{"type": "Polygon", 
-            "coordinates": [[
-                [-73.125, -9.102],
-                [-56.953, -33.138],
-                [-36.563, -7.711],
-                [-68.203, 13.923],
-                [-73.125, -9.102]
-              ]]}'
-
-# We can make the geojson a spatial object if we want to use the
-# functionality of the `sf` package.
-brazil_sf <- geojsonsf::geojson_sf(brazil)
-
-brazil_records <- get_datasets(loc = brazil_sf, all_data=TRUE) %>%
-    neotoma2::filter(datasettype == "pollen" & age_range_young <= 1000 & age_range_old >= 10000) %>%
-    get_downloads()
-
-
-count_by_site <- samples(brazil_records) %>%
-  dplyr::filter(elementtype == "pollen" & units == "NISP") %>%
-  group_by(siteid, variablename) %>%
-  summarise(n = n()) %>%
-  group_by(variablename) %>%
-  summarise(n = n()) %>%
-  arrange(desc(n))
-#> `summarise()` has grouped output by 'siteid'. You can override using the
-#> `.groups` argument.
-

In this code chunk we define the bounding polygon for our sites, -filter by time and dataset type, and then return the full records for -those sites. We get a sites object with dataset and sample -information (because we used get_downloads()). We execute -the samples() function to extract all the samples from the -sites objects, and then filter the resulting -data.frame to pull only pollen (a pollen dataset may -contain spores and other elements that are not, strictly speaking, -pollen) that are counted using the number of identified specimens (or -NISP). We then group_by() the unique site identifiers -(siteid) and the taxa (variablename) to get a -count of the number of times each taxon appears in each site. We then -want to summarize() to a higher level, just trying to -understand how many sites each taxon appears in. After that we -arrange() so that the records show the most common taxa -first in the resulting variable count_by_site.

-
-
-

Publications

-

Many Neotoma records have publications associated with them. The -publication object (and the publications -collection) provide the opportunity to do this. The publication -table in Neotoma contains an extensive number of fields. The methods for -publications in the neotoma2 package provide -us with tools to retrieve publication data from Neotoma, to set and -manipulate publication data locally, and to retrieve publication data -from external sources (e.g., using a DOI).

-
-

get_publications() from Neotoma

-

The simplest case is a search for a publication based on one or -more publication IDs. Most people do not know the unique publication ID -of individual articles, but this provides a simple method to highlight -the way Neotoma retrieves and presents publication information.

-
-

Get Publication By ID

-

We can use a single publication ID or multiple IDs. In either case -the API returns the publication(s) and creates a new -publications object (which consists of multiple individual -publications).

-
one <- get_publications(12)
-two <- get_publications(c(12, 14))
-

From there we can then subset and extract elements from the list -using the standard [[ format. For example:

-
two[[2]]
-

Will return the second publication in the list, corresponding to the -publication with publicationid 14 in this case.

-
- -
-

Create (or Import) New Publications

-

Just as we can use the set_site() function to set new -site information, we can also create new publication information using -set_publications(). With set_publications() -you can enter as much or as little of the article metadata as you’d -like, but it’s designed (in part) to use the CrossRef API to return -information from a DOI.

-
new_pub <- set_publications(
-articletitle = "Myrtle Lake: a late- and post-glacial pollen diagram from northern Minnesota",
-journal = "Canadian Journal of Botany",
-volume = 46)
-

A publication has a large number of slots that can be -defined. These may be left blank, or they may be set directly after the -publication is defined:

-
new_pub@pages <- "1397-1410"
-
-
-
-
-

Workshops and Code Examples

-
    -
  • 2022 International AL/IPA Meeting; Bariloche, Argentina
  • -
  • English -Language Simple Workflow
  • -
  • Topics: Simple search, climate gradients, stratigraphic -plotting
  • -
  • Spatial Domain: South America
  • -
  • Dataset Types: Diatoms
  • -
  • Spanish -Language Simple Workflow
  • -
  • Topics: Simple search, climate gradients, stratigraphic -plotting
  • -
  • Spatial Domain: South America
  • -
  • Dataset Types: Diatoms
  • -
  • English -Language Complex Workflow
  • -
  • Topics: Chronology building, Bchron
  • -
  • Spatial Domain: South America
  • -
  • Dataset Types: Diatoms
  • -
  • Spanish -Language Complex Workflow
  • -
  • Topics: Chronology building, Bchron
  • -
  • Spatial Domain: South America
  • -
  • Dataset Types: Diatoms
  • -
  • 2022 European Pollen Database Meeting; Prague, Czech Republic
  • -
  • English -Language Simple Workflow
  • -
  • Topics: Simple search, climate gradients, stratigraphic plotting, -taxonomic harmonization
  • -
  • Spatial Domain: Europe/Czech Republic
  • -
  • Dataset Types: Pollen
  • -
  • English -Language Complex Workflow
  • -
  • Topics: Chronology building, Bchron
  • -
  • Spatial Domain: Europe/Czech Republic
  • -
  • Dataset Types: Pollen
  • -
  • 2022 American Quaternary Association Meeting
  • -
  • English -Language Simple Workflow
  • -
  • Topics: Simple search, climate gradients, stratigraphic -plotting
  • -
  • Spatial Domain: North America
  • -
  • Dataset Types: Pollen
  • -
  • English -Language Complex Workflow
  • -
  • Topics: Chronologies
  • -
  • Spatial Domain: North America
  • -
  • Dataset Types: Pollen
  • -
  • Neotoma-charcoal Workshop, Göttingen, Germany. Authors: Petr Kuneš -& Thomas Giesecke
  • -
  • English -Language Workflow
  • -
  • Topics: Simple Search, PCA, DCA, Charcoal/Pollen Correlation
  • -
  • Spatial Domain: Global/Czech Republic
  • -
  • Dataset Types: Pollen, Charcoal
  • -
-
- - - - - - - - - - - diff --git a/man/cite_data-sites-method.Rd b/man/cite_data-sites-method.Rd index 546f6723..3aecf3fe 100644 --- a/man/cite_data-sites-method.Rd +++ b/man/cite_data-sites-method.Rd @@ -18,12 +18,8 @@ Given complete dataset objects in Neotoma (must have used citation for the record, including the dataset DOI. } \examples{ -{ -tryCatch({ +\dontrun{ +# Get datasets metadata from API ds <- get_datasets(1) -cite_data(ds) -}, error = function(e) { -message("Neotoma server not responding. Try again later.") -}) } } diff --git a/man/clean.Rd b/man/clean.Rd index fc17a981..c74756ae 100644 --- a/man/clean.Rd +++ b/man/clean.Rd @@ -39,7 +39,7 @@ array of datasets. } } \examples{ -\donttest{ +\dontrun{ tryCatch({ alex <- get_sites(sitename = "Alex\%") alex2 <- get_sites(24) diff --git a/man/contacts_classes.Rd b/man/contacts_classes.Rd index 8fb1b811..084d8bc5 100644 --- a/man/contacts_classes.Rd +++ b/man/contacts_classes.Rd @@ -14,5 +14,7 @@ object of class \code{contact} An unordered list of individual S4 \code{contact} objects. } \examples{ +{ new("contact", familyname = "Goring", givennames = "Simon J.") } +} diff --git a/man/doi-sites-method.Rd b/man/doi-sites-method.Rd index 77a9ab02..6e3d9a7b 100644 --- a/man/doi-sites-method.Rd +++ b/man/doi-sites-method.Rd @@ -18,12 +18,9 @@ Given complete dataset objects in Neotoma (must have used DOI for the record. } \examples{ -{ -tryCatch({ +\dontrun{ +# Get datasets metadata from API and retrieve DOIs ds <- get_datasets(1) doi(ds) -}, error = function(e) { -message("Neotoma server not responding. Try again later.") -}) } } diff --git a/man/filter.Rd b/man/filter.Rd index 38533df9..a1490566 100644 --- a/man/filter.Rd +++ b/man/filter.Rd @@ -71,7 +71,7 @@ depositional environment. } } \examples{ -\donttest{ +\dontrun{ # Download 10 sites, but only keep the sites that are close to sea level. 
tryCatch({ some_sites <- get_sites(sitename = "Lake\%", limit = 3) diff --git a/man/get_datasets.Rd b/man/get_datasets.Rd index 79a12e29..a823f465 100644 --- a/man/get_datasets.Rd +++ b/man/get_datasets.Rd @@ -88,7 +88,7 @@ For the complete records, use \code{all_data=TRUE} } } \examples{ -\donttest{ +\dontrun{ tryCatch({ random_sites <- get_sites(1) allds <- get_datasets(random_sites, limit=3) diff --git a/man/get_documentation.Rd b/man/get_documentation.Rd index 1f2c8eca..f36e1e9a 100644 --- a/man/get_documentation.Rd +++ b/man/get_documentation.Rd @@ -10,7 +10,7 @@ get_documentation() Open up the Neotoma R homepage. } \examples{ -\donttest{ +\dontrun{ if (interactive()) { get_documentation() } diff --git a/man/get_downloads.Rd b/man/get_downloads.Rd index 369a88ea..7e750bc8 100644 --- a/man/get_downloads.Rd +++ b/man/get_downloads.Rd @@ -71,7 +71,7 @@ For the complete records, use \code{all_data=TRUE} } } \examples{ -\donttest{ +\dontrun{ # To find the downloads object of dataset 24: tryCatch({ downloads24 <- get_downloads(24) diff --git a/man/get_manual.Rd b/man/get_manual.Rd index 33f19784..e0213852 100644 --- a/man/get_manual.Rd +++ b/man/get_manual.Rd @@ -13,7 +13,7 @@ NULL side effect for opening browser with the manual Open up the Neotoma manual homepage. } \examples{ -{ +\dontrun{ # This call does not work from `source()` calls or in testing. # interactive() just lets us know you are interacting with the console: if (interactive()) { diff --git a/man/get_publications.Rd b/man/get_publications.Rd index bca472a6..efa11cd6 100644 --- a/man/get_publications.Rd +++ b/man/get_publications.Rd @@ -42,7 +42,7 @@ Uses the Neotoma API to search and access information about publications associated with data in the Neotoma Paleoecology Database } \examples{ -\donttest{ +\dontrun{ # How old are the papers in Neotoma that include the term "mammut"? 
tryCatch({ mammoth_papers <- get_publications(search="mammut") \%>\% diff --git a/man/get_sites.Rd b/man/get_sites.Rd index 50678692..34d375c4 100644 --- a/man/get_sites.Rd +++ b/man/get_sites.Rd @@ -88,20 +88,11 @@ and datasets located at that site. } } \examples{ -\donttest{ +\dontrun{ ## Find sites with a min altitude of 12m and a max altitude of 25m -tryCatch({ - sites_12to25 <- get_sites(altmin=12, altmax=25) -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) -## Return all sites, using a minimum altitude of 2500m (returns >500 sites): -tryCatch({ - sites_2500 <- get_sites(altmin=2500, all_data = TRUE) -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) - ## To find sites in Brazil +sites_12to25 <- get_sites(altmin=12, altmax=25) +sites_2500 <- get_sites(altmin=2500, all_data = TRUE) +## To find sites in Brazil brazil <- '{"type": "Polygon", "coordinates": [[ [-73.125, -9.102096738726443], @@ -109,20 +100,7 @@ brazil <- '{"type": "Polygon", [-36.5625,-7.710991655433217], [-68.203125,13.923403897723347], [-73.125,-9.102096738726443]]]}' -tryCatch({ brazil_sites <- get_sites(loc = brazil[1]) -# Finding all sites with Liliaceae pollen in 1000 year bins: -lilysites <- c() -for (i in seq(0, 10000, by = 1000)) { - lily <- get_sites(taxa=c("Liliaceae"), - ageyoung = i - 500, - ageold = i + 500, - all_data = TRUE) - lilysites <- c(lilysites, length(lily)) -} -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) } } \author{ diff --git a/man/get_speleothems.Rd b/man/get_speleothems.Rd index def9d9b4..f9702f42 100644 --- a/man/get_speleothems.Rd +++ b/man/get_speleothems.Rd @@ -36,13 +36,9 @@ vector of dataset IDs. } } \examples{ -{ +\dontrun{ ## Find speleothems by numeric datasetid: -tryCatch({ - speleo <- get_speleothems(c(2,5)) -}, error = function(e) { - message("Neotoma server not responding. 
Try again later.") -}) +speleo <- get_speleothems(c(2,5)) } } \author{ diff --git a/man/get_stats.Rd b/man/get_stats.Rd index 2bf5f1e3..ba80d05d 100644 --- a/man/get_stats.Rd +++ b/man/get_stats.Rd @@ -36,7 +36,7 @@ information about the overall number of sites/datasets (using an arbitrarily high value for \code{end}). } \examples{ -\donttest{ +\dontrun{ tryCatch({ last_month <- get_stats(start = 0, end = 1, type = "dsdbmonth") }, error = function(e) { diff --git a/man/get_table.Rd b/man/get_table.Rd index 03b0b96a..66a341ab 100644 --- a/man/get_table.Rd +++ b/man/get_table.Rd @@ -22,12 +22,8 @@ Call Neotoma and return a table (with limits & offsets for large tables) } \examples{ -{ +\dontrun{ # Returns only the first 25 specimen records. -tryCatch({ someSpec <- get_table('specimens') -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) } } diff --git a/man/getids.Rd b/man/getids.Rd index 393b31e5..6f01db3b 100644 --- a/man/getids.Rd +++ b/man/getids.Rd @@ -32,13 +32,9 @@ This function parses a site object, from \code{site} to site, collectionunit and dataset IDs for each element within the site. } \examples{ -\donttest{ -tryCatch({ +\dontrun{ marion <- get_sites(sitename = "Marion Lake") collunitids <- getids(collunits(marion)) -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) } } \author{ diff --git a/man/pingNeotoma.Rd b/man/pingNeotoma.Rd index 38701bbb..801e4ba1 100644 --- a/man/pingNeotoma.Rd +++ b/man/pingNeotoma.Rd @@ -19,7 +19,7 @@ A quick function to test whether or not the Neotoma Database API is currently running. 
} \examples{ -{ +\dontrun{ test_connection <- pingNeotoma("neotoma") } } diff --git a/man/plotLeaflet-sites-method.Rd b/man/plotLeaflet-sites-method.Rd index 8fe958cc..4be5e233 100644 --- a/man/plotLeaflet-sites-method.Rd +++ b/man/plotLeaflet-sites-method.Rd @@ -16,7 +16,7 @@ Plot sites on a leaflet map } \examples{ -\donttest{ +\dontrun{ # Note that by default the limit for queries is 25 records: tryCatch({ modernSites <- get_sites(keyword = "Modern") diff --git a/man/samples-sites-method.Rd b/man/samples-sites-method.Rd index 305aa752..35dadaf1 100644 --- a/man/samples-sites-method.Rd +++ b/man/samples-sites-method.Rd @@ -16,13 +16,10 @@ Obtain all samples within a sites object } \examples{ -{ -tryCatch({ +\dontrun{ +# Get full data download from API and create a long table with samples data. dw <- get_downloads(1) pollen <- samples(dw) -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) } } \author{ diff --git a/man/set_server.Rd b/man/set_server.Rd index b0f62deb..4e4bad96 100644 --- a/man/set_server.Rd +++ b/man/set_server.Rd @@ -18,7 +18,7 @@ Choose to pull Neotoma data from the main Neotoma server, the development server or from a local instance of the API. } \examples{ -\donttest{ +\dontrun{ # The user is running the API locally using the node/express API # cloned from github: https://github.com/NeotomaDB/api_nodetest set_server(server = "local") diff --git a/man/speleothemdetails-sites-method.Rd b/man/speleothemdetails-sites-method.Rd index d03ae053..706f9d39 100644 --- a/man/speleothemdetails-sites-method.Rd +++ b/man/speleothemdetails-sites-method.Rd @@ -17,16 +17,12 @@ Obtain elements on the speleothems level Experimental function: API and behavior may change. 
} \examples{ -\donttest{ -tryCatch({ +\dontrun{ kesang <- get_sites(sitename = "Kesang cave") \%>\% get_datasets() \%>\% filter(datasettype == "pollen") \%>\% get_speleothems() sp <- speleothemdetails(kesang) -}, error = function(e) { -message("Neotoma server not responding. Try again later.") -}) } } \author{ diff --git a/man/speleothems-sites-method.Rd b/man/speleothems-sites-method.Rd index d732211e..96d42b77 100644 --- a/man/speleothems-sites-method.Rd +++ b/man/speleothems-sites-method.Rd @@ -17,13 +17,9 @@ Obtain all speleothems within a sites object Experimental function: API and behavior may change. } \examples{ -{ -tryCatch({ +\dontrun{ ds <- get_datasets(37302) sp <- speleothems(ds) -}, error = function(e) { -message("Neotoma server not responding. Try again later.") -}) } } \author{ diff --git a/man/sub-sub.Rd b/man/sub-sub.Rd index e7eeeaa5..e2f21f51 100644 --- a/man/sub-sub.Rd +++ b/man/sub-sub.Rd @@ -54,12 +54,8 @@ Obtain one of the elements within a \code{sites}, \code{collectionunits}, \code{datasets}, etc... Neotoma objects. } \examples{ -\donttest{ -tryCatch({ - some_site <- get_sites(sitename = "Site\%", limit=3) - some_site[[2]] -}, error = function(e) { - message("Neotoma server not responding. Try again later.") -}) +\dontrun{ +some_site <- get_sites(sitename = "Site\%", limit=3) +some_site[[2]] } } diff --git a/man/taxa-sites-method.Rd b/man/taxa-sites-method.Rd index 10219715..ec1f1a1f 100644 --- a/man/taxa-sites-method.Rd +++ b/man/taxa-sites-method.Rd @@ -17,15 +17,11 @@ elements and other features within a set of records. Extract taxonomic data from a set of sites. } \examples{ -\donttest{ -tryCatch({ +\dontrun{ somesites <- get_sites(datasettype = "diatom") \%>\% get_downloads() diatomtaxa <- taxa(somesites) -}, error = function(e) { -message("Neotoma server not responding. 
Try again later.") -}) - } +} } \author{ Socorro Dominguez \email{dominguezvid@wisc.edu} diff --git a/man/toWide.Rd b/man/toWide.Rd index 40366c78..ae6457d9 100644 --- a/man/toWide.Rd +++ b/man/toWide.Rd @@ -40,8 +40,7 @@ Obtain a wide table with information regarding of samples grouped by variablename and depth/age. } \examples{ -\donttest{ -tryCatch({ +\dontrun{ fc_sites <- neotoma2::get_datasets(limit=5, datasettype = "vertebrate fauna") fc_ds <- fc_sites \%>\% neotoma2::get_downloads() @@ -49,9 +48,6 @@ fc_dl1 <- fc_dl[[1]] fc_smp <- samples(fc_dl1) toWide(fc_smp, ecologicalgroups=c('AVES', 'RODE'), elementtypes='bone/tooth', unit='present/absent') -}, error = function(e) { -message("Neotoma server not responding. Try again later.") -}) } } \author{ diff --git a/vignettes/neotoma2-package.Rmd b/vignettes/neotoma2-package.Rmd index eebc53f0..2fc070ce 100644 --- a/vignettes/neotoma2-package.Rmd +++ b/vignettes/neotoma2-package.Rmd @@ -14,22 +14,18 @@ library(dplyr) library(neotoma2) ``` +> **Note** +> This package is an interface to an external web API. +> To comply with CRAN policies on Internet access, code examples in this vignette +> are shown but not executed during CRAN checks. +> Users can run all examples locally after installation. + ```{r setOpts, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -api_available <- FALSE -res <- pingNeotoma() -if (res$status_code == 200) { - api_available <- TRUE - } -# Disable evaluation globally if API is down knitr::opts_chunk$set(eval = FALSE, error = TRUE) ``` -```{r setup_md, include=FALSE, error=TRUE} +```{r setup_md, include=FALSE} safe_eval <- function(expr, fallback = "N/A") { tryCatch(eval(expr, envir = .GlobalEnv), error = function(e) fallback) } @@ -89,7 +85,7 @@ All sites in Neotoma have a unique numeric identifier. With the `neotoma2` pack If we're looking for a site and we know its specific identifier, we can use the simplest implementation of `get_sites()`. 
Here we are searching for a site (Alexander Lake), where we know that the siteid for the record in Neotoma is `24`. We can get these siteids using the [Neotoma Explorer web application](https://apps.neotomadb.org/explorer/), or if we have some familiarity with the site records already. -```{r getSiteBySiteID, error=TRUE, eval=FALSE} +```{r getSiteBySiteID} # Search for site by a single numeric ID: alex <- get_sites(24) alex @@ -107,14 +103,14 @@ Once you search for a site, the `neotoma2` R package makes a call to the Neotoma Often we do not know the particular `siteid`. If we're looking for a site and we know its name or a part of its name, we can search using the function with the `sitename` argument, `get_site(sitename = 'XXX')`, where `'XXX'` is the site name. This does not support multiple text strings (i.e., you can't use `c()`). -```{r getsitename, error=TRUE} +```{r getsitename} alex <- get_sites(sitename = "Alexander Lake") alex ``` Neotoma uses a Postgres Database to manage data. Postgres uses the `%` sign as a general wildcard, so we can use the `%` in the `sitename` argument operator to help us find sites when we're not sure the exact match. Note that the search is case **insensitive** so a search for `alex%` or `Alex%` will return the same results. -```{r sitewithwildcardname, error=TRUE} +```{r sitewithwildcardname} alex <- get_sites(sitename = 'Alex%') alex ``` @@ -131,7 +127,7 @@ We offer several methods of searching because different users have different req We can see how these age bounds differ: -```{r agebounds, eval=FALSE, error=TRUE} +```{r agebounds} # Note, we are using the `all_data = TRUE` flag here to avoid the default limit of 25 records, discussed below. # Because these queries are searching through every record they are slow and and are not # run in knitting this vignette. 
@@ -148,14 +144,14 @@ Although the `sites` are structured using S4 objects (see [Hadley Wickham's S4 d The `alex` object is composed of several smaller objects of class `site`. We can call any individual site using `[[ ]]`, placing the index of the desired object between the brackets. Then we can also call the particular variable we want using the `$` symbol. -```{r extractElement, error=TRUE} +```{r extractElement} alex <- get_sites(sitename = "Alexander Lake") alex[[1]]$siteid ``` -The elements within a `site` are the same as the defined columns within the Neotoma [`ndb.sites`](https://open.neotomadb.org/dbschema/ndb/tables/sites.html) table, with the exception of the `collunits` slot, which contains the collection units and associated datasets that are found within a site. You can see all the `site` slots using the `names()` function. You can select individual elements of a `site`, and you can assign values to these parameters: +The elements within a `site` are the same as the defined columns within the Neotoma [`ndb.sites`](https://open.neotomadb.org/dbschema/tables/sites.html) table, with the exception of the `collunits` slot, which contains the collection units and associated datasets that are found within a site. You can see all the `site` slots using the `names()` function. You can select individual elements of a `site`, and you can assign values to these parameters: -```{r showallNamesSite, error=TRUE} +```{r showallNamesSite} names(alex[[1]]) # Modify a value using $<- assignment: @@ -175,7 +171,7 @@ Using assignment, we can add information programmatically, for example, by worki As explained above, a `site` is the fundamental unit of the Neotoma Database. If you are working with your own data, you might want to create a `site` object to allow it to interact with other data within Neotoma. You can create a site with the `set_site()` function. It will ask you to provide important information such as `sitename`, `lat`, and `long` attributes. 
-```{r setsitefunction, error=TRUE} +```{r setsitefunction} my_site <- set_site(sitename = "My Lake", geography = st_sf(a = 3, st_sfc(st_point(1:2))), description = "my lake", @@ -187,7 +183,7 @@ If we have a set of sites that we are analyzing, we can add the new site to the This method allows us to begin modifying site information for existing sites if we have updated knowledge about site properties. -```{r addtosites, error=TRUE} +```{r addtosites} # Add a new site that's been edited using set_site() longer_alex <- c(alex, my_site) # Or replace an element within the existing list of sites @@ -217,7 +213,7 @@ If you need to get to a deeper level of the sites object, you may want to look a Getting the datasets by id is the easiest call, you can also pass a vector of IDs or, if you already have a `sites` object, you can pass a sites object. -```{r getdatasetsbyid, error=TRUE} +```{r getdatasetsbyid} # Getting datasets by ID my_datasets <- get_datasets(c(5, 10, 15, 20)) my_datasets @@ -225,7 +221,7 @@ my_datasets You can also retrieve datasets by type directly from the API. -```{r getdatasetsbytype, error=TRUE} +```{r getdatasetsbytype} # Getting datasets by type my_pollen_datasets <- get_datasets(datasettype = "pollen", limit = 25) my_pollen_datasets @@ -235,7 +231,7 @@ It can be computationally intensive to obtain the full set of records for `sites We can use that `all_data = TRUE` in R in the following way: -```{r all_data, eval=FALSE, error=TRUE} +```{r all_data} allSites_dt <- get_sites(datasettype = "diatom") allSites_dt_all <- get_sites(datasettype = "diatom", all_data = TRUE) @@ -250,7 +246,7 @@ You can get the coordinates to create a GeoJson bounding box from [here](https:/ Accessing datasets by bounding box: -```{r boundingBox, error=TRUE} +```{r boundingBox} brazil <- '{"type": "Polygon", "coordinates": [[ [-73.125, -9.102], @@ -272,7 +268,7 @@ Now we have an object called `brazil_datasets` that contains `r tryCatch(length( You can plot these findings! 
-```{r leafletBrazil, error=TRUE} +```{r leafletBrazil} plotLeaflet(brazil_datasets) ``` @@ -280,7 +276,7 @@ plotLeaflet(brazil_datasets) Sometimes we take a large number of records, do some analysis, and then choose to select a subset. For example, we may want to select all sites in a region, and then subset those by dataset type. If we want to look at only the geochronological datasets from Brazil, we can start with the set of records returned from our `get_datasets()` query, and then use the `filter` function in `neotoma2` to select only those datasets that are geochronologic: -```{r filterBrazil, error=TRUE} +```{r filterBrazil} brazil_dates <- neotoma2::filter(brazil_datasets, datasettype == "geochronologic") @@ -309,7 +305,7 @@ Once we have the set of records we wish to examine, we then want to recover the Assuming we continue with our example from Brazil, we want to extract records from the country, filter to only pollen records with samples covering the last 10,000 years, and then look at the relative frequency of taxa across sites. We might do something like this: -```{r filterAndShowTaxa, error=TRUE} +```{r filterAndShowTaxa} brazil <- '{"type": "Polygon", "coordinates": [[ [-73.125, -9.102], @@ -342,7 +338,7 @@ In this code chunk we define the bounding polygon for our sites, filter by time ## Publications -Many Neotoma records have publications associated with them. The `publication` object (and the `publications` collection) provide the opportunity to do this. The [`publication`](https://open.neotomadb.org/dbschema/ndb/tables/publications.html) table in Neotoma contains an extensive number of fields. The methods for `publications` in the `neotoma2` package provide us with tools to retrieve publication data from Neotoma, to set and manipulate publication data locally, and to retrieve publication data from external sources (e.g., using a DOI). +Many Neotoma records have publications associated with them. 
The `publication` object (and the `publications` collection) provide the opportunity to do this. The [`publication`](https://open.neotomadb.org/dbschema/tables/publications.html) table in Neotoma contains an extensive number of fields. The methods for `publications` in the `neotoma2` package provide us with tools to retrieve publication data from Neotoma, to set and manipulate publication data locally, and to retrieve publication data from external sources (e.g., using a DOI). ### `get_publications()` from Neotoma @@ -352,14 +348,14 @@ The most simple case is a search for a publication based on one or more publicat We can use a single publication ID or multiple IDs. In either case the API returns the publication(s) and creates a new `publications` object (which consists of multiple individual `publication`s). -```{r pubsbyid, eval=FALSE, error=TRUE} +```{r pubsbyid} one <- get_publications(12) two <- get_publications(c(12, 14)) ``` From there we can then subset and extract elements from the list using the standard `[[` format. For example: -```{r showSinglePub, eval=FALSE, error=TRUE} +```{r showSinglePub} two[[2]] ``` @@ -379,7 +375,7 @@ We can also use search elements to search for publications. The `get_publicatio * `limit` * `offset` -```{r fulltestPubSearch, error=TRUE} +```{r fulltestPubSearch} michPubs <- get_publications(search = "Michigan", limit = 2) ``` @@ -387,7 +383,7 @@ This results in a set of `r tryCatch(length(michPubs), error = function(e) "N/A" Text matching in Neotoma is approximate, meaning it is a measure of the overall similarity between the search string and the set of article titles.
This means that using a nonsense string may still return results: -```{r nonsenseSearch, error=TRUE} +```{r nonsenseSearch} noise <- get_publications(search = "Canada Banada Nanada", limit = 5) ``` @@ -395,7 +391,7 @@ This returns a result set of length `r tryCatch(length(noise), error = function( This returns the (Neotoma) ID, the citation and the publication DOI (if that is stored in Neotoma). We can get the first publication using the standard `[[` nomenclature: -```{r getSecondPub, eval=FALSE, error=TRUE} +```{r getSecondPub} two[[1]] ``` @@ -403,7 +399,7 @@ The output will look similar to the output for `two` above, however you will see We can select an array of `publication` objects using the `[[` method, either as a sequence (`1:10`) or as a numeric vector (`c(1, 2, 3)`): -```{r subsetPubs, eval=FALSE, error=TRUE} +```{r subsetPubs} # Select publications with Neotoma Publication IDs 1 - 10. pubArray <- get_publications(1:10) # Select the first five publications: @@ -415,7 +411,7 @@ subPub Just as we can use the `set_site()` function to set new site information, we can also create new publication information using `set_publications()`. With `set_publications()` you can enter as much or as little of the article metadata as you'd like, but it's designed (in part) to use the CrossRef API to return information from a DOI. -```{r setNewPub, eval=FALSE, error=TRUE} +```{r setNewPub} new_pub <- set_publications( articletitle = "Myrtle Lake: a late- and post-glacial pollen diagram from northern Minnesota", journal = "Canadian Journal of Botany", @@ -424,44 +420,44 @@ volume = 46) A `publication` has a large number of slots that can be defined.
These may be left blank, they may be set directly after the publication is defined: -```{r setPubValue, eval=FALSE, error=TRUE} +```{r setPubValue} new_pub@pages <- "1397-1410" ``` ## Workshops and Code Examples * 2022 International AL/IPA Meeting; Bariloche, Argentina -* [English Language Simple Workflow](https://open.neotomadb.org/Workshops/IAL_IPA-November2022/simple_workflow.html) +* [English Language Simple Workflow](https://open.neotomadb.org/Workshops/2022_IAL-IPA/simple_workflow.html) * Topics: Simple search, climate gradients, stratigraphic plotting * Spatial Domain: South America * Dataset Types: Diatoms -* [Spanish Language Simple Workflow](https://open.neotomadb.org/Workshops/IAL_IPA-November2022/simple_workflow_ES.html) +* [Spanish Language Simple Workflow](https://open.neotomadb.org/Workshops/2022_IAL-IPA/simple_workflow_ES.html) * Topics: Simple search, climate gradients, stratigraphic plotting * Spatial Domain: South America * Dataset Types: Diatoms -* [English Language Complex Workflow](https://open.neotomadb.org/Workshops/IAL_IPA-November2022/complex_workflow.html) +* [English Language Complex Workflow](https://open.neotomadb.org/Workshops/2022_IAL-IPA/complex_workflow.html) * Topics: Chronology building, Bchron * Spatial Domain: South America * Dataset Types: Diatoms -* [Spanish Language Complex Workflow](https://open.neotomadb.org/Workshops/IAL_IPA-November2022/complex_workflow_ES.html) +* [Spanish Language Complex Workflow](https://open.neotomadb.org/Workshops/2022_IAL-IPA/complex_workflow_ES.html) * Topics: Chronology building, Bchron * Spatial Domain: South America * Dataset Types: Diatoms * 2022 European Pollen Database Meeting; Prague, Czech Republic -* [English Language Simple Workflow](https://open.neotomadb.org/Workshops/EPD-May2022/simple_workflow.html) +* [English Language Simple Workflow](https://open.neotomadb.org/Workshops/2022_EPD/simple_workflow.html) * Topics: Simple search, climate gradients, stratigraphic plotting, taxonomic 
harmonization * Spatial Domain: Europe/Czech Republic * Dataset Types: Pollen -* [English Language Complex Workflow](https://open.neotomadb.org/Workshops/EPD-May2022/complex_workflow.html) +* [English Language Complex Workflow](https://open.neotomadb.org/Workshops/2022_EPD/complex_workflow.html) * Topics: Chronology building, Bchron * Spatial Domain: Europe/Czech Republic * Dataset Types: Pollen * 2022 American Quaternary Association Meeting -* [English Language Simple Workflow](https://open.neotomadb.org/Workshops/AMQUA-June2022/simple_workflow.html) +* [English Language Simple Workflow](https://open.neotomadb.org/Workshops/2022_AMQUA/simple_workflow.html) * Topics: Simple search, climate gradients, stratigraphic plotting * Spatial Domain: North America * Dataset Types: Pollen -* [English Language Complex Workflow](https://open.neotomadb.org/Workshops/AMQUA-June2022/complex_workflow.html) +* [English Language Complex Workflow](https://open.neotomadb.org/Workshops/2022_AMQUA/complex_workflow.html) * Topics: Chronologies * Spatial Domain: North America * Dataset Types: Pollen