Skip to content

Commit c3bda6c

Browse files
committed
Updated functions for SewerRat version 1.2.0.
- listRegisteredDirectories() supports pagination.
- query() has a metadata=FALSE option if metadata is not required.
- Added the listFields() and listTokens() functions.
1 parent e852178 commit c3bda6c

18 files changed

+503
-39
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
*.swp
22
*.html
3+
*.rds

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: SewerRat
2-
Version: 0.3.5
3-
Date: 2025-03-20
2+
Version: 0.4.0
3+
Date: 2025-06-09
44
Title: Client for the SewerRat API
55
Description:
66
Search metadata files across a shared filesystem via the SewerRat API.

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
export(deregister)
44
export(formatQueryResults)
5+
export(listFields)
56
export(listFiles)
67
export(listRegisteredDirectories)
8+
export(listTokens)
79
export(query)
810
export(register)
911
export(retrieveDirectory)

R/listFields.R

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#' List metadata fields
2+
#'
3+
#' List all known metadata fields in the SewerRat database.
4+
#'
5+
#' @inheritParams query
6+
#' @param pattern String specifying a pattern for filtering fields, using the usual \code{*} and \code{?} wildcards.
7+
#' Only fields matching the pattern will be returned.
8+
#' If \code{NULL}, no filtering is performed.
9+
#' @param count Logical scalar indicating whether to count the number of metadata files associated with each field.
10+
#'
11+
#' @return List of named lists, where each inner list corresponds to a field and contains:
12+
#' \itemize{
13+
#' \item \code{field}, string containing the field.
14+
#' \item \code{count}, integer scalar specifying the number of files associated with the field.
15+
#' This is only present if \code{count=TRUE} in the arguments.
16+
#' }
17+
#'
18+
#' @examples
19+
#' # Starting up an example SewerRat service:
20+
#' info <- startSewerRat()
21+
#'
22+
#' # Mocking up a directory of stuff to query.
23+
#' mydir <- tempfile()
24+
#' dir.create(mydir)
25+
#' write(file=file.path(mydir, "metadata.json"), '{ "first": "Aaron", "last": "Lun" }')
26+
#' dir.create(file.path(mydir, "diet"))
27+
#' write(file=file.path(mydir, "diet", "metadata.json"),
28+
#' '{ "meal": "lunch", "ingredients": "water" }')
29+
#'
30+
#' # Registering it:
31+
#' register(mydir, "metadata.json", url=info$url)
32+
#'
33+
#' # Pulling out all the fields.
34+
#' listFields(info$url)
35+
#' listFields(info$url, pattern="fir*")
36+
#' listFields(info$url, count=TRUE)
37+
#'
38+
#' @export
39+
#' @import httr2
40+
#' @importFrom utils URLencode
41+
listFields <- function(url, pattern=NULL, count=FALSE, number=1000, on.truncation=c("message", "warning", "none")) {
    # Assemble the optional query parameters for the '/fields' endpoint.
    query <- character(0)

    if (!is.null(pattern)) {
        query <- c(query, paste0("pattern=", URLencode(pattern, reserved=TRUE)))
    }
    if (count) {
        query <- c(query, "count=true")
    }

    on.truncation <- match.arg(on.truncation)

    # Always record the caller's limit so that it is defined for every
    # 'on.truncation' mode, rather than relying on lazy evaluation of a
    # variable that only exists in some branches.
    original.number <- number
    if (on.truncation != "none") {
        # Request one extra entry so that truncation can be detected afterwards.
        number <- number + 1L
    }

    stub <- "/fields"
    use.question <- TRUE
    if (length(query)) {
        stub <- paste0(stub, "?", paste(query, collapse="&"))
        use.question <- FALSE
    }

    # Follow the 'next' links in each response until enough entries are
    # collected or the response no longer contains a 'next' link.
    collected <- list()
    while (length(collected) < number) {
        current.url <- paste0(url, stub)
        if (!is.infinite(number)) {
            sep <- if (use.question) "?" else "&"
            # Format explicitly: paste0() on a double such as 1e5 would yield
            # "1e+05", which is not a valid integer in the query string.
            remaining <- format(number - length(collected), scientific=FALSE, trim=TRUE)
            current.url <- paste0(current.url, sep, "limit=", remaining)
        }

        req <- request(current.url)
        req <- handle_error(req)
        res <- req_perform(req)

        payload <- resp_body_json(res)
        collected <- c(collected, payload$results)
        stub <- payload$`next`
        # From here on the limit is always appended with '&', as it was originally.
        use.question <- FALSE
        if (is.null(stub)) {
            break
        }
    }

    # Emits the requested message/warning and drops the extra entry, if any.
    handle_truncated_pages(on.truncation, original.number, collected)
}

R/listRegisteredDirectories.R

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#'
33
#' List the directories that were registered in SewerRat.
44
#'
5-
#' @param url String containing the URL of the SewerRat REST API.
5+
#' @inheritParams query
66
#' @param user String containing the name of a user.
77
#' If not \code{NULL}, results are filtered to directories registered by this user.
88
#' If \code{TRUE}, this is set to the current user.
@@ -47,7 +47,7 @@
4747
#'
4848
#' @export
4949
#' @import httr2
50-
listRegisteredDirectories <- function(url, user=NULL, contains=NULL, prefix=NULL, within=NULL, exists=NULL) {
50+
listRegisteredDirectories <- function(url, user=NULL, contains=NULL, prefix=NULL, within=NULL, exists=NULL, number=100, on.truncation=c("message", "warning", "none")) {
5151
query <- character(0)
5252
if (!is.null(user) && !isFALSE(user)) {
5353
if (isTRUE(user)) {
@@ -68,14 +68,39 @@ listRegisteredDirectories <- function(url, user=NULL, contains=NULL, prefix=NULL
6868
query <- c(query, paste0("exists=", tolower(exists)))
6969
}
7070

71-
url <- paste0(url, "/registered")
71+
on.truncation <- match.arg(on.truncation)
72+
if (on.truncation != "none") {
73+
original.number <- number
74+
number <- number + 1L
75+
}
76+
77+
stub <- "/registered"
78+
use.question <- TRUE
7279
if (length(query)) {
73-
url <- paste0(url, "?", paste(query, collapse="&"))
80+
stub <- paste0(stub, "?", paste(query, collapse="&"))
81+
use.question <- FALSE
7482
}
7583

76-
req <- request(url)
77-
req <- handle_error(req)
78-
res <- req_perform(req)
79-
resp_body_json(res)
80-
}
84+
collected <- list()
85+
while (length(collected) < number) {
86+
current.url <- paste0(url, stub)
87+
if (!is.infinite(number)) {
88+
sep <- if (use.question) "?" else "&"
89+
current.url <- paste0(current.url, sep, "limit=", number - length(collected))
90+
}
91+
92+
req <- request(current.url)
93+
req <- handle_error(req)
94+
res <- req_perform(req)
8195

96+
payload <- resp_body_json(res)
97+
collected <- c(collected, payload$results)
98+
stub <- payload$`next`
99+
use.question <- FALSE
100+
if (is.null(stub)) {
101+
break
102+
}
103+
}
104+
105+
handle_truncated_pages(on.truncation, original.number, collected)
106+
}

R/listTokens.R

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#' List tokens
2+
#'
3+
#' List available tokens in the SewerRat database.
4+
#'
5+
#' @inheritParams query
6+
#' @param pattern String specifying a pattern for filtering tokens, using the usual \code{*} and \code{?} wildcards.
7+
#' Only tokens matching the pattern will be returned.
8+
#' If \code{NULL}, no filtering is performed.
9+
#' @param field String specifying a metadata property field for filtering tokens.
10+
#' Only tokens found in the specified field will be returned.
11+
#' If \code{NULL}, no filtering is performed.
12+
#' @param count Logical scalar indicating whether to count the number of metadata files associated with each token.
13+
#'
14+
#' @return List of named lists, where each inner list corresponds to a token and contains:
15+
#' \itemize{
16+
#' \item \code{token}, string containing the token.
17+
#' \item \code{count}, integer scalar specifying the number of files associated with the token.
18+
#' This is only present if \code{count=TRUE} in the arguments.
19+
#' }
20+
#'
21+
#' @examples
22+
#' # Starting up an example SewerRat service:
23+
#' info <- startSewerRat()
24+
#'
25+
#' # Mocking up a directory of stuff to query.
26+
#' mydir <- tempfile()
27+
#' dir.create(mydir)
28+
#' write(file=file.path(mydir, "metadata.json"), '{ "first": "Aaron", "last": "Lun" }')
29+
#' dir.create(file.path(mydir, "diet"))
30+
#' write(file=file.path(mydir, "diet", "metadata.json"),
31+
#' '{ "meal": "lunch", "ingredients": "water" }')
32+
#'
33+
#' # Registering it:
34+
#' register(mydir, "metadata.json", url=info$url)
35+
#'
36+
#' # Pulling out all the tokens.
37+
#' listTokens(info$url)
38+
#' listTokens(info$url, pattern="lun*")
39+
#' listTokens(info$url, field="ingredients")
40+
#' listTokens(info$url, count=TRUE)
41+
#'
42+
#' @export
43+
#' @import httr2
44+
#' @importFrom utils URLencode
45+
listTokens <- function(url, pattern=NULL, field=NULL, count=FALSE, number=1000, on.truncation=c("message", "warning", "none")) {
    # Assemble the optional query parameters for the '/tokens' endpoint.
    query <- character(0)

    if (!is.null(pattern)) {
        query <- c(query, paste0("pattern=", URLencode(pattern, reserved=TRUE)))
    }
    if (!is.null(field)) {
        query <- c(query, paste0("field=", URLencode(field, reserved=TRUE)))
    }
    if (count) {
        query <- c(query, "count=true")
    }

    on.truncation <- match.arg(on.truncation)

    # Always record the caller's limit so that it is defined for every
    # 'on.truncation' mode, rather than relying on lazy evaluation of a
    # variable that only exists in some branches.
    original.number <- number
    if (on.truncation != "none") {
        # Request one extra entry so that truncation can be detected afterwards.
        number <- number + 1L
    }

    stub <- "/tokens"
    use.question <- TRUE
    if (length(query)) {
        stub <- paste0(stub, "?", paste(query, collapse="&"))
        use.question <- FALSE
    }

    # Follow the 'next' links in each response until enough entries are
    # collected or the response no longer contains a 'next' link.
    collected <- list()
    while (length(collected) < number) {
        current.url <- paste0(url, stub)
        if (!is.infinite(number)) {
            sep <- if (use.question) "?" else "&"
            # Format explicitly: paste0() on a double such as 1e5 would yield
            # "1e+05", which is not a valid integer in the query string.
            remaining <- format(number - length(collected), scientific=FALSE, trim=TRUE)
            current.url <- paste0(current.url, sep, "limit=", remaining)
        }

        req <- request(current.url)
        req <- handle_error(req)
        res <- req_perform(req)

        payload <- resp_body_json(res)
        collected <- c(collected, payload$results)
        stub <- payload$`next`
        # From here on the limit is always appended with '&', as it was originally.
        use.question <- FALSE
        if (is.null(stub)) {
            break
        }
    }

    # Emits the requested message/warning and drops the extra entry, if any.
    handle_truncated_pages(on.truncation, original.number, collected)
}

R/query.R

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#' If missing, no filtering is applied to remove old files.
1414
#' @param until A \link{POSIXt} object to filter out newer files, i.e., only files older than \code{until} will be retained.
1515
#' If missing, no filtering is applied to remove new files.
16+
#' @param metadata Logical scalar indicating whether the metadata itself should be returned.
17+
#' This can be set to \code{FALSE} for better performance if only the path is of interest.
1618
#' @param number Integer specifying the maximum number of results to return.
1719
#' This can also be \code{Inf} to return all results.
1820
#' @param on.truncation String specifying what to do when the number of results exceeds \code{number}.
@@ -26,9 +28,10 @@
2628
#' \item \code{user}, the identity of the file owner.
2729
#' \item \code{time}, the Unix time of most recent file modification.
2830
#' \item \code{metadata}, a list representing the JSON contents of the file.
31+
#' Only present if metadata retrieval was requested via \code{metadata=TRUE} in the \code{query} call.
2932
#' }
3033
#'
31-
#' For \code{formatQueryResults}, a data frame containing \code{path}, \code{user}, \code{time} and \code{metadata}.
34+
#' For \code{formatQueryResults}, a data frame containing \code{path}, \code{user}, \code{time} and (if requested) \code{metadata}.
3235
#' Each row corresponds to one of the search results in \code{results}.
3336
#' Each \code{time} is now a \link{POSIXct} object.
3437
#'
@@ -86,8 +89,7 @@
8689
#' formatQueryResults(q)
8790
#' @export
8891
#' @import httr2
89-
#' @importFrom utils head
90-
query <- function(text, user, path, from, until, url, number=100, on.truncation=c("message", "warning", "none")) {
92+
query <- function(text, user, path, from, until, url, number=100, metadata=TRUE, on.truncation=c("message", "warning", "none")) {
9193
conditions <- list()
9294

9395
if (!missing(text)) {
@@ -125,8 +127,11 @@ query <- function(text, user, path, from, until, url, number=100, on.truncation=
125127
}
126128

127129
stub <- paste0("/query?translate=true")
128-
collected <- list()
130+
if (!metadata) {
131+
stub <- paste0(stub, "&metadata=false")
132+
}
129133

134+
collected <- list()
130135
while (length(collected) < number) {
131136
current.url <- paste0(url, stub)
132137
if (!is.infinite(number)) {
@@ -148,19 +153,7 @@ query <- function(text, user, path, from, until, url, number=100, on.truncation=
148153
}
149154
}
150155

151-
if (on.truncation != "none") {
152-
if (!is.infinite(original.number) && original.number < length(collected)) {
153-
msg <- sprintf("truncated query results to the first %i matches", original.number)
154-
if (on.truncation == "warning") {
155-
warning(msg)
156-
} else {
157-
message(msg)
158-
}
159-
collected <- head(collected, original.number)
160-
}
161-
}
162-
163-
collected
156+
handle_truncated_pages(on.truncation, original.number, collected)
164157
}
165158

166159
#' @export
@@ -171,19 +164,27 @@ formatQueryResults <- function(results) {
171164
all.times <- double(N)
172165
all.users <- character(N)
173166
all.meta <- vector("list", N)
167+
has.metadata <- FALSE
174168

175169
for (i in seq_along(results)) {
176170
y <- results[[i]]
177171
all.paths[i] <- y$path
178172
all.times[i] <- y$time
179173
all.users[i] <- y$user
180-
all.meta[[i]] <- y$metadata
174+
if (!is.null(y$metadata)) {
175+
all.meta[[i]] <- y$metadata
176+
has.metadata <- TRUE
177+
}
181178
}
182179

183-
data.frame(
180+
output <- data.frame(
184181
path=all.paths,
185182
user=all.users,
186-
time=as.POSIXct(all.times),
187-
metadata=I(all.meta)
188-
)
183+
time=as.POSIXct(all.times)
184+
)
185+
if (has.metadata) {
186+
output$metadata <- I(all.meta)
187+
}
188+
189+
output
189190
}

R/startSewerRat.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#' startSewerRat() # initialize a new instance.
2626
#'
2727
#' @export
28-
startSewerRat <- function(db=tempfile(fileext=".sqlite3"), port=NULL, wait = 1, version = "1.1.3", overwrite = FALSE) {
28+
startSewerRat <- function(db=tempfile(fileext=".sqlite3"), port=NULL, wait = 1, version = "1.2.0", overwrite = FALSE) {
2929
if (!is.null(running$active)) {
3030
return(list(new=FALSE, port=running$port, url=assemble_url(running$port)))
3131
}

R/utils.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,19 @@ download_file <- function(url, path) {
8181
Sys.setFileTime(path, mod)
8282
}
8383
}
84+
85+
#' @importFrom utils head
86+
handle_truncated_pages <- function(on.truncation, original.number, collected) {
    # Internal helper for the paginated functions: reports and trims results
    # that exceed the caller's requested limit.
    #
    # on.truncation: one of "message", "warning" or "none".
    # original.number: the limit requested by the caller, possibly Inf.
    # collected: list of entries gathered across all pages.
    #
    # Returns 'collected', cut down to the first 'original.number' entries if
    # truncation handling is enabled and the limit was exceeded.

    # 'original.number' must not be touched in "none" mode: callers may leave
    # it undefined in that case and rely on lazy evaluation of the argument.
    if (on.truncation == "none") {
        return(collected)
    }

    if (is.infinite(original.number) || original.number >= length(collected)) {
        return(collected)
    }

    # Format the limit as a string rather than using "%i", which raises an
    # error for doubles that cannot be coerced to an integer (e.g., 1e10).
    msg <- sprintf(
        "truncated results to the first %s entries",
        format(original.number, scientific=FALSE, trim=TRUE)
    )
    if (on.truncation == "warning") {
        warning(msg)
    } else {
        message(msg)
    }

    head(collected, original.number)
}

0 commit comments

Comments
 (0)