-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
File from actel issue #164
# Local file name for the example Fathom CSV; the download below writes to
# this path and the readers further down read from it.
fathom_csv <- "NexTrak-R1.801032.2024-10-25.093307.csv"

# Fetch the example file attached to actel issue #164.
download.file(
  url = "https://github.com/user-attachments/files/18086120/NexTrak-R1.801032.2024-10-25.093307.csv",
  destfile = fathom_csv
)
data.table version:
#' Read one record type from a Fathom CSV export using data.table
#'
#' Shells out to `grep` (or `FINDSTR` on Windows, where `grep` is not
#' available by default) so that only the lines of the requested record type
#' are handed to `data.table::fread()`. Column names are taken from the
#' matching `<data_type>_DESC` line of the same file.
#'
#' @param fathom_csv Path to the CSV file.
#' @param data_type Record type to extract, as written in the first field of
#'   each line. Defaults to `"DET"`.
#' @return A `data.table` with one row per line whose first field is
#'   `data_type`, named after the fields of the `<data_type>_DESC` line.
read_fathom_dt <- function(fathom_csv, data_type = "DET") {
  # Build the shell command that prints every line whose first field is
  # exactly `record_type`. The match is anchored to the start of the line and
  # includes the trailing comma, so searching for "DET" no longer also returns
  # the "DET_DESC" line (or any other line that merely contains "DET").
  # The path and pattern are quoted so spaces or shell metacharacters in
  # either cannot break the command.
  line_filter_cmd <- function(record_type) {
    if (.Platform$OS.type == "windows") {
      # /B anchors the match at the beginning of the line; /C: makes FINDSTR
      # treat the search string literally.
      paste(
        "FINDSTR", "/B", paste0('/C:"', record_type, ',"'),
        shQuote(fathom_csv)
      )
    } else {
      paste(
        "grep", "-e", shQuote(paste0("^", record_type, ",")),
        "--", shQuote(fathom_csv)
      )
    }
  }

  # The single *_DESC line holds the field names for this record type.
  column_names <- as.character(
    data.table::fread(
      cmd = line_filter_cmd(paste0(data_type, "_DESC")),
      header = FALSE
    )
  )

  data.table::fread(
    cmd = line_filter_cmd(data_type),
    header = FALSE,
    col.names = column_names
  )
}
readr version:
#' Read one record type from a Fathom CSV export using readr
#'
#' The field names are looked up by shelling out to `grep` (`FINDSTR` on
#' Windows) for the `<data_type>_DESC` line. The file is then read in chunks
#' with `readr::read_csv_chunked()`, keeping only the rows whose first field
#' equals `data_type`, and the surviving rows are passed through
#' `readr::type_convert()`.
#'
#' @param fathom_csv Path to the CSV file.
#' @param data_type Record type to extract, as written in the first field of
#'   each line. Defaults to `"DET"`.
#' @return A data frame of the rows of type `data_type`, with columns named
#'   after the fields of the `<data_type>_DESC` line.
read_fathom_readr <- function(fathom_csv, data_type = "DET") {
  desc_type <- paste0(data_type, "_DESC")

  # Still relies on an external search tool for the header line. Arguments
  # are quoted because system2() does not quote `args` itself, and the match
  # is anchored to the first field so that only the description line for this
  # record type is returned.
  if (.Platform$OS.type == "windows") {
    search_fun <- "FINDSTR"
    # /B anchors at the beginning of the line; /C: is a literal search string.
    search_args <- c(
      "/B", paste0('/C:"', desc_type, ',"'), shQuote(fathom_csv)
    )
  } else {
    search_fun <- "grep"
    search_args <- c(
      "-e", shQuote(paste0("^", desc_type, ",")), "--", shQuote(fathom_csv)
    )
  }

  header_lines <- system2(search_fun, search_args, stdout = TRUE)
  if (length(header_lines) == 0L) {
    stop(
      "No '", desc_type, "' line found in ", fathom_csv,
      call. = FALSE
    )
  }
  # Use the first matching line only, so a repeated description line cannot
  # produce a doubled set of column names.
  header <- strsplit(header_lines[[1L]], ",", fixed = TRUE) |>
    unlist()

  # Chunk callback: keep rows whose first field is the requested record type.
  # `[[` extracts the column as a plain vector and `%in%` never yields NA, so
  # rows with a missing first field are dropped rather than turned into
  # all-NA rows.
  select_detections <- function(x, pos) {
    x[x[[1L]] %in% data_type, ]
  }

  readr::read_csv_chunked(
    fathom_csv,
    readr::DataFrameCallback$new(select_detections),
    col_names = header,
    col_types = readr::cols()
  ) |>
    readr::type_convert()
}
Drag race:
# Time both readers against the same local file, 20 evaluations each.
microbenchmark::microbenchmark(
  list = alist(
    readr = read_fathom_readr(fathom_csv),
    dt = read_fathom_dt(fathom_csv)
  ),
  times = 20
)
Unit: milliseconds
expr min lq mean median uq max neval cld
readr 2311.1762 2474.3908 2670.6241 2616.3300 2717.2186 4093.1096 20 a
dt 299.9092 309.8798 320.0476 313.5691 328.8836 361.5658 20 b
Note that data.table can read from HTTP if not on Windows, if desired; readr, as coded, cannot.
# Call each reader with the remote URL of the example file instead of a
# local path.
read_fathom_readr(
  fathom_csv = "https://github.com/user-attachments/files/18086120/NexTrak-R1.801032.2024-10-25.093307.csv"
)
read_fathom_dt(
  fathom_csv = "https://github.com/user-attachments/files/18086120/NexTrak-R1.801032.2024-10-25.093307.csv"
)
Metadata
Metadata
Assignees
Labels
No labels