weecology · gmyenni · Nov 18, 2024 · Nov 18, 2024 · Nov 18, 2024
diff --git a/DataCleaningScripts/get_regional_weather.R b/DataCleaningScripts/get_regional_weather.R
@@ -64,60 +64,88 @@ regional_wunder <- function(stationid) {
   })
 }
 
-#' Reports regional weather data with the data from Wunderground (www.wunderground.com) and 
+#' Reports regional weather data with the data from Wunderground (www.wunderground.com) and
 #' DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY).
-#' Metadata are described here: 
+#' Metadata are described here:
 #' https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf
 #' 
 #' 
 #' @example get_regional_weather()
 
 get_regional_weather <- function() {
-  
-  options(dplyr.summarise.inform = FALSE)  
-  
+
+  options(dplyr.summarise.inform = FALSE)
+
 # Portal 4sw station
   portal4sw <- read.csv(
     'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USC00026716.csv') %>%
     dplyr::rename_all(.funs = tolower) %>%
-    dplyr::mutate_all(as.character) %>% 
+    dplyr::mutate_all(as.character) %>%
     dplyr::mutate_at(c("latitude","longitude","elevation","prcp","snow","snwd","tmax","tmin","dapr","dasf",
                        "mdpr","mdsf","tobs","wt01","wt03","wt04","wt05","wt11"), as.numeric) %>%
     dplyr::mutate_at("date",as.Date) %>%
-    dplyr::mutate(day = lubridate::day(date), 
-                  month = lubridate::month(date), 
+    dplyr::mutate(day = lubridate::day(date),
+                  month = lubridate::month(date),
                   year = lubridate::year(date),
                   prcp = prcp/10,
                   tmax = tmax/10,
-                  tmin = tmin/10, 
+                  tmin = tmin/10,
                   tobs = tobs/10) %>%
     dplyr::select(year, month, day, dplyr::everything())
   all_4sw <- read.csv(file = "Weather/Portal4sw_regional_weather.csv",header=T, stringsAsFactors=FALSE)
-    all_4sw$date <- lubridate::ymd(all_4sw$date)
-    new_4sw <- dplyr::setdiff(portal4sw,all_4sw) %>% dplyr::filter(year>2010)
+  all_4sw$date <- lubridate::ymd(all_4sw$date)
+  new_4sw <- dplyr::setdiff(portal4sw,all_4sw) %>% dplyr::filter(year>2010)
 
 # San Simon station
   sansimon <- read.csv(
     'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/US1AZCH0005.csv') %>%
     dplyr::rename_all(.funs = tolower) %>%
-    dplyr::mutate_all(as.character) %>% 
+    dplyr::mutate_all(as.character) %>%
     dplyr::mutate_at(c("latitude","longitude","elevation","prcp","snow","snwd","wesd","wesf"), as.numeric) %>%
     dplyr::mutate_at("date",as.Date) %>%
-    dplyr::mutate(day = lubridate::day(date), 
-                  month = lubridate::month(date), 
+    dplyr::mutate(day = lubridate::day(date),
+                  month = lubridate::month(date),
                   year = lubridate::year(date),
                   prcp = prcp/10) %>%
     dplyr::select(year, month, day, dplyr::everything())
   all_sansimon <- read.csv(file = "Weather/Sansimon_regional_weather.csv",header=T, stringsAsFactors=FALSE)
-    all_sansimon$date <- lubridate::ymd(all_sansimon$date) 
-    new_sansimon <- dplyr::setdiff(sansimon,all_sansimon)
-
-# All wunderground stations
-  stationids <- c("KNMRODEO13","KAZSANSI26","KNMRODEO11","KAZPORTA10","KNMRODEO8","KNMANIMA10","KNMANIMA15")  
-  rodeo <- lapply(stationids,regional_wunder) %>% dplyr::bind_rows()
-  all_rodeo <- read.csv(file = "Weather/Rodeo_regional_weather.csv",header=TRUE, stringsAsFactors=FALSE)
-    all_rodeo$timestamp <- lubridate::ymd_hms(all_rodeo$timestamp) 
-    new_rodeo <- dplyr::anti_join(rodeo, all_rodeo, by = c("year","month","day","hour","timestamp","stationid"))
+  all_sansimon$date <- lubridate::ymd(all_sansimon$date)
+  new_sansimon <- dplyr::setdiff(sansimon,all_sansimon)
+
+  # All wunderground stations
+  stationids <- c("KNMRODEO13","KAZSANSI26","KNMRODEO11","KAZPORTA10","KNMRODEO8","KNMANIMA10","KNMANIMA15")
+  # Combine data from station IDs
+  rodeo <- lapply(stationids, regional_wunder) %>% bind_rows()
+  # Read the existing CSV file
+  all_rodeo <- read.csv(file = "Weather/Rodeo_regional_weather.csv", header = TRUE, stringsAsFactors = FALSE)
+  # Define a function to convert dates
+  convert_to_full_timestamp <- function(timestamp) {
+    # If timestamp matches "YYYY-MM-DD", append a default time
+    if (grepl("^\\d{4}-\\d{2}-\\d{2}$", timestamp)) {
+      return(paste0(timestamp, " 12:00:00"))
+    }
+    # Return original if no match
+    return(timestamp)
+  }
+
+  # Apply the conversion to the 'timestamp' column
+  all_rodeo$timestamp <- sapply(all_rodeo$timestamp, convert_to_full_timestamp)
+  # Convert the resulting timestamps to datetime objects
+  all_rodeo$timestamp <- ymd_hms(all_rodeo$timestamp, quiet = TRUE)
+
+  # Check for parsing issues
+  invalid_timestamps <- all_rodeo$timestamp[is.na(all_rodeo$timestamp)]
+  if (length(invalid_timestamps) > 0) {
+    cat("Invalid timestamps after conversion:\n")
+    print(invalid_timestamps)
+  }
+
+  # Perform anti_join only if 'rodeo' has data; otherwise, return 'all_rodeo'
+  new_rodeo <- if (nrow(rodeo) > 0) {
+    anti_join(rodeo, all_rodeo, by = c("year", "month", "day", "hour", "timestamp", "stationid"))
+  } else {
+    all_rodeo
+  }
 
 return(list(new_4sw=new_4sw, new_sansimon=new_sansimon, new_rodeo=new_rodeo))
 
@@ -130,9 +158,9 @@ return(list(new_4sw=new_4sw, new_sansimon=new_sansimon, new_rodeo=new_rodeo))
 #' @example append_regional_weather()
 
 append_regional_weather <- function() {
-  
+
   data <- get_regional_weather()
-  
+
   # append new data
   write.table(data$new_4sw, file = "Weather/Portal4sw_regional_weather.csv",
               row.names = FALSE, col.names = FALSE, na = "", append = TRUE, sep = ",")
@@ -141,7 +169,6 @@ append_regional_weather <- function() {
               row.names = FALSE, col.names = FALSE, na = "", append = TRUE, sep = ",")
 
   write.table(data$new_rodeo, file = "Weather/Rodeo_regional_weather.csv",
-              row.names = FALSE, col.names = FALSE, na = "", append = TRUE, 
+              row.names = FALSE, col.names = FALSE, na = "", append = TRUE,
               sep = ",", quote=c(5:7))
-
 }
diff --git a/DataCleaningScripts/update_ndvi.R b/DataCleaningScripts/update_ndvi.R
@@ -4,6 +4,8 @@
 
 `%>%` <- magrittr::`%>%`
 library(terra, warn.conflicts=FALSE, quietly = TRUE)
+library(dplyr)
+library(stringr)
 
 #' @title create_portal_area
 #'
@@ -117,14 +119,18 @@ writendvitable <- function() {
   if(file.exists("./NDVI/scenes.csv")) {
     if(file.size("./NDVI/scenes.csv") != 0L) {
   targetpath <- "./NDVI/landsat-data"
-  undone <- read.csv("./NDVI/undone-scenes.csv")
-  records <- read.csv("./NDVI/scenes.csv") %>% dplyr::filter(!display_id %in% undone$display_id)
-
-    new_data <- as.data.frame(do.call(rbind, 
-                                      apply(records, 1, summarize_ndvi_snapshot, targetpath))) %>%
-      dplyr::arrange(date,sensor)
-
-    write.table(new_data, file='./NDVI/ndvi.csv', sep = ",", row.names=FALSE, col.names=FALSE,
+  # Read the files
+  undone <- read.csv("./NDVI/undone-scenes.csv", stringsAsFactors = FALSE)
+  scenes <- read.csv("./NDVI/scenes.csv", stringsAsFactors = FALSE)
+
+  # Filter scenes where display_id is not a substring of any undone$display_id
+  records <- scenes %>%
+    filter(!sapply(display_id, function(id) any(str_detect(undone$display_id, fixed(id)))))
+
+  new_data <- as.data.frame(do.call(rbind, apply(records, 1, summarize_ndvi_snapshot, targetpath))) %>%
+    dplyr::arrange(date,sensor)
+
+  write.table(new_data, file='./NDVI/ndvi.csv', sep = ",", row.names=FALSE, col.names=FALSE,
                 append=TRUE, na="")
   }}
 }