Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 54 additions & 27 deletions DataCleaningScripts/get_regional_weather.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,60 +64,88 @@ regional_wunder <- function(stationid) {
})
}

#' Reports regional weather data with the data from Wunderground (www.wunderground.com) and
#' Reports regional weather data with the data from Wunderground (www.wunderground.com) and
#' DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY).
#' Metadata are described here:
#' Metadata are described here:
#' https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/doc/GHCND_documentation.pdf
#'
#'
#' @example get_regional_weather()

get_regional_weather <- function() {
options(dplyr.summarise.inform = FALSE)

options(dplyr.summarise.inform = FALSE)

# Portal 4sw station
portal4sw <- read.csv(
'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USC00026716.csv') %>%
dplyr::rename_all(.funs = tolower) %>%
dplyr::mutate_all(as.character) %>%
dplyr::mutate_all(as.character) %>%
dplyr::mutate_at(c("latitude","longitude","elevation","prcp","snow","snwd","tmax","tmin","dapr","dasf",
"mdpr","mdsf","tobs","wt01","wt03","wt04","wt05","wt11"), as.numeric) %>%
dplyr::mutate_at("date",as.Date) %>%
dplyr::mutate(day = lubridate::day(date),
month = lubridate::month(date),
dplyr::mutate(day = lubridate::day(date),
month = lubridate::month(date),
year = lubridate::year(date),
prcp = prcp/10,
tmax = tmax/10,
tmin = tmin/10,
tmin = tmin/10,
tobs = tobs/10) %>%
dplyr::select(year, month, day, dplyr::everything())
all_4sw <- read.csv(file = "Weather/Portal4sw_regional_weather.csv",header=T, stringsAsFactors=FALSE)
all_4sw$date <- lubridate::ymd(all_4sw$date)
new_4sw <- dplyr::setdiff(portal4sw,all_4sw) %>% dplyr::filter(year>2010)
all_4sw$date <- lubridate::ymd(all_4sw$date)
new_4sw <- dplyr::setdiff(portal4sw,all_4sw) %>% dplyr::filter(year>2010)

# San Simon station
sansimon <- read.csv(
'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/US1AZCH0005.csv') %>%
dplyr::rename_all(.funs = tolower) %>%
dplyr::mutate_all(as.character) %>%
dplyr::mutate_all(as.character) %>%
dplyr::mutate_at(c("latitude","longitude","elevation","prcp","snow","snwd","wesd","wesf"), as.numeric) %>%
dplyr::mutate_at("date",as.Date) %>%
dplyr::mutate(day = lubridate::day(date),
month = lubridate::month(date),
dplyr::mutate(day = lubridate::day(date),
month = lubridate::month(date),
year = lubridate::year(date),
prcp = prcp/10) %>%
dplyr::select(year, month, day, dplyr::everything())
all_sansimon <- read.csv(file = "Weather/Sansimon_regional_weather.csv",header=T, stringsAsFactors=FALSE)
all_sansimon$date <- lubridate::ymd(all_sansimon$date)
new_sansimon <- dplyr::setdiff(sansimon,all_sansimon)

# All wunderground stations
stationids <- c("KNMRODEO13","KAZSANSI26","KNMRODEO11","KAZPORTA10","KNMRODEO8","KNMANIMA10","KNMANIMA15")
rodeo <- lapply(stationids,regional_wunder) %>% dplyr::bind_rows()
all_rodeo <- read.csv(file = "Weather/Rodeo_regional_weather.csv",header=TRUE, stringsAsFactors=FALSE)
all_rodeo$timestamp <- lubridate::ymd_hms(all_rodeo$timestamp)
new_rodeo <- dplyr::anti_join(rodeo, all_rodeo, by = c("year","month","day","hour","timestamp","stationid"))
all_sansimon$date <- lubridate::ymd(all_sansimon$date)
new_sansimon <- dplyr::setdiff(sansimon,all_sansimon)

# All wunderground stations
stationids <- c("KNMRODEO13","KAZSANSI26","KNMRODEO11","KAZPORTA10","KNMRODEO8","KNMANIMA10","KNMANIMA15")
# Combine data from station IDs
rodeo <- lapply(stationids, regional_wunder) %>% bind_rows()
# Read the existing CSV file
all_rodeo <- read.csv(file = "Weather/Rodeo_regional_weather.csv", header = TRUE, stringsAsFactors = FALSE)
# Define a function to convert dates
convert_to_full_timestamp <- function(timestamp) {
# If timestamp matches "YYYY-MM-DD", append a default time
if (grepl("^\\d{4}-\\d{2}-\\d{2}$", timestamp)) {
return(paste0(timestamp, " 12:00:00"))
}
# Return original if no match
return(timestamp)
}

# Apply the conversion to the 'timestamp' column
all_rodeo$timestamp <- sapply(all_rodeo$timestamp, convert_to_full_timestamp)
# Convert the resulting timestamps to datetime objects
all_rodeo$timestamp <- ymd_hms(all_rodeo$timestamp, quiet = TRUE)

# Check for parsing issues
invalid_timestamps <- all_rodeo$timestamp[is.na(all_rodeo$timestamp)]
if (length(invalid_timestamps) > 0) {
cat("Invalid timestamps after conversion:\n")
print(invalid_timestamps)
}

# Perform anti_join only if 'rodeo' has data; otherwise, return 'all_rodeo'
new_rodeo <- if (nrow(rodeo) > 0) {
anti_join(rodeo, all_rodeo, by = c("year", "month", "day", "hour", "timestamp", "stationid"))
} else {
all_rodeo
}

return(list(new_4sw=new_4sw, new_sansimon=new_sansimon, new_rodeo=new_rodeo))

Expand All @@ -130,9 +158,9 @@ return(list(new_4sw=new_4sw, new_sansimon=new_sansimon, new_rodeo=new_rodeo))
#' @example append_regional_weather()

append_regional_weather <- function() {

data <- get_regional_weather()

# append new data
write.table(data$new_4sw, file = "Weather/Portal4sw_regional_weather.csv",
row.names = FALSE, col.names = FALSE, na = "", append = TRUE, sep = ",")
Expand All @@ -141,7 +169,6 @@ append_regional_weather <- function() {
row.names = FALSE, col.names = FALSE, na = "", append = TRUE, sep = ",")

write.table(data$new_rodeo, file = "Weather/Rodeo_regional_weather.csv",
row.names = FALSE, col.names = FALSE, na = "", append = TRUE,
row.names = FALSE, col.names = FALSE, na = "", append = TRUE,
sep = ",", quote=c(5:7))

}
22 changes: 14 additions & 8 deletions DataCleaningScripts/update_ndvi.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

`%>%` <- magrittr::`%>%`
library(terra, warn.conflicts=FALSE, quietly = TRUE)
library(dplyr)
library(stringr)

#' @title create_portal_area
#'
Expand Down Expand Up @@ -117,14 +119,18 @@ writendvitable <- function() {
if(file.exists("./NDVI/scenes.csv")) {
if(file.size("./NDVI/scenes.csv") != 0L) {
targetpath <- "./NDVI/landsat-data"
undone <- read.csv("./NDVI/undone-scenes.csv")
records <- read.csv("./NDVI/scenes.csv") %>% dplyr::filter(!display_id %in% undone$display_id)

new_data <- as.data.frame(do.call(rbind,
apply(records, 1, summarize_ndvi_snapshot, targetpath))) %>%
dplyr::arrange(date,sensor)

write.table(new_data, file='./NDVI/ndvi.csv', sep = ",", row.names=FALSE, col.names=FALSE,
# Read the files
undone <- read.csv("./NDVI/undone-scenes.csv", stringsAsFactors = FALSE)
scenes <- read.csv("./NDVI/scenes.csv", stringsAsFactors = FALSE)

# Filter scenes where display_id is not a substring of any undone$display_id
records <- scenes %>%
filter(!sapply(display_id, function(id) any(str_detect(undone$display_id, fixed(id)))))

new_data <- as.data.frame(do.call(rbind, apply(records, 1, summarize_ndvi_snapshot, targetpath))) %>%
dplyr::arrange(date,sensor)

write.table(new_data, file='./NDVI/ndvi.csv', sep = ",", row.names=FALSE, col.names=FALSE,
append=TRUE, na="")
}}
}