@@ -652,37 +652,11 @@ fg_em <- unique(fg_em)
# Counts of vessels listed in EM (number of fg_em rows per FLAG value)
fg_em[, .N, by = FLAG]
654654
655- # * Effort prediction ----
656- effort_strata <- work.data | >
657- # select necessary columns
658- _[CVG_NEW == " PARTIAL" , .(ADP , STRATA = STRATA_NEW , TRIP_TARGET_DATE , TRIP_ID )
659- # ensure one trip target date per trip, making it the minimum trip target date
660- ][, TRIP_TARGET_DATE : = min(TRIP_TARGET_DATE ), by = TRIP_ID ] | >
661- unique() | >
662- setorder(ADP , STRATA , TRIP_TARGET_DATE ) | >
663- _[ , TRIPS : = 1 / .N , by = TRIP_ID
664- # find julian dates
665- ][, JULIAN_DATE : = yday(TRIP_TARGET_DATE )
666- # set julian date to 1 for trips that left in year adp - 1
667- ][, JULIAN_DATE : = ifelse(year(TRIP_TARGET_DATE ) < ADP , 1 , JULIAN_DATE )
668- # set julian date to 366 for trips that left in year adp + 1
669- ][, JULIAN_DATE : = ifelse(year(TRIP_TARGET_DATE ) > ADP , 366 , JULIAN_DATE )][]
670-
671- # ' isolate the latest date for which we have data in the most recent year of VALHALLA
672- effort_strata.max_date <- max(effort_strata [ADP == ADPyear - 1 , JULIAN_DATE ])
673- # count trips through max_date and total trips by year and stratum
674- effort_strata <- effort_strata [, .(
675- MAX_DATE_TRIPS = sum(TRIPS [JULIAN_DATE < = effort_strata.max_date ]),
676- TOTAL_TRIPS = sum(TRIPS )
677- ), by = .(ADP , STRATA )
678- # make total trips NA for ADPyear - 1, since the year is not over
679- ][ADP == ADPyear - 1 , TOTAL_TRIPS : = NA ][]
680-
681655# ' *===================================================================================================================*
682656
683- # MANUAL CORRECTIONS : Split long TRIP_IDs ----
# FINAL MANUAL CORRECTIONS: Split long TRIP_IDs ----
684658
685- # These corrections need to be made before they affect trip_duration.R
#' *These corrections need to be made before they affect effort_prediction.R and trip_duration.R*
686660
687661# ' [2026FinalADP: Splitting two OB_TRW_GOA tender trips that merged trips over a large time span]
688662
@@ -717,6 +691,32 @@ work.data |>
717691
718692# ' *===================================================================================================================*
719693
# * Effort prediction ----

#' Build the trip records used for effort prediction from the partial-coverage rows of work.data:
#'   1. keep ADP, STRATA (renamed from STRATA_NEW), TRIP_TARGET_DATE and TRIP_ID;
#'   2. give every TRIP_ID a single TRIP_TARGET_DATE (its minimum) and drop the duplicate rows this creates;
#'   3. weight each remaining row by 1 / (number of rows for its TRIP_ID), so each TRIP_ID sums to one trip;
#'   4. attach the day of year, pinned to 1 or 366 when the trip date falls in a calendar year other than ADP.
effort_strata <- work.data |>
  # select necessary columns
  _[CVG_NEW == "PARTIAL", .(ADP, STRATA = STRATA_NEW, TRIP_TARGET_DATE, TRIP_ID)
    # ensure one trip target date per trip, making it the minimum trip target date
  ][, TRIP_TARGET_DATE := min(TRIP_TARGET_DATE), by = TRIP_ID] |>
  unique() |>
  setorder(ADP, STRATA, TRIP_TARGET_DATE) |>
  # split each trip evenly across the rows that remain for its TRIP_ID
  _[, TRIPS := 1 / .N, by = TRIP_ID
    # find julian dates
  ][, JULIAN_DATE := yday(TRIP_TARGET_DATE)
    # set julian date to 1 for trips whose target date is in a year before ADP
  ][, JULIAN_DATE := ifelse(year(TRIP_TARGET_DATE) < ADP, 1, JULIAN_DATE)
    # set julian date to 366 for trips whose target date is in a year after ADP
  ][, JULIAN_DATE := ifelse(year(TRIP_TARGET_DATE) > ADP, 366, JULIAN_DATE)][]

#' Isolate the latest julian date for which we have data in the most recent year of VALHALLA (ADPyear - 1)
effort_strata.max_date <- max(effort_strata[ADP == ADPyear - 1, JULIAN_DATE])

# Count trips through max_date and total trips by year and stratum
effort_strata <- effort_strata[, .(
  MAX_DATE_TRIPS = sum(TRIPS[JULIAN_DATE <= effort_strata.max_date]),
  TOTAL_TRIPS = sum(TRIPS)
), by = .(ADP, STRATA)
  # make total trips NA for ADPyear - 1, since the year is not over;
  # NA_real_ matches the numeric type of TOTAL_TRIPS
][ADP == ADPyear - 1, TOTAL_TRIPS := NA_real_][]

720720# * Trip duration ----
721721
722722# ' First, trim off the most recent 4 years of trips. Modeling trip duration over the entire dataset is not needed and
0 commit comments