Packages & setup

# packages
if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  dplyr, DT, dygraphs, glue,googlesheets4, gstat, here, lubridate, mapview, purrr, readr, 
  raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select
mapviewOptions(fgb = FALSE)
# paths
dir_data <- switch(
  Sys.info()["nodename"],
  `ben-mbpro` = "/Users/bbest/My Drive (ben@ecoquants.com)/projects/calcofi/data",
  `Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data",
  `Bens-MacBook-Air.local` = "/Volumes/GoogleDrive/My Drive/projects/calcofi/data")
  # TODO: get Erin's Google Drive path and "nodename")

Read dataset keys from gdrive

bottle_key_csv <- file.path(dir_data, "/dataset_keys/bottle_field_descriptions.csv")
cast_key_csv   <- file.path(dir_data, "/dataset_keys/cast_field_descriptions.csv")

key_cast   <- read_csv(cast_key_csv)

key_bottle <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/18c6eSGRf0bSdraDocjn3-j1rUIxN2WKqxsEnPQCR6rA/edit#gid=2046976359") %>% 
  na_if("n.a.") %>% 
  mutate(
    dataset    = "bottle_cast",
    source_url = source_url[1])


key_DIC <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1SGfGMJUhiYZKIh746p5pGn7cD0cOcTA3g_imAnvyz88/edit#gid=0") %>% 
  na_if("n.a.") %>% 
  mutate(
    dataset    = "DIC_cast",
    source_url = source_url[1])

data_key <- bind_rows(key_bottle, key_DIC) %>% 
  select(dataset, field_name, title, description_abbv, everything())


# convert to var_lookup
var_lookup_key_tbl <- data_key %>% 
  filter(!is.na(description_abbv)) 
var_lookup_key <- var_lookup_key_tbl %>% 
  split(seq(nrow(.))) %>% 
  lapply(as.list)
names(var_lookup_key) <- var_lookup_key_tbl$field_name

var_lookup_tbl <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1ghM30pIdcsun7XWzLRKh4EilUplN60YdHayfiilPFwE/edit#gid=0") 
var_lookup <- var_lookup_tbl %>%
  split(seq(nrow(.))) %>% 
  lapply(as.list)
names(var_lookup) <- var_lookup_tbl$var

Read Oceano Bottle data from gdrive

# get data file paths from gdrive
bottle_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Bottle.csv")
cast_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Cast.csv")
bottle_cast_rds <- file.path(dir_data, "/oceanographic-data/bottle-database/bottle_cast.rds")
DIC_csv  <- file.path(dir_data, "/DIC/CalCOFI_DICs_200901-201507_28June2018.csv")

calcofi_geo           <- here("data/calcofi_oceano-bottle-stations_convex-hull.geojson")
calcofi_offshore_geo  <- here("data/calcofi_oceano-bottle-stations_convex-hull_offshore.geojson")
calcofi_nearshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_nearshore.geojson")

# check paths
stopifnot(dir.exists(dir_data))
stopifnot(any(file.exists(bottle_csv,cast_csv)))

# read data
d_cast   <- read_csv(cast_csv) %>% 
  separate(
    Sta_ID, c("Sta_ID_Line", "Sta_ID_Station"), 
    sep=" ", remove=F) %>% 
  mutate(
    Sta_ID_Line    = as.double(Sta_ID_Line),
    Sta_ID_Station = as.double(Sta_ID_Station))

d_bottle <- read_csv(bottle_csv, skip=1, col_names = F, guess_max = 1000000)
#d_bottle_problems() <- problems()
names(d_bottle) <- str_split(
  readLines(bottle_csv, n=1), ",")[[1]] %>% 
  str_replace("\xb5", "µ")

table(is.na(d_bottle$pH1))
## 
##  FALSE   TRUE 
##     84 889416
# FALSE   TRUE 
#    84 889416 
table(is.na(d_bottle$pH2))
## 
##  FALSE   TRUE 
##     10 889490
# FALSE   TRUE 
#    10 889490
table(is.na(d_bottle$O2ml_L))
## 
##  FALSE   TRUE 
## 719993 169507
#  FALSE   TRUE 
# 719993 169507
table(is.na(d_bottle$O2Sat))
## 
##  FALSE   TRUE 
## 685072 204428
# FALSE   TRUE 
# 685072 204428 
table(is.na(d_bottle[,'Oxy_µmol/Kg']))
## 
##  FALSE   TRUE 
## 685061 204439
#  FALSE   TRUE 
# 685061 204439

d_DIC <- read_csv(DIC_csv, skip=1, col_names = F, guess_max = 1000000)
names(d_DIC) <- str_split(
  readLines(DIC_csv, n=1), ",")[[1]] %>% 
  str_replace("\xb5", "µ")
# %>% 
#   separate(
#     `Line Sta_ID`, c("Sta_ID_Line", "Sta_ID_Station"), 
#     sep=" ", remove=F) %>% 
#   mutate(
#     Sta_ID_Line    = as.double(Sta_ID_Line),
#     Sta_ID_Station = as.double(Sta_ID_Station),
#     offshore       = ifelse(Sta_ID_Station > 60, T, F)) 

# Join data

# for now... ideally would join with all the data but this causes some issues with shared variables that have different values
bottle_cast <- d_cast %>% 
  left_join(
    d_bottle %>% select(-Sta_ID),
    by = "Cst_Cnt") %>% 
  mutate(Date = lubridate::as_date(Date, format = "%m/%d/%Y")) 

DIC_cast <- d_cast %>% 
  left_join(
    d_DIC %>% 
      select(-`Line Sta_ID`) %>% 
      rename(Depthm = `Depth(m)`),
    by = c("Cst_Cnt" = "ID")) %>% 
  mutate(Date = lubridate::as_date(Date, format = "%m/%d/%Y"))
saveRDS(bottle_cast, bottle_cast_rds)

Read whales, seabirds, turtles data from gdrive

Bird Mammal Census

bird_mamm_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_observations.csv")
transects_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_transects.csv")
key_bird_mamm_beh_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_behaviorcodes.csv")
key_bird_mamm_sp_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_allspecieslist.csv")

key_bird_mamm_beh <- read_csv(key_bird_mamm_beh_csv) 
key_bird_mamm_sp  <- read_csv(key_bird_mamm_sp_csv) 


# bird & mammal counts from CalCOFI cruises
d_bird_mamm <- read_csv(bird_mamm_csv, guess_max = 1000000)

# log of transect datetimes & locations from CalCOFI cruises
d_transects <- read_csv(transects_csv, guess_max = 1000000)

bird_mamm_transects <- d_bird_mamm %>% 
  left_join(
    d_transects, 
    by = "GIS key") %>% 
  left_join(
    key_bird_mamm_beh %>% 
      select(Behavior, Behavior_Description = Description), 
    by = "Behavior") %>%
  left_join(
    key_bird_mamm_sp %>% 
      mutate(across(
        c("Bird", "LargeBird", "Fish", "Mammal", "Include", "Unidentified"), 
        as.logical)), 
    by = "Species")

Exploratory summary of bottle data

# d_cast
skim(d_cast)
# d_bottle
skim(d_bottle)
# d_DIC
skim(d_DIC)
Data summary
Name d_DIC
Number of rows 2084
Number of columns 23
_______________________
Column type frequency:
character 3
numeric 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Line Sta_ID 0 1.00 11 11 0 33 0
Depth_ID 0 1.00 38 38 0 2084 0
DIC Quality Comment 2029 0.03 28 115 0 36 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ID 0 1.00 1058.07 613.45 1.00 521.75 1061.50 1585.25 2130.00 ▇▇▇▇▇
Cast_Index 0 1.00 33165.93 596.82 31785.00 32780.00 33312.00 33646.00 33973.00 ▂▂▅▆▇
Bottle_Index 0 1.00 833038.33 15412.45 796974.00 823082.75 836834.00 845385.50 853785.00 ▂▂▅▅▇
Cruise 0 1.00 201258.07 196.70 200808.00 201108.00 201304.00 201407.00 201507.00 ▂▁▆▃▇
Depth(m) 0 1.00 186.48 326.67 1.00 30.00 100.00 231.00 3542.00 ▇▁▁▁▁
DIC1 85 0.96 2153.24 113.00 1948.85 2028.33 2170.64 2253.81 2367.80 ▇▃▅▇▃
DIC2 1860 0.11 2168.15 154.85 1969.44 2008.98 2265.89 2315.52 2364.42 ▇▁▁▁▇
TA1 0 1.00 2256.06 34.84 2181.57 2230.32 2244.32 2278.50 2434.90 ▅▇▃▁▁
TA2 1850 0.11 2278.86 58.50 2198.15 2229.06 2247.50 2316.45 2437.00 ▇▁▇▁▁
pH1 2000 0.04 7.91 0.08 7.62 7.90 7.93 7.96 8.05 ▁▁▁▇▂
pH2 2074 0.00 7.95 0.02 7.92 7.93 7.95 7.96 7.99 ▇▂▃▃▂
Salinity1 0 1.00 33.76 0.40 32.84 33.42 33.73 34.15 34.68 ▂▇▃▇▁
Salinity2 1849 0.11 33.83 0.52 32.94 33.35 33.66 34.29 34.82 ▅▆▁▇▁
Temperature_degC 0 1.00 11.05 3.80 1.52 8.25 10.00 14.01 22.75 ▁▇▅▃▁
Bottle Salinity 0 1.00 33.77 0.40 32.84 33.42 33.74 34.15 34.68 ▂▇▃▇▁
Bottle O2(ml_L) 0 1.00 3.40 2.14 0.00 1.36 3.22 5.64 7.81 ▆▅▂▇▁
Bottle O2 (µmol/Kg) 0 1.00 148.28 93.39 0.00 59.31 140.12 245.95 340.42 ▆▅▂▇▁
Sigma-theta 0 1.00 25.74 0.98 22.98 24.91 25.96 26.58 27.78 ▁▅▅▇▂
DIC Bottle_ID1 0 1.00 4269.62 3603.41 1.00 390.75 2952.50 7503.25 10004.00 ▇▂▂▃▅
DIC Bottle_ID2 1852 0.11 3519.40 3441.65 2.00 331.25 2247.00 6907.50 9959.00 ▇▂▂▂▃

Get CINMS AOI

# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"

cinms_ply <- sf::st_read(sanctuaries_geo) %>%
  dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source 
##   `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS:  WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)
#cinms_txt

mapview(cinms_ply) 
# get aoi
aoi = cinms_ply

Functions

Get Station IDs as points

# for summary, want to group by Sta_Code because each data point has a diff Sta_ID
get_pts <- function(data) {
  data %>% 
    filter(
    !is.na(Lat_Dec),
    !is.na(Lon_Dec)) %>% 
  group_by(
    Sta_ID) %>%
  summarize(
    lon            = mean(Lon_Dec),
    lat            = mean(Lat_Dec),
    Sta_ID_Line    = mean(Sta_ID_Line),
    Sta_ID_Station = mean(Sta_ID_Station)) %>%
  st_as_sf(
    coords = c("lon", "lat"), crs=4326, remove = F) %>% 
  mutate(
    offshore = ifelse(Sta_ID_Station > 60, T, F))
}


get_pts(bottle_cast) %>% mapview(zcol="offshore")
get_pts(DIC_cast) %>% mapview(zcol="offshore")
# pts %>% mapview(zcol="offshore")
# table(pts$offshore)

Make CalCOFI total study areas

Per guidance from Erin:

CalCOFI Regions– Station # > 60 = oceanic/offshore; <60 = neritic/nearshore/continental shelf (other regions include those from Venrick et al. 2012 or Stephens et al. 2018

make_hull <- function(x){
  st_union(x) %>% 
  st_convex_hull() %>% 
  st_buffer(0.1) %>%  # ~10 km
  ms_simplify(keep = 0.05) }

hull_geos <- c(calcofi_geo, calcofi_nearshore_geo, calcofi_offshore_geo)

if (any(!file.exists(hull_geos))){
  hull <- pts %>% 
    make_hull()
  st_write(hull, calcofi_geo)
  mapview(hull)
  
  hull_nearshore <- pts %>% 
    filter(!offshore) %>% 
    make_hull()
  st_write(hull_nearshore, calcofi_nearshore_geo)
  mapview(hull_nearshore)
  
  hull_offshore <- pts %>% 
    filter(offshore) %>% 
    make_hull()
  st_write(hull_offshore, calcofi_offshore_geo)
  mapview(hull_offshore)
}

Get latest variable for AOI

# # find stations in aoi
# pts_aoi <- pts %>% 
#   mutate(
#     x = st_intersects(pts, aoi) %>% as.logical()) %>% 
#   filter(x)

# mapview(aoi) +
#   mapview(pts_aoi)

Update dates for summarizing by date_step

update_date <- function(x, unit = c("second", "minute", "hour", "day", "week", "month", "quarter", "year", "decade")) {
  unit <- match.arg(unit)
  switch(unit,
    second  = update(x, seconds = floor(second(x))),
    minute  = update(x, seconds = 0),
    hour    = update(x, minutes = 0, seconds = 0),
    day     = update(x, hours   = 0, minutes = 0, seconds = 0),
    week    = update(x, wdays   = 1, hours = 0,   minutes = 0, seconds = 0),
    month   = update(x, mdays   = 1, hours = 0,   minutes = 0, seconds = 0),
    quarter = update(x, months  = ceiling(month(x)/3) * 3 - 2, mdays   = 1),
    year    = update(x, ydays   = 1,  hours = 0,  minutes = 0, seconds = 0),
    decade  = update(
      x, years = (year(x) - year(x) %% 10), 
      ydays = 1, hours = 0, minutes = 0, seconds = 0)
  )
}

Summarize data by date_step in preparation for timeseries plot

# choices for `date_step`: "day", "week", month", "quarter", "year", "decade"
get_oceano_var_aoi <- function(
  var, aoi, 
  date_step = c("year", "day", "week", "month", "quarter", "decade"), 
  depth_min = 0, depth_max = 10){
  
  # test values
  # var = "Bottle O2(ml_L)"; aoi = cinms_ply; date_step = "year"; depth_min = 0; depth_max = 4000
  # var = "Salnty"; aoi = cinms_ply; date_step = "year"; depth_min = 0; depth_max = 1000
  
  d <- eval(parse(text = glue("var_lookup$`{var}`$data_source_name"))) %>% 
    as.name() %>% eval()
  
  pts <- get_pts(d)
  
  # find stations in aoi
  pts_aoi <- pts %>% 
    mutate(
      x = st_intersects(pts, aoi) %>% as.logical()) %>% 
    filter(x)
  
  # d_var <- d %>% filter(!is.na(eval(parse(text = glue("d$`{var}`")))))
  
  d_summ     <- d %>% filter(!is.na(.data[[var]]))
  d_aoi_summ <- d_summ %>% filter(Sta_ID %in% pts_aoi$Sta_ID)

  # d_test <- d %>% filter(!is.na(`Bottle O2(ml_L)`))
  # d_test_aoi <- d_test %>% filter(Sta_ID %in% pts_aoi$Sta_ID)
  
  # d_aoi_summ <- d_aoi %>% 
  #   filter(!is.na(.data[[var]]))
  
  empty_data_for_var <- ifelse(nrow(d_aoi_summ) == 0, TRUE, FALSE)
  
  if (empty_data_for_var) {
    d_aoi_summ <- d_summ
  }
  if (any(!is.na(.data[[Depthm]]))) {
    d_aoi_summ <- d_aoi_summ %>% 
      filter(Depthm >= depth_min, Depthm < depth_max)
  }
  d_aoi_summ <- d_aoi_summ %>% 
    mutate(Date_Step = update_date(Date, unit = date_step)) %>% 
    group_by(Date_Step) %>% 
    summarize(
      var_n   = n(),
      var_min = min(.data[[var]], na.rm = T),
      var_q10 = quantile(.data[[var]], probs = 0.10, na.rm = T),
      var_avg = mean(.data[[var]], na.rm = T),
      var_q90 = quantile(.data[[var]], 0.90, na.rm = T),
      var_max = max(.data[[var]], na.rm = T),
      var_sd  = sd(.data[[var]], na.rm = T)) %>% 
    rename(Date = Date_Step)

  attr(d_aoi_summ, "labels")    <- eval(parse(text = glue("var_lookup$`{var}`")))
  attr(d_aoi_summ, "date_step") <- date_step
  attr(d_aoi_summ, "date_msg")  <- glue("This dataset was summarized by {date_step}.")
  attr(d_aoi_summ, "aoi") <- ifelse(
    empty_data_for_var, 
    glue("No data were found for {var} in this area of interest. Summaries were conducted across all existing data points."),
    glue("Data for {var} in selected area of interest")
  )
  
  d_aoi_summ
}

Plot time series

see: plot_metric_timeseries()

plot_timeseries <- function(d) {
  x <- d %>% select(
    Date, 
    `10% quantile` = var_q10,
    `Average`      = var_avg,
    `90% quantile` = var_q90)
  var_attrs <- tibble(
    title = attributes(d)$labels$var_title,
    var   = attributes(d)$labels$var_label)
  xts::xts(x = x %>% select(-Date), order.by = x %>% pull(Date)) %>% 
    dygraph(
      main = var_attrs$title,
      xlab = "Date", ylab = var_attrs$var) %>% # ...) %>%
    dySeries(
      c("10% quantile", "Average", "90% quantile"), 
      label = var_attrs$var, color = "Red") %>%
    dyRangeSelector(fillColor = "#FFFFFF", strokeColor = "#FFFFFF")
}

Get IDW raster without AOI (based on Cruise_ID)

get_oceano_var_cruise_raster <- function(cruise_id, var, depth_min, depth_max){
  # test values
  # cruise_id = "2020-01-05-C-33RL"; var = "T_degC"; depth_min = 0; depth_max = 10
  # cruise_id = "1949-03-01-C-31CR"; var = "Bottle O2(ml_L)"; depth_min = 0; depth_max = 1000
  
  d <- eval(parse(text = glue("var_lookup$`{var}`$data_source_name"))) %>% 
    as.name() %>% eval()
  
  d_var <- d %>% 
    filter(
      !is.na(.data[[var]]),
      Depthm >= !!depth_min,
      Depthm < !!depth_max)
  
  cruise_id_choices <- unique(d_var$Cruise_ID) # Cruise_ID values with data for var
  
  d_daily <- d_var %>% 
    filter(Cruise_ID == cruise_id) %>% 
    group_by(Cruise_ID, Date, Sta_ID) %>% 
    summarize(
      var_n   = n(),
      var_avg = mean(.data[[var]], na.rm = T), 
      .groups = "drop") %>% 
    arrange(desc(Date), Cruise_ID)
  d_daily
  
  pts <- get_pts(d)
  
  # find stations in aoi
  pts_aoi <- pts %>% 
    mutate(
      x = st_intersects(pts, aoi) %>% as.logical()) %>% 
    filter(x)
  
  p <- pts %>% 
    left_join(
      d_daily,
      by="Sta_ID") %>% 
    select(Sta_ID, Date, var_avg) %>% 
    filter(!is.na(var_avg))
  mapview(p, zcol="var_avg")
  
  h <- st_convex_hull(st_union(p)) %>% st_as_sf() %>% 
    mutate(one = 1)
  mapview(h)
  r <- raster(as_Spatial(h), res=0.1, crs=4326)
  z <- rasterize(as_Spatial(h), r, "one")
  
  # inverse distance weighted interpolation
  #   https://rspatial.org/raster/analysis/4-interpolation.html
  gs <- gstat(formula=var_avg~1, locations=p)
  idw <- interpolate(z, gs)
  w <- mask(idw, z)
  
  w_tif <- here(glue("data/idw_{var}_{cruise_id}_{depth_min}_{depth_max}"))
  # w_tif <- here("data/_test_idw.tif")
  raster::writeRaster(w, w_tif, overwrite=T)
  w <- raster(w_tif) %>% readAll()
  unlink(w_tif)
  
  w
}

Example timeseries plots

TEMPERATURE

get_oceano_var_aoi("T_degC", cinms_ply, "year", 0, 20) %>% 
  plot_timeseries()

SALINITY

get_oceano_var_aoi("Salnty", cinms_ply, "year", 0, 4000) %>% 
  plot_timeseries()

OXYGEN (HYPOXIA)

get_oceano_var_aoi("O2Sat", cinms_ply, "year", 0, 100) %>% 
  plot_timeseries()
get_oceano_var_aoi("Bottle O2(ml_L)", cinms_ply, "week", 0, 4000) %>% 
  plot_timeseries()
get_oceano_var_aoi("Bottle O2 (µmol/Kg)", cinms_ply, "week", 0, 4000) %>% 
  plot_timeseries()

DIC

get_oceano_var_aoi("DIC1", cinms_ply, "week", 0, 4000) %>% 
  plot_timeseries()

pH (OA)

[ ] TODO: find pH data for OA measurements

# need to find pH data:
DIC_cast %>% filter(!is.na(pH1 | pH2)) %>% nrow()
## [1] 84
bottle_cast %>% filter(!is.na(pH1 | pH2)) %>% nrow()
## [1] 84
# get_oceano_var_aoi("pH1", cinms_ply, "week", 0, 4000) %>% 
#   plot_timeseries()

Example IDW rasters of variables for latest (not within AOI)

TEMPERATURE

get_oceano_var_cruise_raster(
  cruise_id = "2020-01-05-C-33RL",
  var       = "T_degC", 
  depth_min = 0,
  depth_max = 10) %>% 
  mapview()
## [inverse distance weighted interpolation]

SALINITY

get_oceano_var_cruise_raster(
  cruise_id = "2020-01-05-C-33RL",
  var       = "Salnty", 
  depth_min = 0,
  depth_max = 20) %>% 
  mapview()
## [inverse distance weighted interpolation]

OXYGEN

get_oceano_var_cruise_raster(
  cruise_id = "1949-03-01-C-31CR",
  var       = "Bottle O2(ml_L)", 
  depth_min = 0,
  depth_max = 8000) %>% 
  mapview()
## [inverse distance weighted interpolation]

DIC

get_oceano_var_cruise_raster(
  cruise_id = "1949-03-01-C-31CR",
  var       = "DIC1", 
  depth_min = 0,
  depth_max = 8000) %>% 
  mapview()
## [inverse distance weighted interpolation]

OLD

Get raster of variable for latest within AOI

# test variables
var = "T_degC"; aoi = cinms_ply; depth_min = 0; depth_max = 100

d <- eval(parse(text = glue("var_lookup$`{var}`$data_source_name"))) %>% 
  as.name() %>% eval()

pts <- get_pts(d)

# find stations in aoi
pts_aoi <- pts %>%
  mutate(
    x = st_intersects(pts, aoi) %>% as.logical()) %>%
  filter(x)

d_aoi <- d %>%
  filter(Sta_ID %in% pts_aoi$Sta_ID)

d_aoi_daily <- d_aoi %>%
  filter(
    !is.na(.data[[var]]),
    Depthm >= depth_min,
    Depthm < depth_max) %>%
  group_by(Date, Sta_ID) %>%
  summarize(
    var_n   = n(),
    var_avg = mean(.data[[var]], na.rm = T),
    .groups = "drop") %>%
  arrange(desc(Date))
d_aoi_daily
## # A tibble: 383 x 4
##    Date       Sta_ID      var_n var_avg
##    <date>     <chr>       <int>   <dbl>
##  1 2020-01-15 083.3 051.0    13    12.9
##  2 2019-11-14 083.3 051.0    17    14.7
##  3 2019-07-22 083.3 051.0    20    11.9
##  4 2019-04-09 083.3 051.0    13    11.3
##  5 2019-02-11 083.3 051.0    14    13.5
##  6 2018-10-25 083.3 051.0    13    13.4
##  7 2018-06-20 083.3 051.0    11    12.6
##  8 2018-04-17 083.3 051.0    13    10.4
##  9 2018-02-06 083.3 051.0    13    13.0
## 10 2017-11-17 083.3 051.0    12    14.4
## # … with 373 more rows

TODO

summarize:

  • OCEAN TEMP: T_degC

  • salinity: Salnty

  • OXYGEN (HYPOXIA): Bottle O2(ml_L), Bottle O2 (µmol/Kg)

  • DIC:

  • ZOOPLANKTON,

  • ICHTHYOPLANKTON for each station ID

  • Create read_bottle() in calcofi4r