Packages & setup

# packages
if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  dplyr, DT, dygraphs, glue,googlesheets4, gstat, here, lubridate, mapview, purrr, readr, 
  raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select
glue   <- glue::glue
mapviewOptions(fgb = FALSE)

TODO: access gdrive data from server

# paths
dir_data <- switch(
  Sys.info()["nodename"],
  `Bens-MacBook-Pro.local` = "/Users/bbest/My Drive/projects/calcofi/data",
  `Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data",
  `Bens-MacBook-Air.local` = "/Volumes/GoogleDrive/My Drive/projects/calcofi/data")
  # TODO: get Erin's Google Drive path and "nodename")

Read Bird Mammal Census datasets from gdrive

# bird & mammal counts from CalCOFI cruises
bird_mamm_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_observations.csv")
# log of transect datetimes & locations from CalCOFI cruises
transects_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_transects.csv")
# behavior codes
codes_bird_mamm_beh_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_behaviorcodes.csv")
# species codes
codes_bird_mamm_sp_csv <- file.path(dir_data, "/whales-seabirds-turtles/bird-mammal-census/CalCOFI_bird-mammal-census_allspecieslist.csv")
d_bird_mamm <- read_csv(bird_mamm_csv, guess_max = 1000000)
d_transects <- read_csv(transects_csv, guess_max = 1000000)
codes_bird_mamm_beh <- read_csv(codes_bird_mamm_beh_csv) 
codes_bird_mamm_sp  <- read_csv(codes_bird_mamm_sp_csv) 

Get aoi

# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"

cinms_ply <- sf::st_read(sanctuaries_geo) %>%
  dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source 
##   `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS:  WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)

# get aoi
aoi = cinms_ply

Clean & reformat transect data

TODO:

[X] make TRANSECT LINES from Lat/Lon Start and Stop [X] create LINE SEGMENTS b/w individual transect lines. Separate each part of line segment w/ a field to represent EFFORT. [ ] get DATETIME data for transects (currently only have DATE)

Get transects as points (not very useful)

# get transects as points
transect_pts <- d_transects %>% 
  sf::st_as_sf(coords = c("Longitude Mid (º)", "Latitude Mid (º)"), crs = 4326) %>% 
  distinct()

Get transects as lines

Functions : infer ‘off-effort’ segments; create lines from lat/lng pts

# correct for automatic re-formatting of dates
get_date <- function(date) { # for date in date_list
  date %>% as.character() %>% as.Date("%Y-%m-%d")
}

# get lat/lng data for inferred 'off-effort' segments
infer_segments <- function(d) {
  # d <- d_cruise_date
  cruise         <- list()
  transect_start <- list()
  transect_stop  <- list()
  date_start     <- list()
  date_stop      <- list()
  lat_start      <- list()
  lon_start      <- list()
  lat_stop       <- list()
  lon_stop       <- list()

  for (i in 1:nrow(d)) { 
    if (i < nrow(d)) {
      cruise[i]         <- d$Cruise[i]
      transect_start[i] <- d$`Transect number`[i]
      transect_stop[i]  <- d$`Transect number`[i+1]
      date_start[i]     <- as.character(d$Date[i]) 
      date_stop[i]      <- as.character(d$Date[i+1])
      lat_start[i]      <- d$lat_stop[i]
      lon_start[i]      <- d$lon_stop[i]
      lat_stop[i]       <- d$lat_start[i+1]
      lon_stop[i]       <- d$lon_start[i+1]
    }
  }
  
  date_start <- map(date_start, get_date) 
  date_stop  <- map(date_stop, get_date)
  
  tibble(
    cruise         = unlist(cruise),
    transect_start = unlist(transect_start),
    transect_stop  = unlist(transect_stop),
    date_start     = date_start %>% reduce(c),
    date_stop      = date_stop  %>% reduce(c),
    lat_start      = unlist(lat_start),
    lon_start      = unlist(lon_start),
    lat_stop       = unlist(lat_stop),
    lon_stop       = unlist(lon_stop),
    effort         = FALSE) %>%
    mutate(
      date         = NA,
      transect     = (transect_start + transect_stop)/2,
      lat_mid      = (lat_start + lat_stop)/2,
      lon_mid      = (lon_start + lon_stop)/2) %>% 
    select(
      cruise, transect, transect_start, transect_stop, 
      date, date_start, date_stop, 
      lat_start, lon_start, lat_stop, lon_stop, lat_mid,lon_mid, effort)
}

# prep/clean data, run `infer_segments(), 
# and bind inferred and original segments into one dataset
update_lines <- function(d_cruise_date){ 
  # for each df (cruise & date combination) in d_list
  d_cruise_date <- d_cruise_date %>% 
    mutate(
      transect_start = `Transect number`,
      transect_stop  = `Transect number`,
      date_start     = Date,
      date_stop      = Date,
      lat_start      = mean(`Latitude Start (º)`),
      lon_start      = mean(`Longitude Start (º)`),
      lat_stop       = mean(`Latitude Stop (º)`),
      lon_stop       = mean(`Longitude Stop (º)`),
      lat_mid        = mean(`Latitude Mid (º)`),
      lon_mid        = mean(`Longitude Mid (º)`)) %>%
    select(
      Cruise, 
      `Transect number`, transect_start, transect_stop, 
      Date, date_start, date_stop,
      lat_start, lon_start, lat_stop, lon_stop, lat_mid, lon_mid) %>% 
    mutate(effort = TRUE) %>% 
    distinct() 
  
  d_cruise_date %>% 
    infer_segments() %>% 
    ungroup() %>% 
    bind_rows(
      d_cruise_date %>% 
        rename(
          cruise   = Cruise,
          transect = `Transect number`,
          date     = Date))
}

# convert lat/lng data to linestring format
get_linestring <- function(d){ 
  # where d = output of mapping `update_lines()` across d_list
  start    <- tibble(lon = d$lon_start, lat = d$lat_start)
  stop     <- tibble(lon = d$lon_stop,  lat = d$lat_stop)
  lines_sf <- vector("list", nrow(d))
  for (i in seq_along(lines_sf)) {
    lines_sf[[i]] <- st_linestring(as.matrix(rbind(start[i,], stop[i,])))
  }
  st_sfc(lines_sf, crs = "+proj=longlat +datum=WGS84") %>% 
    st_as_sf() %>%
    bind_cols(d) %>% 
    rename(geometry = x)
}

# filter transects data by aoi
filter_transects_by_aoi <- function(transect_lines) {
  tibble(transect_lines) %>% 
    mutate(
      in_aoi = st_intersects(transects_all_geom, aoi) %>% as.logical()) %>% 
  filter(in_aoi) %>%
  st_as_sf()
}

Generate transects df from original d_transects

segments <- d_transects %>%
  drop_na() %>%
  group_by(`Transect number`)

# original transect data split by Cruise & Date combination
d_list <- segments %>% 
  split(segments$Cruise, segments$Date)

# transect data NOT in linestring format
transects_all <- map(d_list, update_lines) %>%
  bind_rows() # bind all df's created from split d_list together into 1 df

# transect data in linestring form
transects_all_geom <- transects_all %>% 
  get_linestring()
# unique combinations of year + cruise for filtering purposes
year_cruise_choices <- transects_all %>% 
  mutate(year = year(date_start)) %>% 
  select(year, cruise) %>% 
  distinct()

Filter transect data by AOI (messy; best to also filter by year, cruise, etc.)

transects_in_aoi <- transects_all_geom %>% 
  filter_transects_by_aoi()

EXAMPLE PLOTS: filtered by different parameters

# transects filtered by AOI and cruise
transects_in_aoi %>% 
  filter(cruise == "CAC2005_07") %>% 
  mapview(zcol = "effort")
# year == 200, cruise number color-coded
transects_all_geom %>% 
  filter(year(date_start) == "2000") %>% 
  mapview(add=T, zcol="cruise")
# specific cruise number, effort color-coded
transects_all_geom %>% 
  filter(cruise == "CAC1987_05") %>% 
  mapview(add=T, zcol="effort")
# transect ==2, effort color-coded
transects_all_geom %>% 
  filter(transect == 2) %>% 
  mapview(add=T, zcol="effort")
# filtered by cruise + year combination, effort color-coded
transects_all_geom %>% 
  filter(
    year(date_start) == year_cruise_choices$year[1],
    cruise == year_cruise_choices$cruise[3]) %>% 
  mapview(add=T, zcol="effort")
transects_all_geom %>% 
  filter(
    year(date_start) == year_cruise_choices$year[1],
    cruise == year_cruise_choices$cruise[3]) %>% 
  mapview(add=T, zcol="effort")
transects_all_geom %>% 
  filter(year(date_start) == "2000") %>% 
  # pull(cruise) %>% unique()
  filter(cruise == "CAC2000_04") %>% 
  mapview(add=T, zcol="effort")

Join census observations with transect data

bird_mamm_census <- d_bird_mamm %>% 
  left_join(
    d_transects, 
    by = "GIS key") %>% 
  left_join(
    codes_bird_mamm_beh %>% 
      select(Behavior, Behavior_Description = Description), 
    by = "Behavior") %>%
  left_join(
    codes_bird_mamm_sp %>% 
      mutate(across(
        c("Bird", "LargeBird", "Fish", "Mammal", "Include", "Unidentified"), 
        as.logical)), 
    by = "Species")
 

# mapview(transects) # NOTE: this is prohibitively large to render to html (> 100 MB html)

Note: This figure is a static screenshot from the interactive map, which is prohibitively large to host on Github. ## Next Steps

  • After selecting transects for AOI, show me the presence of a species observed over time?
    • What times was there observation? How much
    • How many observations of the species?
    • Sightings per unit effort (SPUE)
      • What’s a “unit of effort”? Perhaps a transect, or better yet length of transect, eg 10 km or unit time of observation.

See Santora, J. A., and W. J. Sydeman. 2015. Persistence of hotspots and variability of seabird species richness and abundance in the southern California Current. Ecosphere 6(11):214:

Integrating seabirdobservations with station data requires a grid-based approach to resolve spatially explicit timeseries that account for survey effort over time. Allshipboard tracklines, indexed by 3 km intervals(Yen et al. 2006; Fig. 1), was linked to a GIS, as wehave done for other studies in the CCE (Santoraet al. 2011a, b, 2012a, b). The extent of theshipboard trackline (total survey effort) andCalCOFI sampling stations determined the ex-tent and size of grid cells. This was accomplishedusing the create fishnet command in ArcView toproject the individual 3km sampling points ontoa grid with cells size of 0.78 3 0.78 (;4500 km2).The size of cells was chosen to account for totaltrackline effort (Fig. 1) and to reflect the layout ofthe CalCOFI hydrographic and biological sam-pling stations (Fig. 1) in order to permit futureintegration with those data sets. The grid processresolved the location of consistently sampledcells during 1987–2012 with 45 and 48 cells inspring and summer, respectively (Fig. 1).

  1. (PDF) Persistence of hotspots and variability of seabird species richness and abundance in the southern California Current. Available from: https://www.researchgate.net/publication/283799672_Persistence_of_hotspots_and_variability_of_seabird_species_richness_and_abundance_in_the_southern_California_Current [accessed Mar 31 2022].

We standardized sampling effort by assessingthe number of times the ship visited a cell and theamount of survey effort collected within that cell,relative to all cells in a given season over theentire length of the time series (Santora and Veit2013). To determine a threshold to use as a cutoff, we calculated the mean 6 SD of cell visitsand effort per season, then omitted all effort lessthan 1 SD below the mean.

  1. (PDF) Persistence of hotspots and variability of seabird species richness and abundance in the southern California Current. Available from: https://www.researchgate.net/publication/283799672_Persistence_of_hotspots_and_variability_of_seabird_species_richness_and_abundance_in_the_southern_California_Current [accessed Mar 31 2022].