setup

Setup gcloud and ssh tunneled to database per https://github.com/CalCOFI/server#ssh-tunnel-connection-to-postgis-db.

librarian::shelf(
  calcofi/calcofi4r, 
  DT, here, leaflet, mapview, readr, sf, stringr)
source(here("../apps/libs/db.R"))
options(readr.show_col_types = F)

stations - calcofi4r

table(calcofi4r::stations$is_cce)     # 113
## 
## FALSE  TRUE 
##  2521   113
table(calcofi4r::stations$is_ccelter) # 66
## 
## FALSE  TRUE 
##  2568    66
table(calcofi4r::stations$is_sccoos)  # 9
## 
## FALSE  TRUE 
##  2625     9
sta_n <- tbl(con, "ctd_casts") %>% 
  group_by(sta_id) %>% 
  summarize(
    n = n(),
    date_min = min(date, na.rm=T),
    date_max = max(date, na.rm=T)) %>% 
  arrange(desc(n)) %>% 
  collect() %>% 
  left_join(
    calcofi4r::stations,
    by = "sta_id")

n_min_cce <- sta_n %>% 
  filter(is_cce) %>% 
  pull(n) %>% 
  min() # 1

n_min_ccelter <- sta_n %>% 
  filter(is_ccelter) %>% 
  pull(n) %>% 
  min() # 124

sta_nmin <- sta_n %>% 
  filter(n >= n_min_ccelter) %>% 
  mutate(
    cat = case_when(
      is_cce  & is_ccelter  ~ "ccelter",
      is_cce  & !is_ccelter ~ "cce",
      !is_cce & !is_ccelter ~ "neither",
      TRUE ~ "other")) %>% 
  st_as_sf()
table(sta_nmin$cat)
## 
##     cce ccelter neither 
##       5      66      18
leaflet() %>% 
  addProviderTiles(providers$Stamen.Toner) %>% 
  addCircleMarkers(
    data = sta_nmin %>% 
      filter(cat == "ccelter"))
sta_nmin$n <- as.numeric(sta_nmin$n)
mapview(sta_nmin, zcol = "n")
sta_n %>% 
  filter(is_sccoos & !is_ccelter)
## # A tibble: 9 × 13
##   sta_id      n date_min   date_max   sta_i…¹ sta_i…²   lon   lat is_of…³ is_cce
##   <chr>   <int> <date>     <date>       <dbl>   <dbl> <dbl> <dbl> <lgl>   <lgl> 
## 1 090.0 …    56 2004-07-19 2020-01-11    90      27.7 -118.  33.5 FALSE   TRUE  
## 2 086.8 …    56 2004-11-19 2020-01-11    86.8    32.5 -118.  33.9 FALSE   TRUE  
## 3 093.4 …    54 2004-07-13 2020-01-05    93.4    26.4 -117.  32.9 FALSE   TRUE  
## 4 083.3 …    54 2004-07-24 2020-01-16    83.3    39.4 -119.  34.3 FALSE   TRUE  
## 5 088.5 …    54 2004-07-19 2020-01-11    88.5    30.1 -118.  33.7 FALSE   TRUE  
## 6 081.7 …    50 2004-07-27 2020-01-16    81.7    43.5 -120.  34.4 FALSE   TRUE  
## 7 091.7 …    49 2004-07-28 2020-01-05    91.7    26.4 -117.  33.2 FALSE   TRUE  
## 8 080.0 …    48 2004-07-27 2020-01-16    80      50.5 -120.  34.5 FALSE   TRUE  
## 9 085.4 …    38 2004-07-28 2020-01-11    85.4    35.8 -119.  34.0 FALSE   TRUE  
## # … with 3 more variables: is_ccelter <lgl>, is_sccoos <lgl>, geom <POINT [°]>,
## #   and abbreviated variable names ¹​sta_id_line, ²​sta_id_station, ³​is_offshore

stations - Station Positions

Source: Station Positions – CalCOFI

  • 75 Station Pattern: Summer and Fall surveys (~16 days at sea) since 1984

  • 104 or 113 Station Pattern: Winter and Spring surveys (~23 days at sea)

    • 113 stations have occasionally been occupied (25+ days at sea) by sampling every 20 nautical miles on line 66.7 off Monterey, CA (part of the MBARI ‘SECRET’ time-series)
sta_ord_csv <- "./data/station-positions/CalCOFIStationOrder.csv"
d_sta_ord <- read_csv(sta_ord_csv)

# ran once
# dbWriteTable(con, "stations_order", d_sta_ord)

datatable(d_sta_ord)
d_ctd_casts_10 <- tbl(con, "ctd_casts") %>% 
  slice_min(10) %>% 
  collect()
names(d_ctd_casts_10) %>% str_subset("sta|line")
##  [1] "cruz_sta"  "dbsta_id"  "sta_id"    "sta_code"  "distance"  "rptline"  
##  [7] "stline"    "acline"    "rptsta"    "ststa"     "acsta"     "origstaid"
# "stline"  "rptline"  "acline"    
# "ststa"   "rptsta"   "acsta"
# which station and line to use?
#  - st*  ?
#  - rpt* reported?
#  - ac*  actual?
# tbl(con, "ctd_casts") %>% 
#   # filter(stline != rptline) %>% 
#   filter(stline != acline) %>% 
#   group_by(stline) %>% 
#   summarize(n = n()) %>% 
#   collect()

tbl(con, "stations_order") %>% 
  anti_join(
    tbl(con, "ctd_casts"),
    by = c(
#       "LINE" = "stline", 
#       "STA"  = "ststa")) %>% 
#   select(LINE, STA)
# #    LINE   STA
# #   <dbl> <dbl>
# # 1  73.3   100
# # 2  63.3   100
      "LINE" = "rptline", 
      "STA"  = "rptsta")) %>% 
  select(LINE, STA) # 0
## # Source:   SQL [0 x 2]
## # Database: postgres  [admin@localhost:5432/gis]
## # … with 2 variables: LINE <dbl>, STA <dbl>
# ok, so going with rptline, rptsta

pts_sta <- tbl(con, "stations_order") %>% 
  left_join(
    tbl(con, "ctd_casts"),
    by = c(
      "LINE" = "rptline", 
      "STA"  = "rptsta")) %>% 
  rename(
    sta_line = LINE, 
    sta_sta  = STA,
    lon      = `LON (DEC)`,
    lat      = `LAT (DEC)`,
    order    = `ORDER OCC`) %>% 
  group_by(sta_line, sta_sta, lon, lat) %>% 
  summarize(
    n        = n(),
    date_min = min(date, na.rm=T),
    date_max = max(date, na.rm=T), 
    .groups = "drop") %>% 
  collect() %>% 
  st_as_sf(
    coords = c("lon", "lat"),
    crs = 4326)

# how many ctd_casts missing?
# tbl(con, "ctd_casts") %>% count() %>% pull(n)  #   35,376
# sum(pts_sta$n)                                 # - 17,852 = 17,524

pts_sta$n <- as.numeric(pts_sta$n)
mapView(pts_sta["n"])
mapView(pts_sta, cex = "n")
mapView(pts_sta, cex = "n", zcol = "n")