tinyVAST
is an R package for fitting vector
autoregressive spatio-temporal (VAST) models using a minimal and
user-friendly interface. We here show how it can fit a bivariate
spatio-temporal model representing density dependence in physiological
condition for fishes.
This replicates a similar vignette provided for the VAST package, but
showcases several improvements in interpretation and interface.
We first load and combine the two data sets:
data( condition_and_density )
# Combine both parts
combo_data = plyr::rbind.fill( condition_and_density$condition,
condition_and_density$density )
# Reformat data in expected format
formed_data = cbind( combo_data[,c("Year","Lat","Lon")],
"Type" = factor(ifelse( is.na(combo_data[,'Individual_length_cm']),
"Biomass", "Condition" )),
"Response" = ifelse( is.na(combo_data[,'Individual_length_cm']),
combo_data[,'Sample_biomass_KGperHectare'],
log(combo_data[,'Individual_weight_Grams']) ),
"log_length" = ifelse( is.na(combo_data[,'Individual_length_cm']),
rep(0,nrow(combo_data)),
log(combo_data[,'Individual_length_cm'] / 10) ))
#
#formed_data$Year_Type = paste0( formed_data$Year, "_", formed_data$Type )
We then construct the SPDE mesh
# make mesh
mesh = fm_mesh_2d( formed_data[,c('Lon','Lat')], cutoff=1 )
Next, we specify spatial and spatio-temporal variance in both condition and density.
#
sem = "
Biomass <-> Biomass, sdB
Condition <-> Condition, sdC
Biomass -> Condition, dens_dep
"
#
dsem = "
Biomass <-> Biomass, 0, sdB
Condition <-> Condition, 0, sdC
Biomass -> Condition, 0, dens_dep
"
Finally, we define the distribution for each data set using the
family
argument:
Finally, we fit the model using tinyVAST
# fit model
fit = tinyVAST( data = formed_data,
formula = Response ~ interaction(Year,Type) + log_length,
spatial_graph = mesh,
control = tinyVASTcontrol( trace=0, verbose=TRUE, profile="alpha_j" ),
sem = sem,
dsem = dsem,
family = family,
variables = c("Biomass","Condition"),
variable_column = "Type",
space_columns = c("Lon", "Lat"),
time_column = "Year",
distribution_column = "Type",
times = 1982:2016 )
We can look at structural parameters using summary functions:
# spatial terms
summary(fit, "sem")
#> heads to from parameter start Estimate Std_Error z_value p_value
#> 1 2 Biomass Biomass 1 <NA> 1.423877e+00 0.133046685 10.70208186 9.951682e-27
#> 2 2 Condition Condition 2 <NA> -3.316273e-02 0.004167567 -7.95733630 1.757825e-15
#> 3 1 Condition Biomass 3 <NA> 9.292899e-05 0.004855089 0.01914053 9.847290e-01
# spatio-temporal terms
summary(fit, "dsem")
#> heads to from parameter start lag Estimate Std_Error z_value p_value
#> 1 2 Biomass Biomass 1 <NA> 0 0.966426764 0.024274856 39.811844 0.000000e+00
#> 2 2 Condition Condition 2 <NA> 0 -0.040541910 0.002723401 -14.886499 4.033460e-50
#> 3 1 Condition Biomass 3 <NA> 0 0.008201997 0.003339314 2.456192 1.404181e-02
Abundance-weighted expansion
To explore output, we can plot output using the survey extent:
# Extract shapefile
region = condition_and_density$eastern_bering_sea
# make extrapolation-grid
sf_grid = st_make_grid( region, cellsize=c(0.2,0.2) )
sf_grid = st_intersection( sf_grid, region )
sf_grid = st_make_valid( sf_grid )
#
grid_coords = st_coordinates( st_centroid(sf_grid) )
areas_km2 = st_area( sf_grid ) / 1e6
# Condition in
newdata = data.frame( "Lat" = grid_coords[,'Y'],
"Lon" = grid_coords[,'X'],
"Year" = 1982,
"Type" = "Condition",
#"Year_Type" = "1982_Condition",
"log_length" = 0 )
cond_1982 = predict(fit, newdata=newdata, what="p_g")
# Repeat for density
newdata2 = newdata
newdata2$Type = "Biomass"
#newdata2$Year_Type = "1982_Biomass"
dens_1982 = predict(fit, newdata=newdata2, what="p_g")
# Plot on map
plot_grid = st_sf( sf_grid,
"Condition.1982" = cond_1982,
"Density.1982" = dens_1982 )
plot( plot_grid, border=NA )
Density-weighted condition
Finally, we can calculate density-weighted condition
#
expand_data = rbind( newdata2, newdata )
W_gz = cbind( c(as.numeric(areas_km2),rep(0,length(areas_km2))), 0 )
V_gz = cbind( rep(c(0,3),each=length(areas_km2)), seq_along(areas_km2)-1 )
#
cond_tz = data.frame( "Year"=1998:2016, "Est"=NA, "SE"=NA )
for( yearI in seq_len(nrow(cond_tz)) ){
expand_data[,'Year'] = cond_tz[yearI,"Year"]
out = integrate_output( fit,
newdata = expand_data,
V_gz = V_gz,
W_gz = W_gz,
bias.correct = TRUE )
cond_tz[yearI,c("Est","SE")] = out[c("Estimate","Std. Error")]
}
# plot time-series
ggplot( cond_tz ) +
geom_line( aes(x=Year, y=Est) ) +
geom_ribbon( aes(x=Year, ymin=Est-SE, ymax=Est+SE), alpha=0.2 )