Skip to contents
library(tinyVAST)
library(fmesher)
set.seed(101)
options("tinyVAST.verbose" = FALSE)

tinyVAST is an R package for fitting vector autoregressive spatio-temporal (VAST) models. We here explore the capacity to specify the vector-autoregressive spatio-temporal component.

Spatio-temporal autoregressive model

We first explore the ability to specify a first-order autoregressive spatio-temporal process:

# Simulate settings
theta_xy = 0.4
n_x = n_y = 10
n_t = 15
rho = 0.8
spatial_sd = 0.5

# Simulate GMRFs
R_s = exp(-theta_xy * abs(outer(1:n_x, 1:n_y, FUN="-")) )
V_ss = spatial_sd^2*kronecker(R_s, R_s)
d = mvtnorm::rmvnorm(n_t, sigma=V_ss )

# Project through time and add mean
for( t in seq_len(n_t) ){
  if(t>1) d[t,] = rho*d[t-1,] + d[t,]
}
#d = d + 0.5

# Shape into longform data-frame and add error
Data = data.frame( expand.grid(time=1:n_t, x=1:n_x, y=1:n_y), "var"="logn", z=exp(as.vector(d)))
Data$n = tweedie::rtweedie( n=nrow(Data), mu=Data$z, phi=0.5, power=1.5 )
mean(Data$n==0)
#> [1] 0.046

# make mesh
mesh = fm_mesh_2d( Data[,c('x','y')] )

# fit model
mytinyVAST = tinyVAST( dsem = "logn -> logn, 1, rho",
           data = Data,
           formula = n ~ 0 + factor(time),
           spatial_graph = mesh,
           family = tweedie() )
mytinyVAST
#> Call: 
#> tinyVAST(formula = n ~ 0 + factor(time), data = Data, dsem = "logn -> logn, 1, rho", 
#>     family = tweedie(), spatial_graph = mesh)
#> 
#> Run time: 
#> Time difference of 32.35395 secs
#> 
#> Family: 
#> $obs
#> 
#> Family: tweedie 
#> Link function: log 
#> 
#> 
#> 
#> 
#> sdreport(.) result
#>              Estimate Std. Error
#> alpha_j   -0.08323604 0.15196456
#> alpha_j   -0.13549104 0.18670340
#> alpha_j   -0.10579217 0.20529851
#> alpha_j   -0.14499114 0.21780791
#> alpha_j   -0.37823869 0.22691800
#> alpha_j   -0.21633307 0.23026450
#> alpha_j   -0.41489959 0.23456777
#> alpha_j   -0.67168423 0.23833635
#> alpha_j   -0.49463135 0.23869331
#> alpha_j   -0.13968720 0.23733095
#> alpha_j    0.14836187 0.23640201
#> alpha_j   -0.21516693 0.23873590
#> alpha_j   -0.20120058 0.23979214
#> alpha_j    0.16887045 0.23708655
#> alpha_j    0.30040122 0.23660035
#> beta_z     0.81229112 0.03708631
#> beta_z     0.40988915 0.03291043
#> log_sigma -0.64868475 0.05422114
#> log_sigma  0.04394543 0.07275797
#> log_kappa  0.07228542 0.10755269
#> Maximum gradient component: 0.006315957 
#> 
#> Proportion deviance explained: 
#> [1] 0.6800908
#> 
#> DSEM terms: 
#>   heads   to from parameter start lag  Estimate  Std_Error  z_value       p_value
#> 1     1 logn logn         1  <NA>   1 0.8122911 0.03708631 21.90272 2.447223e-106
#> 2     2 logn logn         2  <NA>   0 0.4098891 0.03291043 12.45469  1.318626e-35
#> 
#> Fixed terms: 
#>                   Estimate Std_Error    z_value     p_value
#> factor(time)1  -0.08323604 0.1519646 -0.5477333 0.583875070
#> factor(time)2  -0.13549104 0.1867034 -0.7257021 0.468021440
#> factor(time)3  -0.10579217 0.2052985 -0.5153090 0.606337104
#> factor(time)4  -0.14499114 0.2178079 -0.6656835 0.505613407
#> factor(time)5  -0.37823869 0.2269180 -1.6668518 0.095543872
#> factor(time)6  -0.21633307 0.2302645 -0.9394981 0.347475054
#> factor(time)7  -0.41489959 0.2345678 -1.7687834 0.076930020
#> factor(time)8  -0.67168423 0.2383363 -2.8182199 0.004829073
#> factor(time)9  -0.49463135 0.2386933 -2.0722464 0.038242472
#> factor(time)10 -0.13968720 0.2373310 -0.5885756 0.556146036
#> factor(time)11  0.14836187 0.2364020  0.6275830 0.530277181
#> factor(time)12 -0.21516693 0.2387359 -0.9012760 0.367441605
#> factor(time)13 -0.20120058 0.2397921 -0.8390624 0.401434274
#> factor(time)14  0.16887045 0.2370866  0.7122734 0.476295482
#> factor(time)15  0.30040122 0.2366004  1.2696567 0.204206943

The estimated values for beta_z then correspond to the simulated value for rho and spatial_sd.

We can compare the true densities:

library(sf)
data_wide = reshape( Data[,c('x','y','time','z')],
                     direction = "wide", idvar = c('x','y'), timevar = "time")
sf_data = st_as_sf( data_wide, coords=c("x","y"))
sf_grid = sf::st_make_grid( sf_data )
sf_plot = st_sf(sf_grid, st_drop_geometry(sf_data) )
plot(sf_plot, max.plot=n_t )
plot of chunk VAST-true-dens
plot of chunk VAST-true-dens

with the estimated densities:

Data$z_hat = predict(mytinyVAST)
data_wide = reshape( Data[,c('x','y','time','z_hat')],
                     direction = "wide", idvar = c('x','y'), timevar = "time")
sf_data = st_as_sf( data_wide, coords=c("x","y"))
sf_plot = st_sf(sf_grid, st_drop_geometry(sf_data) )
plot(sf_plot, max.plot=n_t )
plot of chunk VAST-est-dens
plot of chunk VAST-est-dens

where a scatterplot shows that they are highly correlated:

plot( x=Data$z, y=Data$z_hat )
plot of chunk VAST-scatterplot
plot of chunk VAST-scatterplot

We can also use the DHARMa package to visualize simulation residuals:

# simulate new data conditional on fixed and random effects
y_ir = replicate( n = 100, 
           expr = mytinyVAST$obj$simulate()$y_i )

#
res = DHARMa::createDHARMa( simulatedResponse = y_ir, 
                            observedResponse = Data$n, 
                            fittedPredictedResponse = fitted(mytinyVAST) )
plot(res)
plot of chunk VAST-DHARMa
plot of chunk VAST-DHARMa

We can then calculate the area-weighted total abundance and compare it with its true value:

# Predicted sample-weighted total
(Est = sapply( seq_len(n_t),
   FUN=\(t) integrate_output(mytinyVAST, newdata=subset(Data,time==t)) ))
#>                           [,1]       [,2]       [,3]       [,4]      [,5]       [,6]      [,7]      [,8]      [,9]      [,10]     [,11]
#> Estimate             97.164902  96.643634  98.362458 101.517620 84.760587  97.538110 77.520820 59.565797 75.752785 113.562128 159.66734
#> Std. Error            7.194683   7.216494   7.309313   7.572241  6.643419   7.406226  6.207919  5.090533  6.144821   8.454587  11.09945
#> Est. (bias.correct) 102.324275 102.850496 105.043004 108.373176 90.604659 104.258111 83.102278 64.065173 81.166864 120.921460 169.20673
#> Std. (bias.correct)         NA         NA         NA         NA        NA         NA        NA        NA        NA         NA        NA
#>                          [,12]     [,13]     [,14]     [,15]
#> Estimate            120.156809 137.80745 192.64174 187.86973
#> Std. Error            9.107852  10.39448  13.68245  12.88189
#> Est. (bias.correct) 127.519259 145.78500 203.55063 200.54754
#> Std. (bias.correct)         NA        NA        NA        NA

# True (latent) sample-weighted total
(True = tapply( Data$z, INDEX=Data$time, FUN=sum ))
#>         1         2         3         4         5         6         7         8         9        10        11        12        13 
#>  99.21643 100.10603 101.66846 109.52622  85.76973 100.97116  80.99847  68.60738  85.39974 119.62380 147.41437 122.00580 158.26179 
#>        14        15 
#> 200.56813 203.37545

#
Index = data.frame( time=seq_len(n_t), t(Est), True )
Index$low = Index[,'Est...bias.correct.'] - 1.96*Index[,'Std..Error']
Index$high = Index[,'Est...bias.correct.'] + 1.96*Index[,'Std..Error']

#
library(ggplot2)
ggplot(Index, aes(time, Estimate)) +
  geom_ribbon(aes(ymin = low,
                  ymax = high),    # shadowing cnf intervals
              fill = "lightgrey") +
  geom_line( color = "black",
            linewidth = 1) +
  geom_point( aes(time, True), color = "red" )
plot of chunk VAST-abundance
plot of chunk VAST-abundance

Next, we compare this against the current version of VAST

#> Warning: package 'marginaleffects' was built under R version 4.3.3
settings = make_settings( purpose="index3",
                          n_x = n_x*n_y,
                          Region = "Other",
                          bias.correct = FALSE,
                          use_anisotropy = FALSE )
settings$FieldConfig['Epsilon','Component_1'] = 0
settings$FieldConfig['Omega',] = 0
settings$RhoConfig['Epsilon2'] = 4
settings$RhoConfig['Beta1'] = 3
settings$ObsModel = c(10,2)

# Run VAST
myVAST = fit_model( settings=settings,
                 Lat_i = Data[,'y'],
                 Lon_i = Data[,'x'],
                 t_i = Data[,'time'],
                 b_i = Data[,'n'],
                 a_i = rep(1,nrow(Data)),
                 observations_LL = cbind(Lat=Data[,'y'],Lon=Data[,'x']),
                 grid_dim_km = c(100,100),
                 newtonsteps = 0,
                 loopnum = 1,
                 control = list(eval.max = 10000, iter.max = 10000, trace = 0) )
myVAST
#> fit_model(.) result
#> $par
#>       beta1_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft 
#>    -0.58930072     0.51556558     0.46458132     0.49054914     0.46815469     0.22995166     0.39666648     0.19322487    -0.07072451 
#>       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft       beta2_ft   L_epsilon2_z      logkappa2 
#>     0.11680346     0.47101097     0.77135916     0.40831806     0.42839443     0.79998249     0.91170565     0.49266122    -4.30062282 
#> Epsilon_rho2_f      logSigmaM 
#>     0.85035928     0.10422306 
#> 
#> $objective
#> [1] 1738.337
#> 
#> $iterations
#> [1] 6
#> 
#> $evaluations
#> function gradient 
#>       12        7 
#> 
#> $time_for_MLE
#> Time difference of 1.733057 secs
#> 
#> $max_gradient
#> [1] 0.0005693192
#> 
#> $Convergence_check
#> [1] "The model is likely not converged"
#> 
#> $number_of_coefficients
#>  Total  Fixed Random 
#>   2060     20   2040 
#> 
#> $AIC
#> [1] 3516.675
#> 
#> $diagnostics
#>             Param starting_value     Lower         MLE     Upper final_gradient
#> 1        beta1_ft    -0.58930212      -Inf -0.58930072       Inf  -2.544080e-04
#> 2        beta2_ft     0.51556564      -Inf  0.51556558       Inf   5.379535e-06
#> 3        beta2_ft     0.46458076      -Inf  0.46458132       Inf  -1.711880e-05
#> 4        beta2_ft     0.49055249      -Inf  0.49054914       Inf   2.249599e-04
#> 5        beta2_ft     0.46815184      -Inf  0.46815469       Inf  -2.146493e-04
#> 6        beta2_ft     0.22994932      -Inf  0.22995166       Inf  -1.963291e-04
#> 7        beta2_ft     0.39666756      -Inf  0.39666648       Inf   7.352745e-05
#> 8        beta2_ft     0.19322632      -Inf  0.19322487       Inf   1.263052e-04
#> 9        beta2_ft    -0.07072357      -Inf -0.07072451       Inf   8.061085e-05
#> 10       beta2_ft     0.11680236      -Inf  0.11680346       Inf  -8.984282e-05
#> 11       beta2_ft     0.47100950      -Inf  0.47101097       Inf  -1.175670e-04
#> 12       beta2_ft     0.77135978      -Inf  0.77135916       Inf   3.317524e-05
#> 13       beta2_ft     0.40831857      -Inf  0.40831806       Inf   3.594733e-05
#> 14       beta2_ft     0.42839428      -Inf  0.42839443       Inf  -2.828227e-05
#> 15       beta2_ft     0.79997983      -Inf  0.79998249       Inf  -1.675174e-04
#> 16       beta2_ft     0.91170839      -Inf  0.91170565       Inf   1.978155e-04
#> 17   L_epsilon2_z     0.49266060      -Inf  0.49266122       Inf  -3.409419e-04
#> 18      logkappa2    -4.30062178 -6.214608 -4.30062282 -3.565449   1.061017e-04
#> 19 Epsilon_rho2_f     0.85035619 -0.990000  0.85035928  0.990000  -5.693192e-04
#> 20      logSigmaM     0.10422369      -Inf  0.10422306 10.000000   8.657499e-05
#> 
#> $SD
#> sdreport(.) result
#>                   Estimate Std. Error
#> beta1_ft       -0.58930072 0.05080467
#> beta2_ft        0.51556558 0.14414517
#> beta2_ft        0.46458132 0.17371754
#> beta2_ft        0.49054914 0.19200615
#> beta2_ft        0.46815469 0.20433942
#> beta2_ft        0.22995166 0.21476799
#> beta2_ft        0.39666648 0.21934845
#> beta2_ft        0.19322487 0.22481460
#> beta2_ft       -0.07072451 0.22973075
#> beta2_ft        0.11680346 0.23059099
#> beta2_ft        0.47101097 0.22964743
#> beta2_ft        0.77135916 0.22895295
#> beta2_ft        0.40831806 0.23199191
#> beta2_ft        0.42839443 0.23305157
#> beta2_ft        0.79998249 0.23091834
#> beta2_ft        0.91170565 0.23072495
#> L_epsilon2_z    0.49266122 0.04727208
#> logkappa2      -4.30062282 0.13652887
#> Epsilon_rho2_f  0.85035928 0.03527714
#> logSigmaM       0.10422306 0.07108160
#> Maximum gradient component: 0.0005693192 
#> 
#> $time_for_sdreport
#> Time difference of 6.984021 secs
#> 
#> $time_for_run
#> Time difference of 33.26514 secs

Or with sdmTMB

library(sdmTMB)
mesh = make_mesh(Data, c("x","y"), n_knots=n_x*n_y )

start_time = Sys.time()
mysdmTMB = sdmTMB(
  formula = n ~ 0 + factor(time),
  data = Data,
  mesh = mesh,
  spatial = "off",
  spatiotemporal = "ar1",
  time = "time",
  family = tweedie()
)
sdmTMBtime = Sys.time() - start_time

The models all have similar runtimes

Times = c( "tinyVAST" = mytinyVAST$run_time,
           "VAST" = myVAST$total_time,
           "sdmTMB" = sdmTMBtime )
knitr::kable( cbind("run times (sec.)"=Times), digits=1)
run times (sec.)
tinyVAST 32.4
VAST 167.4
sdmTMB 29.9

Delta models

We can also fit these data using a delta model

# fit model
mydelta2 = tinyVAST( data = Data,
               formula = n ~ 1,
               delta_options = list(
                 delta_formula = ~ 0 + factor(time),
                 delta_dsem = "logn -> logn, 1, rho"),
               family = delta_lognormal(type="poisson-link"),
               spatial_graph = mesh )
#> Warning in deviance_explained(out): Problem detected: deviance explained should be between 0 and 1

mydelta
#> Error in eval(expr, envir, enclos): object 'mydelta' not found

And we can again use the DHARMa package to visualize simulation residuals:

# simulate new data conditional on fixed and random effects
y_ir = replicate( n = 100, 
           expr = mydelta2$obj$simulate()$y_i )

#
res = DHARMa::createDHARMa( simulatedResponse = y_ir, 
                            observedResponse = Data$n, 
                            fittedPredictedResponse = fitted(mydelta2) )
plot(res)
plot of chunk delta-DHARMa
plot of chunk delta-DHARMa

We can then use AIC to compare the fit of the delta-model and Tweedie distribution:

knitr::kable( c("Tweedie"=AIC(mytinyVAST),"delta-lognormal"=AIC(mydelta2)), digits=3)
x
Tweedie 3475.378
delta-lognormal 4177.185

Bivariate spatio-temporal autoregressive model

We next highlight how to specify a bivariate spatio-temporal model with a cross-laggged (vector autoregressive) interaction.

# Simulate settings
theta_xy = 0.2
n_x = n_y = 10
n_t = 20
B = rbind( c( 0.5, -0.25),
           c(-0.1,  0.50) )

# Simulate GMRFs
R = exp(-theta_xy * abs(outer(1:n_x, 1:n_y, FUN="-")) )
d1 = mvtnorm::rmvnorm(n_t, sigma=0.2*kronecker(R,R) )
d2 = mvtnorm::rmvnorm(n_t, sigma=0.2*kronecker(R,R) )
d = abind::abind( d1, d2, along=3 )

# Project through time and add mean
for( t in seq_len(n_t) ){
  if(t>1) d[t,,] = t(B%*%t(d[t-1,,])) + d[t,,]
}

# Shape into longform data-frame and add error
Data = data.frame( expand.grid(time=1:n_t, x=1:n_x, y=1:n_y, "var"=c("d1","d2")), z=exp(as.vector(d)))
Data$n = tweedie::rtweedie( n=nrow(Data), mu=Data$z, phi=0.5, power=1.5 )

# make mesh
mesh = fm_mesh_2d( Data[,c('x','y')] )

# Define DSEM
dsem = "
  d1 -> d1, 1, b11
  d2 -> d2, 1, b22
  d2 -> d1, 1, b21
  d1 -> d2, 1, b12
  d1 <-> d1, 0, var1
  d2 <-> d2, 0, var1
"

# fit model
out = tinyVAST( dsem = dsem,
           data = Data,
           formula = n ~ 0 + var,
           spatial_graph = mesh,
           family = tweedie() )
out
#> Call: 
#> tinyVAST(formula = n ~ 0 + var, data = Data, dsem = dsem, family = tweedie(), 
#>     spatial_graph = mesh)
#> 
#> Run time: 
#> Time difference of 3.236934 mins
#> 
#> Family: 
#> $obs
#> 
#> Family: tweedie 
#> Link function: log 
#> 
#> 
#> 
#> 
#> sdreport(.) result
#>               Estimate Std. Error
#> alpha_j   -0.135067087 0.11243172
#> alpha_j   -0.050931911 0.09302442
#> beta_z     0.537678543 0.06461591
#> beta_z     0.471060516 0.07549660
#> beta_z    -0.268664647 0.07225850
#> beta_z    -0.080655517 0.06157438
#> beta_z     0.355720082 0.01973953
#> log_sigma -0.675048462 0.02959907
#> log_sigma  0.004059747 0.04917005
#> log_kappa -0.566274473 0.09138031
#> Maximum gradient component: 0.002923743 
#> 
#> Proportion deviance explained: 
#> [1] 0.4630639
#> 
#> DSEM terms: 
#>   heads to from parameter start lag    Estimate  Std_Error   z_value      p_value
#> 1     1 d1   d1         1  <NA>   1  0.53767854 0.06461591  8.321148 8.711554e-17
#> 2     1 d2   d2         2  <NA>   1  0.47106052 0.07549660  6.239493 4.389917e-10
#> 3     1 d1   d2         3  <NA>   1 -0.26866465 0.07225850 -3.718104 2.007234e-04
#> 4     1 d2   d1         4  <NA>   1 -0.08065552 0.06157438 -1.309888 1.902338e-01
#> 5     2 d1   d1         5  <NA>   0  0.35572008 0.01973953 18.020697 1.340448e-72
#> 6     2 d2   d2         5  <NA>   0  0.35572008 0.01973953 18.020697 1.340448e-72
#> 
#> Fixed terms: 
#>          Estimate  Std_Error    z_value   p_value
#> vard1 -0.13506709 0.11243172 -1.2013254 0.2296250
#> vard2 -0.05093191 0.09302442 -0.5475112 0.5840276

The values for beta_z again correspond to the specified value for interaction-matrix B

We can again calculate the area-weighted total abundance and compare it with its true value:

# Predicted sample-weighted total
Est1 = sapply( seq_len(n_t), FUN=\(t) integrate_output(out, newdata=subset(Data,time==t & var=="d1")) )
Est2 = sapply( seq_len(n_t), FUN=\(t) integrate_output(out, newdata=subset(Data,time==t & var=="d2")) )

# True (latent) sample-weighted total
True = tapply( Data$z, INDEX=list("time"=Data$time,"var"=Data$var), FUN=sum )

#
Index = data.frame( expand.grid(dimnames(True)), "True"=as.vector(True) )
Index = data.frame( Index, rbind(t(Est1), t(Est2)) )
Index$low = Index[,'Est...bias.correct.'] - 1.96*Index[,'Std..Error']
Index$high = Index[,'Est...bias.correct.'] + 1.96*Index[,'Std..Error']

#
library(ggplot2)
ggplot(Index, aes( time, Estimate )) +
  facet_grid( rows=vars(var), scales="free" ) +
  geom_segment(aes(y = low,
                  yend = high,
                  x = time,
                  xend = time) ) +
  geom_point( aes(x=time, y=Estimate), color = "black") +
  geom_point( aes(x=time, y=True), color = "red" )
plot of chunk VAST-VAR-index
plot of chunk VAST-VAR-index