Simultaneous autoregressive process
James T. Thorson
Source:vignettes/simultaneous_autoregressive_process.Rmd
simultaneous_autoregressive_process.Rmd
library(tinyVAST)
library(igraph)
library(rnaturalearth)
library(sf)
options("tinyVAST.verbose" = FALSE)
tinyVAST
is an R package for fitting vector
autoregressive spatio-temporal (VAST) models using a minimal and
user-friendly interface. We here show how it can fit a multivariate
second-order autoregressive (AR2) model including spatial correlations
using a simultaneous autoregressive (SAR) process specified using
igraph.
To do so, we first load salmong returns, and remove 0s to allow comparison between Tweedie and lognormal distributions.
data( salmon_returns )
# Transform data
salmon_returns$Biomass_nozeros = ifelse( salmon_returns$Biomass==0,
NA, salmon_returns$Biomass )
Data = na.omit(salmon_returns)
We first explore an AR2 process, with independent variation among regions. This model shows a substantial first-order autocorrelation for sockeye and chum, and substantial second-order autocorrelation for pink salmon. An AR(2) process is stationary if \(\phi_1 + \phi_2 < 1\) and \(\phi_2 - \phi_1 < 1\), and this stationarity criterion suggests that each time-series is close to (but not quite) nonstationary.
# Define graph for SAR process
unconnected_graph = make_empty_graph( nlevels(Data$Region) )
V(unconnected_graph)$name = levels(Data$Region)
plot(unconnected_graph)
# Define SEM for AR2 process
dsem = "
sockeye -> sockeye, -1, lag1_sockeye
sockeye -> sockeye, -2, lag2_sockeye
pink -> pink, -1, lag1_pink
pink -> pink, -2, lag2_pink
chum -> chum, -1, lag1_chum
chum -> chum, -2, lag2_chum
"
# Fit tinyVAST model
mytiny0 = tinyVAST(
formula = Biomass_nozeros ~ 0 + Species + Region,
data = Data,
dsem = dsem,
variable_column = "Species",
time_column = "Year",
space_column = "Region",
distribution_column = "Species",
family = list( "chum" = lognormal(),
"pink" = lognormal(),
"sockeye" = lognormal() ),
spatial_graph = unconnected_graph,
control = tinyVASTcontrol( profile="alpha_j" ) )
#> Warning in nlminb(start = opt$par, objective = obj$fn, gradient = obj$gr, :
#> NA/NaN function evaluation
heads | to | from | parameter | start | lag | Estimate | Std_Error | z_value | p_value |
---|---|---|---|---|---|---|---|---|---|
1 | sockeye | sockeye | 1 | NA | -1 | 0.807 | 0.059 | 13.711 | 0.000 |
1 | sockeye | sockeye | 2 | NA | -2 | 0.195 | 0.059 | 3.308 | 0.001 |
1 | pink | pink | 3 | NA | -1 | 0.050 | 0.019 | 2.640 | 0.008 |
1 | pink | pink | 4 | NA | -2 | 0.882 | 0.022 | 39.931 | 0.000 |
1 | chum | chum | 5 | NA | -1 | 0.675 | 0.102 | 6.584 | 0.000 |
1 | chum | chum | 6 | NA | -2 | 0.293 | 0.099 | 2.940 | 0.003 |
2 | pink | pink | 7 | NA | 0 | 0.648 | 0.039 | 16.764 | 0.000 |
2 | chum | chum | 8 | NA | 0 | 0.294 | 0.035 | 8.353 | 0.000 |
2 | sockeye | sockeye | 9 | NA | 0 | 0.421 | 0.036 | 11.622 | 0.000 |
We also explore an SAR process for adjacency among regions
# Define graph for SAR process
adjacency_graph = make_graph( ~ Korea - Japan - M.I - WKam - EKam -
WAK - SPen - Kod - CI - PWS -
SEAK - NBC - SBC - WA )
We can plot this adjacency on a map to emphasize that it is a simple way to encode information about spatial proximity:
#maps = ne_countries( country = c("united states of america","russia","canada","south korea","north korea","japan") )
maps = ne_countries( continent = c("north america","asia","europe") )
maps = st_combine( maps )
maps = st_transform( maps, crs=st_crs(3832) )
#maps = st_crop( maps, xmin = -5*1e5, xmax = 12*1e5,
# ymin = 0 * 1e6, ymax=10 * 1e6 )
# Format inputs
loc_xy = cbind(
x = c(129,143,140,156,163,-163,-161,-154,-154,-147,-138,-129,-126,-125),
y = c(36,40,57,53,57,60,55,56,59,61,57,54,50,45)
)
loc_xy = sf_project( loc_xy, from=st_crs(4326), to=st_crs(3832) )
# Plot
xlim = c(-4,10) * 1e6
ylim = c(3,10) * 1e6
plot( maps,
xlim = xlim,
ylim = ylim,
col = "grey",
asp = FALSE,
add = FALSE )
plot( adjacency_graph,
layout = loc_xy,
add = TRUE,
rescale = FALSE,
vertex.label.color = "red",
xlim = xlim,
ylim = ylim,
edge.width = 2,
edge.color = "red" )
We can then pass this adjacency graph to tinyVAST
during
fitting:
# Fit tinyVAST model
mytiny = tinyVAST(
formula = Biomass_nozeros ~ 0 + Species + Region,
data = Data,
dsem = dsem,
variable_column = "Species",
time_column = "Year",
space_column = "Region",
distribution_column = "Species",
family = list( "chum" = lognormal(),
"pink" = lognormal(),
"sockeye" = lognormal() ),
spatial_graph = adjacency_graph,
control = tinyVASTcontrol( profile="alpha_j" ) )
# Summarize output
Summary = summary(mytiny, what="dsem")
knitr::kable( Summary, digits=3)
heads | to | from | parameter | start | lag | Estimate | Std_Error | z_value | p_value |
---|---|---|---|---|---|---|---|---|---|
1 | sockeye | sockeye | 1 | NA | -1 | 0.952 | NaN | NaN | NaN |
1 | sockeye | sockeye | 2 | NA | -2 | 0.914 | NaN | NaN | NaN |
1 | pink | pink | 3 | NA | -1 | 0.320 | NaN | NaN | NaN |
1 | pink | pink | 4 | NA | -2 | 0.677 | NaN | NaN | NaN |
1 | chum | chum | 5 | NA | -1 | 0.521 | NaN | NaN | NaN |
1 | chum | chum | 6 | NA | -2 | 0.493 | NaN | NaN | NaN |
2 | pink | pink | 7 | NA | 0 | 0.445 | NaN | NaN | NaN |
2 | chum | chum | 8 | NA | 0 | 0.235 | NaN | NaN | NaN |
2 | sockeye | sockeye | 9 | NA | 0 | 0.666 | NaN | NaN | NaN |
We can use AIC to compare these two models. This comparison suggests that spatial adjancency is not a parsimonious way to describe correlations among time-series.
# AIC for unconnected time-series
AIC(mytiny0)
#> [1] 49086.47
# AIC for SAR spatial variation
AIC(mytiny)
#> [1] 51825.84
Finally, we can plot observations and predictions for the selected model
# Compile long-form dataframe of observations and predictions
Resid = rbind( cbind(Data[,c('Species','Year','Region','Biomass_nozeros')], "Which"="Obs"),
cbind(Data[,c('Species','Year','Region')], "Biomass_nozeros"=predict(mytiny0,Data), "Which"="Pred") )
# plot using ggplot
library(ggplot2)
ggplot( data=Resid, aes(x=Year, y=Biomass_nozeros, col=Which) ) + # , group=yhat.id
geom_line() +
facet_grid( rows=vars(Region), cols=vars(Species), scales="free" ) +
scale_y_continuous(trans='log') #