::install_github("LukeCe/spflow") devtools
In-class Ex 5
Spatial Econometrics
1 Loading R Packages
A key package used is spflow, which allows us to estimate spatial econometric models, designed to exploit the relational structure of ~~flow data
options(repos = c(CRAN = "https://cran.rstudio.com/"))
::p_load(tmap, sf, spdep, sp, Matrix,
pacman spflow, knitr, tidyverse)
2 Importing the data
To use the functions in spflow package, we need the following R data types:
- Spatial weights
- O-D flow data as a tibble dataframe
- Explanatory variables as a tibble dataframe
<- st_read(dsn = "data/geospatial",
mpsz layer = "MPSZ-2019") %>%
st_transform(crs = 3414)
<- st_read(dsn = "data/geospatial",
busstop layer = "BusStop") %>%
st_transform(crs = 3414)
<- st_intersection(busstop, mpsz) %>%
mpsz_busstop select(BUS_STOP_N, SUBZONE_C) %>%
st_drop_geometry()
Spatial weights
Calculate centroid of each zubzone area
<- suppressWarnings({
centroids st_point_on_surface(st_geometry(mpsz_busstop))
})
Create a list of the following spatial weights metrics:
- contiguity weights
- distance-based weights
- k-nearest neighbours
<- list(
mpsz_nb "by_contiguity" = ploy2nb(mpsz_busstop),
"by_distance" = dnearneigh(centroids,
d1 = 0,
d2 = 5000),
"by_knn" = knn2nb(knearneigh(centroids, 3))
)
3 Retrieving prepared dataframes
<- read_rds("data/rds/mpsz_nb.rds")
mpsz_nb <- read_rds("data/rds/mpsz_flow.rds")
mpsz_flow <- read_rds("data/rds/mpsz_var.rds") mpsz_var
4 Creating spflow network class data
This combines neighbourhood links and flow data
<- spflow_network(
mpsz_net id_net = "sg",
node_neighborhood =
nb2mat(mpsz_nb$by_contiguity),
node_data = mpsz_var,
node_key_column = "SZ_CODE"
)
<- spflow_network_pair(
mpsz_net_pairs id_orig_net = "sg",
id_dest_net = "sg",
pair_data = mpsz_flow,
orig_key_column = "ORIGIN_SZ",
dest_key_column = "DESTIN_SZ"
)
mpsz_net_pairs
Spatial network pair with id: sg_sg
--------------------------------------------------
Origin network id: sg (with 313 nodes)
Destination network id: sg (with 313 nodes)
Number of pairs: 97969
Completeness of pairs: 100.00% (97969/97969)
Data on node-pairs:
DESTIN_SZ ORIGIN_SZ DISTANCE TRIPS
1 RVSZ05 RVSZ05 0 67
314 SRSZ01 RVSZ05 305.74 251
627 MUSZ02 RVSZ05 951.83 0
940 MPSZ05 RVSZ05 5254.07 0
1253 SISZ01 RVSZ05 4975 0
1566 BMSZ17 RVSZ05 3176.16 0
--- --- --- --- ---
96404 YSSZ07 TSSZ06 26972.97 0
96717 BSSZ01 TSSZ06 25582.48 0
97030 AMSZ05 TSSZ06 26714.79 0
97343 AMSZ04 TSSZ06 27572.74 0
97656 BSSZ02 TSSZ06 26681.7 0
97969 TSSZ06 TSSZ06 0 270
<- spflow_network_multi(mpsz_net,
mpsz_multi_net
mpsz_net_pairs)
mpsz_multi_net
Collection of spatial network nodes and pairs
--------------------------------------------------
Contains 1 spatial network nodes
With id : sg
Contains 1 spatial network pairs
With id : sg_sg
Availability of origin-destination pair information:
ID_ORIG_NET ID_DEST_NET ID_NET_PAIR COMPLETENESS C_PAIRS C_ORIG C_DEST
sg sg sg_sg 100.00% 97969/97969 313/313 313/313
4.1 Correlation Analysis
<- log(1 + TRIPS) ~
cor_formula +
BUSSTOP_COUNT +
AGE7_12 +
AGE13_24 +
AGE25_64 +
SCHOOL_COUNT +
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT # P = impedence
P_(log(DISTANCE +1))
<- pair_cor(
cor_mat
mpsz_multi_net,spflow_formula = cor_formula,
add_lags_x = FALSE)
# creating labels for variables
colnames(cor_mat) <- paste0(
substr(
colnames(cor_mat), 1, 3), "...")
# parse to construct correlation matrix
cor_image(cor_mat)
5 Model Callibration
There are 3 key model callibrations available:
- Maximum Likelihood Estimation (MLE)
- Spatial Two-stage Least Squares (S2SLS)
- Bayesian Markov Chain Monte Carlo (MCMC)
5.1 Base Model based on MLE
<- spflow(
base_model spflow_formula = log(1 + TRIPS) ~
# origin
O_(BUSSTOP_COUNT +
+
AGE25_64) # destination
D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE +1)),
spflow_networks = mpsz_multi_net
)
base_model
--------------------------------------------------
Spatial interaction model estimated by: MLE
Spatial correlation structure: SDM (model_9)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
rho_d 0.680 0.004 192.554 0.000
rho_o 0.678 0.004 187.729 0.000
rho_w -0.396 0.006 -65.588 0.000
(Intercept) 0.410 0.065 6.265 0.000
(Intra) 1.313 0.081 16.263 0.000
D_SCHOOL_COUNT 0.017 0.002 7.885 0.000
D_SCHOOL_COUNT.lag1 0.002 0.004 0.551 0.582
D_BUSINESS_COUNT 0.000 0.000 3.015 0.003
D_BUSINESS_COUNT.lag1 0.000 0.000 -0.249 0.803
D_RETAILS_COUNT 0.000 0.000 -0.306 0.759
D_RETAILS_COUNT.lag1 0.000 0.000 0.152 0.879
D_FINSERV_COUNT 0.002 0.000 6.787 0.000
D_FINSERV_COUNT.lag1 -0.002 0.001 -3.767 0.000
O_BUSSTOP_COUNT 0.002 0.000 6.806 0.000
O_BUSSTOP_COUNT.lag1 -0.001 0.000 -2.364 0.018
O_AGE25_64 0.000 0.000 7.336 0.000
O_AGE25_64.lag1 0.000 0.000 -2.797 0.005
P_log(DISTANCE + 1) -0.050 0.007 -6.792 0.000
--------------------------------------------------
R2_corr: 0.6942936
Observations: 97969
Model coherence: Validated
Overall Model results show that:
R2_corr: 0.6942941
The model is able to account for ~69% of variations
The model results also reveal statistics and spatial lag statistics for each explanatory variable. For instance,
D_SCHOOL_COUNT p-value = 0.000
D_SCHOOL_COUNT.lag1 p-value = 0.581
The above statistics reveal that school count within a zone is statistically significant, but the spatial lag variable has p-value > 0.05, which means that neighbouting areas’ school count does not contribute to the overall ‘attractiveness’ of the area.
6 Model Diagnostics
6.1 Moran Scatterplot - Residuals disgnostic
<- par(mfrow = c(1, 3),
old_par mar = c(2, 2, 2, 2))
spflow_moran_plots(base_model)
6.2 Rerun correlation matrix with base_model
<- pair_cor(base_model)
corr_residual
colnames(corr_residual) <- substr(colnames(corr_residual),1,3)
cor_image(corr_residual)
6.3 Model Control - fine-tuning unconstrained Model
# Create formula as list and save as variable
<- log(1 + TRIPS) ~
spflow_formula # origin
O_(BUSSTOP_COUNT +
+
AGE25_64) # destination
D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE +1))
# model control to define methods
<- spflow_control(
model_control estimation_method = "mle",
#model_1 is unconstrained
model = "model_1")
<- spflow(
mle_model1
spflow_formula,spflow_networks = mpsz_multi_net,
estimation_control = model_control)
mle_model1
--------------------------------------------------
Spatial interaction model estimated by: OLS
Spatial correlation structure: SLX (model_1)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
(Intercept) 11.384 0.069 164.255 0.000
(Intra) -6.006 0.112 -53.393 0.000
D_SCHOOL_COUNT 0.093 0.003 28.599 0.000
D_SCHOOL_COUNT.lag1 0.255 0.006 44.905 0.000
D_BUSINESS_COUNT 0.001 0.000 10.036 0.000
D_BUSINESS_COUNT.lag1 0.003 0.000 18.274 0.000
D_RETAILS_COUNT 0.000 0.000 -1.940 0.052
D_RETAILS_COUNT.lag1 0.000 0.000 -2.581 0.010
D_FINSERV_COUNT 0.005 0.000 10.979 0.000
D_FINSERV_COUNT.lag1 -0.016 0.001 -17.134 0.000
O_BUSSTOP_COUNT 0.014 0.001 25.865 0.000
O_BUSSTOP_COUNT.lag1 0.015 0.001 21.728 0.000
O_AGE25_64 0.000 0.000 14.479 0.000
O_AGE25_64.lag1 0.000 0.000 14.452 0.000
P_log(DISTANCE + 1) -1.281 0.008 -165.327 0.000
--------------------------------------------------
R2_corr: 0.2831458
Observations: 97969
Model coherence: Validated
6.4 Model control - check intra-zonal model (model 8)
# Create formula as list and save as variable
<- log(1 + TRIPS) ~
spflow_formula # origin
O_(BUSSTOP_COUNT +
+
AGE25_64) # destination
D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE +1))
# model control to define methods
<- spflow_control(
model_control estimation_method = "mle",
#model_1 is unconstrained
model = "model_8")
<- spflow(
mle_model8
spflow_formula,spflow_networks = mpsz_multi_net,
estimation_control = model_control)
mle_model8
--------------------------------------------------
Spatial interaction model estimated by: MLE
Spatial correlation structure: SDM (model_8)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
rho_d 0.689 0.003 196.831 0.000
rho_o 0.687 0.004 192.214 0.000
rho_w -0.473 0.003 -142.469 0.000
(Intercept) 1.086 0.049 22.275 0.000
(Intra) 0.840 0.075 11.255 0.000
D_SCHOOL_COUNT 0.019 0.002 8.896 0.000
D_SCHOOL_COUNT.lag1 0.019 0.004 5.130 0.000
D_BUSINESS_COUNT 0.000 0.000 3.328 0.001
D_BUSINESS_COUNT.lag1 0.000 0.000 1.664 0.096
D_RETAILS_COUNT 0.000 0.000 -0.414 0.679
D_RETAILS_COUNT.lag1 0.000 0.000 -0.171 0.864
D_FINSERV_COUNT 0.002 0.000 6.150 0.000
D_FINSERV_COUNT.lag1 -0.003 0.001 -4.601 0.000
O_BUSSTOP_COUNT 0.003 0.000 7.676 0.000
O_BUSSTOP_COUNT.lag1 0.000 0.000 0.552 0.581
O_AGE25_64 0.000 0.000 6.870 0.000
O_AGE25_64.lag1 0.000 0.000 -0.462 0.644
P_log(DISTANCE + 1) -0.125 0.005 -22.865 0.000
--------------------------------------------------
R2_corr: 0.6965974
Observations: 97969
Model coherence: Validated
Results reveal:
R2_corr: 0.6965974