> library(magrittr)
Reading and reorganizing the data:
> sero <- readxl::read_excel("Sero.xlsx", "All data") %>%
+ dplyr::select(Typo3A, `Centr CAT2`, All, Pop, Tested, `Pos Delta`, Centrality, Shape_Leng, born_in_th) %>%
+ dplyr::rename(cases = All,
+ popsize = Pop,
+ area = Shape_Leng,
+ centrality = Centrality,
+ jenks = `Centr CAT2`,
+ typology = Typo3A,
+ tested = Tested,
+ seropositives = `Pos Delta`,
+ borne_here = born_in_th) %>%
+ dplyr::mutate(seronegatives = tested - seropositives,
+ incidence = cases / popsize,
+ jenks = ordered(jenks, 0:2),
+ typology = factor(dplyr::recode(typology, `0` = "rural",
+ `1` = "urban core",
+ `2` = "first periphery",
+ `3` = "second periphery",
+ `4` = "new settlement",
+ `5` = "old settlement"),
+ c("rural", "urban core", "first periphery", "second periphery", "new settlement", "old settlement")),
+ city = as.character(typology) %in% c("urban core", "first periphery", "second periphery"),
+ centrality = 1e6 * centrality / area,
+ sqrtincidence = sqrt(incidence),
+ sqrtcentrality = sqrt(centrality),
+ seroprevalence = seropositives / tested,
+ urban_core = typology == "urban core")
Exploring the continuous independent variables:
> hist(sero$incidence, n = 15, col = "grey", xlab = "incidence", ylab = "number of villages", main = NA)
> hist(sero$sqrtincidence, n = 15, col = "grey", xlab = "sqrt(incidence)", ylab = "number of villages", main = NA)
> hist(sero$centrality, n = 15, col = "grey", xlab = "centrality", ylab = "number of villages", main = NA)
> hist(sero$sqrtcentrality, n = 15, col = "grey", xlab = "sqrt(centrality)", ylab = "number of villages", main = NA)
A binomial model with incidence
, centrality
and typology
:
> mod0 <- glm(cbind(seropositives, seronegatives) ~ incidence + I(incidence^2) + centrality + I(centrality^2) + typology, binomial, sero)
> car::Anova(mod0, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(seropositives, seronegatives)
LR Chisq Df Pr(>Chisq)
incidence 0.3938 1 0.53030
I(incidence^2) 0.4303 1 0.51185
centrality 0.1112 1 0.73881
I(centrality^2) 0.0191 1 0.88998
typology 11.4483 5 0.04318 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod1 <- glm(cbind(seropositives, seronegatives) ~ sqrtincidence + I(sqrtincidence^2) + sqrtcentrality + I(sqrtcentrality^2) + typology, binomial, sero)
> car::Anova(mod1, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(seropositives, seronegatives)
LR Chisq Df Pr(>Chisq)
sqrtincidence 0.1504 1 0.69816
I(sqrtincidence^2) 0.1910 1 0.66212
sqrtcentrality 0.2567 1 0.61241
I(sqrtcentrality^2) 0.0374 1 0.84660
typology 12.1830 5 0.03236 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In the city only:
> mod2 <- glm(cbind(seropositives, seronegatives) ~ incidence + I(incidence^2) + centrality + I(centrality^2) + typology, binomial, dplyr::filter(sero, city))
> car::Anova(mod2, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(seropositives, seronegatives)
LR Chisq Df Pr(>Chisq)
incidence 0.030538 1 0.8613
I(incidence^2) 0.044870 1 0.8322
centrality 0.061526 1 0.8041
I(centrality^2) 0.003416 1 0.9534
typology 0.254226 2 0.8806
> mod3 <- glm(cbind(seropositives, seronegatives) ~ sqrtincidence + I(sqrtincidence^2) + sqrtcentrality + I(sqrtcentrality^2) + typology, binomial, dplyr::filter(sero, city))
> car::Anova(mod3, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(seropositives, seronegatives)
LR Chisq Df Pr(>Chisq)
sqrtincidence 1.01217 1 0.3144
I(sqrtincidence^2) 0.98016 1 0.3222
sqrtcentrality 0.01721 1 0.8956
I(sqrtcentrality^2) 0.11469 1 0.7349
typology 0.45172 2 0.7978
> mod4 <- glm(cbind(seropositives, seronegatives) ~ typology, binomial, sero)
> summary(mod4)
Call:
glm(formula = cbind(seropositives, seronegatives) ~ typology,
family = binomial, data = sero)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0607 -0.8787 -0.6213 0.6579 2.6381
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.5468 0.1167 -13.250 <2e-16 ***
typologyurban core 0.4166 0.1720 2.423 0.0154 *
typologyfirst periphery 0.4314 0.1901 2.270 0.0232 *
typologysecond periphery 0.3375 0.2207 1.529 0.1263
typologynew settlement -15.4688 1001.4499 -0.015 0.9877
typologyold settlement -0.3991 0.7649 -0.522 0.6019
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 299.20 on 278 degrees of freedom
Residual deviance: 285.87 on 273 degrees of freedom
(191 observations deleted due to missingness)
AIC: 553.24
Number of Fisher Scoring iterations: 15
> mod5 <- glm(cbind(cases, popsize - cases) ~ typology + centrality + borne_here, binomial, sero)
> summary(mod5)
Call:
glm(formula = cbind(cases, popsize - cases) ~ typology + centrality +
borne_here, family = binomial, data = sero)
Deviance Residuals:
Min 1Q Median 3Q Max
-6.6521 -1.7999 -0.8944 0.5738 10.1817
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.286e+00 8.407e-02 -62.877 <2e-16 ***
typologyurban core 4.891e-01 5.644e-02 8.665 <2e-16 ***
typologyfirst periphery 7.258e-01 5.568e-02 13.034 <2e-16 ***
typologysecond periphery 7.540e-01 6.015e-02 12.535 <2e-16 ***
typologynew settlement 3.098e-01 1.707e-01 1.815 0.0695 .
typologyold settlement -3.931e-02 1.282e-01 -0.307 0.7592
centrality -2.571e-05 8.334e-04 -0.031 0.9754
borne_here -1.277e-02 1.208e-03 -10.566 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2910.9 on 469 degrees of freedom
Residual deviance: 2344.5 on 462 degrees of freedom
AIC: 3462.8
Number of Fisher Scoring iterations: 5
> exp(coef(mod5))
(Intercept) typologyurban core typologyfirst periphery typologysecond periphery typologynew settlement
0.005060218 1.630776213 2.066420380 2.125533648 1.363177788
typologyold settlement centrality borne_here
0.961456537 0.999974286 0.987313807
> car::Anova(mod5, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(cases, popsize - cases)
LR Chisq Df Pr(>Chisq)
typology 247.807 5 <2e-16 ***
centrality 0.001 1 0.9754
borne_here 109.113 1 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod6 <- glm(cbind(cases, popsize - cases) ~ typology + centrality + borne_here + seroprevalence, binomial, dplyr::filter(sero, tested > 10))
> summary(mod6)
Call:
glm(formula = cbind(cases, popsize - cases) ~ typology + centrality +
borne_here + seroprevalence, family = binomial, data = dplyr::filter(sero,
tested > 10))
Deviance Residuals:
Min 1Q Median 3Q Max
-5.7446 -1.4808 -0.0846 1.2082 5.9258
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.836967 0.163008 -35.808 < 2e-16 ***
typologyurban core 0.262889 0.119320 2.203 0.0276 *
typologyfirst periphery 0.571345 0.102405 5.579 2.42e-08 ***
typologysecond periphery 1.209321 0.098862 12.232 < 2e-16 ***
centrality -0.025469 0.004935 -5.161 2.46e-07 ***
borne_here 0.003169 0.002609 1.215 0.2244
seroprevalence 0.630308 0.314805 2.002 0.0453 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 413.10 on 35 degrees of freedom
Residual deviance: 220.93 on 29 degrees of freedom
AIC: 399.04
Number of Fisher Scoring iterations: 5
> car::Anova(mod6, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(cases, popsize - cases)
LR Chisq Df Pr(>Chisq)
typology 174.505 3 < 2.2e-16 ***
centrality 30.407 1 3.502e-08 ***
borne_here 1.472 1 0.22507
seroprevalence 3.983 1 0.04595 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod7 <- glm(cbind(cases, popsize - cases) ~ typology*seroprevalence + centrality + borne_here, binomial, dplyr::filter(sero, tested > 10))
> summary(mod7)
Call:
glm(formula = cbind(cases, popsize - cases) ~ typology * seroprevalence +
centrality + borne_here, family = binomial, data = dplyr::filter(sero,
tested > 10))
Deviance Residuals:
Min 1Q Median 3Q Max
-6.0218 -1.4125 -0.1473 1.1161 5.6388
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.038100 0.201539 -29.960 < 2e-16 ***
typologyurban core 0.272866 0.273814 0.997 0.3190
typologyfirst periphery 1.004057 0.239202 4.198 2.70e-05 ***
typologysecond periphery 1.488755 0.201410 7.392 1.45e-13 ***
seroprevalence 1.576292 0.629580 2.504 0.0123 *
centrality -0.027470 0.005378 -5.108 3.26e-07 ***
borne_here 0.003170 0.002759 1.149 0.2505
typologyurban core:seroprevalence 0.020707 1.195367 0.017 0.9862
typologyfirst periphery:seroprevalence -1.766342 0.884652 -1.997 0.0459 *
typologysecond periphery:seroprevalence -1.223436 0.804747 -1.520 0.1284
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 413.10 on 35 degrees of freedom
Residual deviance: 215.68 on 26 degrees of freedom
AIC: 399.79
Number of Fisher Scoring iterations: 5
> car::Anova(mod7, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(cases, popsize - cases)
LR Chisq Df Pr(>Chisq)
typology 174.505 3 < 2.2e-16 ***
seroprevalence 3.983 1 0.04595 *
centrality 29.441 1 5.764e-08 ***
borne_here 1.317 1 0.25105
typology:seroprevalence 5.253 3 0.15420
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod7 <- glm(cbind(cases, popsize - cases) ~ urban_core*seroprevalence + centrality + borne_here, binomial, dplyr::filter(sero, tested > 10))
> car::Anova(mod7, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: cbind(cases, popsize - cases)
LR Chisq Df Pr(>Chisq)
urban_core 15.9595 1 6.471e-05 ***
seroprevalence 9.0913 1 0.002568 **
centrality 0.1725 1 0.677919
borne_here 1.6330 1 0.201295
urban_core:seroprevalence 2.0650 1 0.150715
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> serotypes <- readxl::read_excel("serotypes denv.xls", "Sheet1") %>%
+ dplyr::select(Age, Dengue, Typo) %>%
+ dplyr::mutate(typology = factor(dplyr::recode(Typo, `0` = "rural",
+ `1` = "urban core",
+ `2` = "first periphery",
+ `3` = "second periphery",
+ `4` = "new settlement",
+ `5` = "old settlement"),
+ c("rural", "urban core", "first periphery", "second periphery", "new settlement", "old settlement")),
+ dengue = Dengue > 0) %>%
+ dplyr::select(dengue, Age, typology)
> model <- glm(dengue ~ Age + typology, binomial, serotypes)
> car::Anova(model, test = "LR")
Analysis of Deviance Table (Type II tests)
Response: dengue
LR Chisq Df Pr(>Chisq)
Age 89.967 1 < 2e-16 ***
typology 14.383 5 0.01335 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> summary(model)
Call:
glm(formula = dengue ~ Age + typology, family = binomial, data = serotypes)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.3606 -0.6995 -0.5479 -0.3686 2.4208
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.810097 0.192694 -14.583 <2e-16 ***
Age 0.035338 0.003872 9.126 <2e-16 ***
typologyurban core 0.528583 0.174148 3.035 0.0024 **
typologyfirst periphery 0.380191 0.200394 1.897 0.0578 .
typologysecond periphery 0.380551 0.231502 1.644 0.1002
typologynew settlement -13.708353 457.568461 -0.030 0.9761
typologyold settlement -0.347874 0.783010 -0.444 0.6568
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1344.9 on 1306 degrees of freedom
Residual deviance: 1239.4 on 1300 degrees of freedom
(5 observations deleted due to missingness)
AIC: 1253.4
Number of Fisher Scoring iterations: 14