> library(magrittr)

Reading and reorganizing the data:

> sero <- readxl::read_excel("Sero.xlsx", "All data") %>% 
+   dplyr::select(Typo3A, `Centr CAT2`, All, Pop, Tested, `Pos Delta`, Centrality, Shape_Leng, born_in_th) %>% 
+   dplyr::rename(cases            = All,
+                 popsize          = Pop,
+                 area             = Shape_Leng,
+                 centrality       = Centrality,
+                 jenks            = `Centr CAT2`,
+                 typology         = Typo3A,
+                 tested           = Tested,
+                 seropositives    = `Pos Delta`,
+                 borne_here       = born_in_th) %>% 
+   dplyr::mutate(seronegatives    = tested - seropositives,
+                 incidence        = cases / popsize, 
+                 jenks            = ordered(jenks, 0:2),
+                 typology         = factor(dplyr::recode(typology, `0` = "rural",
+                                                                   `1` = "urban core",
+                                                                   `2` = "first periphery",
+                                                                   `3` = "second periphery",
+                                                                   `4` = "new settlement",
+                                                                   `5` = "old settlement"),
+                                           c("rural", "urban core", "first periphery", "second periphery", "new settlement", "old settlement")),
+                 city             = as.character(typology) %in% c("urban core", "first periphery", "second periphery"),
+                 centrality       = 1e6 * centrality / area,
+                 sqrtincidence    = sqrt(incidence),
+                 sqrtcentrality   = sqrt(centrality),
+                 seroprevalence   = seropositives / tested,
+                 urban_core       = typology == "urban core")

Exploring the continuous independent variables:

> hist(sero$incidence, n = 15, col = "grey", xlab = "incidence", ylab = "number of villages", main = NA)

> hist(sero$sqrtincidence, n = 15, col = "grey", xlab = "sqrt(incidence)", ylab = "number of villages", main = NA)

> hist(sero$centrality, n = 15, col = "grey", xlab = "centrality", ylab = "number of villages", main = NA)

> hist(sero$sqrtcentrality, n = 15, col = "grey", xlab = "sqrt(centrality)", ylab = "number of villages", main = NA)

A binomial model with incidence, centrality and typology:

> mod0 <- glm(cbind(seropositives, seronegatives) ~ incidence + I(incidence^2) + centrality + I(centrality^2) + typology, binomial, sero)
> car::Anova(mod0, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(seropositives, seronegatives)
                LR Chisq Df Pr(>Chisq)  
incidence         0.3938  1    0.53030  
I(incidence^2)    0.4303  1    0.51185  
centrality        0.1112  1    0.73881  
I(centrality^2)   0.0191  1    0.88998  
typology         11.4483  5    0.04318 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod1 <- glm(cbind(seropositives, seronegatives) ~ sqrtincidence + I(sqrtincidence^2) + sqrtcentrality + I(sqrtcentrality^2) + typology, binomial, sero)
> car::Anova(mod1, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(seropositives, seronegatives)
                    LR Chisq Df Pr(>Chisq)  
sqrtincidence         0.1504  1    0.69816  
I(sqrtincidence^2)    0.1910  1    0.66212  
sqrtcentrality        0.2567  1    0.61241  
I(sqrtcentrality^2)   0.0374  1    0.84660  
typology             12.1830  5    0.03236 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In the city only:

> mod2 <- glm(cbind(seropositives, seronegatives) ~ incidence + I(incidence^2) + centrality + I(centrality^2) + typology, binomial, dplyr::filter(sero, city))
> car::Anova(mod2, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(seropositives, seronegatives)
                LR Chisq Df Pr(>Chisq)
incidence       0.030538  1     0.8613
I(incidence^2)  0.044870  1     0.8322
centrality      0.061526  1     0.8041
I(centrality^2) 0.003416  1     0.9534
typology        0.254226  2     0.8806
> mod3 <- glm(cbind(seropositives, seronegatives) ~ sqrtincidence + I(sqrtincidence^2) + sqrtcentrality + I(sqrtcentrality^2) + typology, binomial, dplyr::filter(sero, city))
> car::Anova(mod3, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(seropositives, seronegatives)
                    LR Chisq Df Pr(>Chisq)
sqrtincidence        1.01217  1     0.3144
I(sqrtincidence^2)   0.98016  1     0.3222
sqrtcentrality       0.01721  1     0.8956
I(sqrtcentrality^2)  0.11469  1     0.7349
typology             0.45172  2     0.7978
> mod4 <- glm(cbind(seropositives, seronegatives) ~ typology, binomial, sero)
> summary(mod4)

Call:
glm(formula = cbind(seropositives, seronegatives) ~ typology, 
    family = binomial, data = sero)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.0607  -0.8787  -0.6213   0.6579   2.6381  

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                -1.5468     0.1167 -13.250   <2e-16 ***
typologyurban core          0.4166     0.1720   2.423   0.0154 *  
typologyfirst periphery     0.4314     0.1901   2.270   0.0232 *  
typologysecond periphery    0.3375     0.2207   1.529   0.1263    
typologynew settlement    -15.4688  1001.4499  -0.015   0.9877    
typologyold settlement     -0.3991     0.7649  -0.522   0.6019    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 299.20  on 278  degrees of freedom
Residual deviance: 285.87  on 273  degrees of freedom
  (191 observations deleted due to missingness)
AIC: 553.24

Number of Fisher Scoring iterations: 15

Incidence

> mod5 <- glm(cbind(cases, popsize - cases) ~ typology + centrality + borne_here, binomial, sero)
> summary(mod5)

Call:
glm(formula = cbind(cases, popsize - cases) ~ typology + centrality + 
    borne_here, family = binomial, data = sero)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-6.6521  -1.7999  -0.8944   0.5738  10.1817  

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)              -5.286e+00  8.407e-02 -62.877   <2e-16 ***
typologyurban core        4.891e-01  5.644e-02   8.665   <2e-16 ***
typologyfirst periphery   7.258e-01  5.568e-02  13.034   <2e-16 ***
typologysecond periphery  7.540e-01  6.015e-02  12.535   <2e-16 ***
typologynew settlement    3.098e-01  1.707e-01   1.815   0.0695 .  
typologyold settlement   -3.931e-02  1.282e-01  -0.307   0.7592    
centrality               -2.571e-05  8.334e-04  -0.031   0.9754    
borne_here               -1.277e-02  1.208e-03 -10.566   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2910.9  on 469  degrees of freedom
Residual deviance: 2344.5  on 462  degrees of freedom
AIC: 3462.8

Number of Fisher Scoring iterations: 5
> exp(coef(mod5))
             (Intercept)       typologyurban core  typologyfirst periphery typologysecond periphery   typologynew settlement 
             0.005060218              1.630776213              2.066420380              2.125533648              1.363177788 
  typologyold settlement               centrality               borne_here 
             0.961456537              0.999974286              0.987313807 
> car::Anova(mod5, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(cases, popsize - cases)
           LR Chisq Df Pr(>Chisq)    
typology    247.807  5     <2e-16 ***
centrality    0.001  1     0.9754    
borne_here  109.113  1     <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod6 <- glm(cbind(cases, popsize - cases) ~ typology + centrality + borne_here + seroprevalence, binomial, dplyr::filter(sero, tested > 10))
> summary(mod6)

Call:
glm(formula = cbind(cases, popsize - cases) ~ typology + centrality + 
    borne_here + seroprevalence, family = binomial, data = dplyr::filter(sero, 
    tested > 10))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-5.7446  -1.4808  -0.0846   1.2082   5.9258  

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)              -5.836967   0.163008 -35.808  < 2e-16 ***
typologyurban core        0.262889   0.119320   2.203   0.0276 *  
typologyfirst periphery   0.571345   0.102405   5.579 2.42e-08 ***
typologysecond periphery  1.209321   0.098862  12.232  < 2e-16 ***
centrality               -0.025469   0.004935  -5.161 2.46e-07 ***
borne_here                0.003169   0.002609   1.215   0.2244    
seroprevalence            0.630308   0.314805   2.002   0.0453 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 413.10  on 35  degrees of freedom
Residual deviance: 220.93  on 29  degrees of freedom
AIC: 399.04

Number of Fisher Scoring iterations: 5
> car::Anova(mod6, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(cases, popsize - cases)
               LR Chisq Df Pr(>Chisq)    
typology        174.505  3  < 2.2e-16 ***
centrality       30.407  1  3.502e-08 ***
borne_here        1.472  1    0.22507    
seroprevalence    3.983  1    0.04595 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod7 <- glm(cbind(cases, popsize - cases) ~ typology*seroprevalence + centrality + borne_here, binomial, dplyr::filter(sero, tested > 10))
> summary(mod7)

Call:
glm(formula = cbind(cases, popsize - cases) ~ typology * seroprevalence + 
    centrality + borne_here, family = binomial, data = dplyr::filter(sero, 
    tested > 10))

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-6.0218  -1.4125  -0.1473   1.1161   5.6388  

Coefficients:
                                         Estimate Std. Error z value Pr(>|z|)    
(Intercept)                             -6.038100   0.201539 -29.960  < 2e-16 ***
typologyurban core                       0.272866   0.273814   0.997   0.3190    
typologyfirst periphery                  1.004057   0.239202   4.198 2.70e-05 ***
typologysecond periphery                 1.488755   0.201410   7.392 1.45e-13 ***
seroprevalence                           1.576292   0.629580   2.504   0.0123 *  
centrality                              -0.027470   0.005378  -5.108 3.26e-07 ***
borne_here                               0.003170   0.002759   1.149   0.2505    
typologyurban core:seroprevalence        0.020707   1.195367   0.017   0.9862    
typologyfirst periphery:seroprevalence  -1.766342   0.884652  -1.997   0.0459 *  
typologysecond periphery:seroprevalence -1.223436   0.804747  -1.520   0.1284    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 413.10  on 35  degrees of freedom
Residual deviance: 215.68  on 26  degrees of freedom
AIC: 399.79

Number of Fisher Scoring iterations: 5
> car::Anova(mod7, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(cases, popsize - cases)
                        LR Chisq Df Pr(>Chisq)    
typology                 174.505  3  < 2.2e-16 ***
seroprevalence             3.983  1    0.04595 *  
centrality                29.441  1  5.764e-08 ***
borne_here                 1.317  1    0.25105    
typology:seroprevalence    5.253  3    0.15420    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> mod7 <- glm(cbind(cases, popsize - cases) ~ urban_core*seroprevalence + centrality + borne_here, binomial, dplyr::filter(sero, tested > 10))
> car::Anova(mod7, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: cbind(cases, popsize - cases)
                          LR Chisq Df Pr(>Chisq)    
urban_core                 15.9595  1  6.471e-05 ***
seroprevalence              9.0913  1   0.002568 ** 
centrality                  0.1725  1   0.677919    
borne_here                  1.6330  1   0.201295    
urban_core:seroprevalence   2.0650  1   0.150715    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> serotypes <- readxl::read_excel("serotypes denv.xls", "Sheet1") %>% 
+   dplyr::select(Age, Dengue, Typo) %>% 
+   dplyr::mutate(typology = factor(dplyr::recode(Typo, `0` = "rural",
+                                                       `1` = "urban core",
+                                                       `2` = "first periphery",
+                                                       `3` = "second periphery",
+                                                       `4` = "new settlement",
+                                                       `5` = "old settlement"),
+                                   c("rural", "urban core", "first periphery", "second periphery", "new settlement", "old settlement")),
+                 dengue = Dengue > 0) %>% 
+   dplyr::select(dengue, Age, typology)
> model <- glm(dengue ~ Age + typology, binomial, serotypes)
> car::Anova(model, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: dengue
         LR Chisq Df Pr(>Chisq)    
Age        89.967  1    < 2e-16 ***
typology   14.383  5    0.01335 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> summary(model)

Call:
glm(formula = dengue ~ Age + typology, family = binomial, data = serotypes)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.3606  -0.6995  -0.5479  -0.3686   2.4208  

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)               -2.810097   0.192694 -14.583   <2e-16 ***
Age                        0.035338   0.003872   9.126   <2e-16 ***
typologyurban core         0.528583   0.174148   3.035   0.0024 ** 
typologyfirst periphery    0.380191   0.200394   1.897   0.0578 .  
typologysecond periphery   0.380551   0.231502   1.644   0.1002    
typologynew settlement   -13.708353 457.568461  -0.030   0.9761    
typologyold settlement    -0.347874   0.783010  -0.444   0.6568    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1344.9  on 1306  degrees of freedom
Residual deviance: 1239.4  on 1300  degrees of freedom
  (5 observations deleted due to missingness)
AIC: 1253.4

Number of Fisher Scoring iterations: 14