Downloading the data:
# Download the Excel workbook to a temporary file and read its first sheet.
# tempfile() appends `fileext` verbatim, so the leading dot is needed for the
# file to actually carry an .xlsx extension.
> tmp <- tempfile(fileext = ".xlsx")
# mode = "wb": the workbook is a binary file and the URL ends in "?raw=1", not
# in an extension that download.file() recognises as binary, so without this a
# text-mode transfer on Windows would corrupt the file.
> download.file("https://www.dropbox.com/s/3uq5zaqt7tdqro8/Census%202015%20village%20dengue%20and%20built%20Up.xlsx?raw=1", tmp, mode = "wb")
> data <- readxl::read_excel(tmp)
Renaming the variables (replacing spaces with _):
# Replace every run of one or more spaces in the column names with a single
# underscore, so the columns can be written as plain names in model formulas.
> names(data) <- gsub(" +", "_", names(data))
# Baseline model: Poisson regression (log link) of the count `All` on
# Urban_Type treated as a categorical factor. log(nb_person_2014) is an offset,
# so the coefficients describe counts relative to nb_person_2014
# (presumably the village population in 2014 -- TODO confirm).
# NOTE(review): in the summary below the residual deviance (3135.2) is far
# larger than its degrees of freedom (461), which suggests overdispersion; the
# Poisson standard errors and p-values may be too optimistic. Consider checking
# a quasi-Poisson or negative binomial fit.
> mod1 <- glm(All ~ as.factor(Urban_Type), poisson, data, offset = log(nb_person_2014))
> summary(mod1)
Call:
glm(formula = All ~ as.factor(Urban_Type), family = poisson,
data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-7.2074 -2.1010 -1.0423 0.6472 12.3554
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.00909 0.03398 -176.840 < 2e-16 ***
as.factor(Urban_Type)1 1.06981 0.04596 23.276 < 2e-16 ***
as.factor(Urban_Type)2 1.19734 0.05288 22.641 < 2e-16 ***
as.factor(Urban_Type)3 1.28481 0.04209 30.522 < 2e-16 ***
as.factor(Urban_Type)4 0.48333 0.11822 4.088 4.34e-05 ***
as.factor(Urban_Type)5 0.09049 0.12187 0.742 0.458
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3135.2 on 461 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4404
Number of Fisher Scoring iterations: 5
# Analysis of deviance with a likelihood-ratio test: compares the model with
# the Urban_Type factor against the intercept-only (NULL) model.
> anova(mod1, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
as.factor(Urban_Type) 5 1271 461 3135.2 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Add Born_in_this_place_CAT as an ordered factor, entered before Urban_Type.
# ordered() makes R code the term with polynomial contrasts, which is why the
# summary reports .L, .Q and .C (linear, quadratic, cubic) coefficients rather
# than one coefficient per category.
> mod2 <- glm(All ~ ordered(Born_in_this_place_CAT) + as.factor(Urban_Type), poisson, data, offset = log(nb_person_2014))
> summary(mod2)
Call:
glm(formula = All ~ ordered(Born_in_this_place_CAT) + as.factor(Urban_Type),
family = poisson, data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-7.5544 -1.9716 -0.9430 0.8479 12.0840
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.01108 0.03419 -175.809 < 2e-16 ***
ordered(Born_in_this_place_CAT).L -0.39102 0.05048 -7.747 9.44e-15 ***
ordered(Born_in_this_place_CAT).Q -0.10949 0.04119 -2.658 0.007855 **
ordered(Born_in_this_place_CAT).C -0.24938 0.03420 -7.292 3.05e-13 ***
as.factor(Urban_Type)1 0.95690 0.04927 19.420 < 2e-16 ***
as.factor(Urban_Type)2 1.09156 0.05437 20.075 < 2e-16 ***
as.factor(Urban_Type)3 1.12489 0.04609 24.408 < 2e-16 ***
as.factor(Urban_Type)4 0.39742 0.11891 3.342 0.000831 ***
as.factor(Urban_Type)5 0.15655 0.12240 1.279 0.200893
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3012.4 on 458 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4287.2
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests: terms are added in formula order, so
# Urban_Type is tested after Born_in_this_place_CAT is already in the model.
> anova(mod2, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
ordered(Born_in_this_place_CAT) 3 590.82 463 3815.5 < 2.2e-16 ***
as.factor(Urban_Type) 5 803.03 458 3012.4 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiate the coefficients (first, unnamed column) and their 95%
# profile-likelihood confidence limits to move them off the log scale; with the
# log link and the offset these read as multiplicative effects on the rate.
> cbind(exp(coef(mod2)), exp(confint(mod2)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.00245144 0.002290814 0.002619419
ordered(Born_in_this_place_CAT).L 0.67636524 0.611951893 0.745895518
ordered(Born_in_this_place_CAT).Q 0.89629244 0.826259467 0.971088876
ordered(Born_in_this_place_CAT).C 0.77928207 0.728795412 0.833363337
as.factor(Urban_Type)1 2.60360851 2.364356655 2.868206929
as.factor(Urban_Type)2 2.97892165 2.677051802 3.313162509
as.factor(Urban_Type)3 3.07989064 2.814920658 3.372375475
as.factor(Urban_Type)4 1.48798377 1.169570818 1.865281323
as.factor(Urban_Type)5 1.16947253 0.912396957 1.475252702
# Replace the birthplace term with Moved_since_2005, entered as a single
# numeric covariate (one slope in the summary), ahead of the Urban_Type factor.
# NOTE(review): the scale/units of Moved_since_2005 are not visible here --
# confirm before interpreting the per-unit effect.
> mod3 <- glm(All ~ Moved_since_2005 + as.factor(Urban_Type), poisson, data, offset = log(nb_person_2014))
> summary(mod3)
Call:
glm(formula = All ~ Moved_since_2005 + as.factor(Urban_Type),
family = poisson, data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-6.9869 -2.0860 -1.0100 0.6855 13.2081
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.217870 0.054921 -113.216 < 2e-16 ***
Moved_since_2005 0.007195 0.001473 4.886 1.03e-06 ***
as.factor(Urban_Type)1 1.059865 0.046013 23.034 < 2e-16 ***
as.factor(Urban_Type)2 1.210107 0.052948 22.854 < 2e-16 ***
as.factor(Urban_Type)3 1.230262 0.043613 28.209 < 2e-16 ***
as.factor(Urban_Type)4 0.460222 0.118308 3.890 0.0001 ***
as.factor(Urban_Type)5 0.101038 0.121884 0.829 0.4071
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3112.4 on 460 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4383.2
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests in formula order: Moved_since_2005 is
# tested first (unadjusted), then Urban_Type given Moved_since_2005.
> anova(mod3, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
Moved_since_2005 1 133.98 465 4272.3 < 2.2e-16 ***
as.factor(Urban_Type) 5 1159.94 460 3112.4 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (first column) with 95% profile-likelihood
# confidence limits, i.e. the mod3 effects expressed as multiplicative factors.
> cbind(exp(coef(mod3)), exp(confint(mod3)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.001993486 0.001789998 0.002220011
Moved_since_2005 1.007221202 1.004292270 1.010106319
as.factor(Urban_Type)1 2.885981086 2.637389267 3.158798935
as.factor(Urban_Type)2 3.353843973 3.022224590 3.719530064
as.factor(Urban_Type)3 3.422125225 3.142727522 3.728755754
as.factor(Urban_Type)4 1.584425015 1.246759589 1.983668507
as.factor(Urban_Type)5 1.106318731 0.863941169 1.394075325
# Exploratory scatter plots of the outcome against Moved_since_2005:
#   1. log of the raw count,
#   2. log of the count divided by nb_person_2014,
#   3. the same with Moved_since_2005 also on the log scale.
# NOTE(review): log() of a zero gives -Inf, and such points are not drawn; if
# `All` or Moved_since_2005 contain zeros, those rows are silently missing from
# these plots -- TODO confirm.
> with(data, plot(Moved_since_2005, log(All)))
> with(data, plot(Moved_since_2005, log(All / nb_person_2014)))
> with(data, plot(log(Moved_since_2005), log(All / nb_person_2014)))
# Extend the Moved_since_2005 + Urban_Type model with Water_In_House, entered
# first as a numeric covariate.
> mod5 <- glm(All ~ Water_In_House + Moved_since_2005 + as.factor(Urban_Type), poisson, data, offset = log(nb_person_2014))
> summary(mod5)
Call:
glm(formula = All ~ Water_In_House + Moved_since_2005 + as.factor(Urban_Type),
family = poisson, data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-6.9514 -2.0786 -1.0327 0.6984 13.1186
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.084182 0.096056 -63.340 < 2e-16 ***
Water_In_House -0.007315 0.004328 -1.690 0.091 .
Moved_since_2005 0.007155 0.001473 4.859 1.18e-06 ***
as.factor(Urban_Type)1 1.064819 0.046106 23.095 < 2e-16 ***
as.factor(Urban_Type)2 1.222544 0.053495 22.853 < 2e-16 ***
as.factor(Urban_Type)3 1.240109 0.044001 28.184 < 2e-16 ***
as.factor(Urban_Type)4 0.467335 0.118393 3.947 7.90e-05 ***
as.factor(Urban_Type)5 0.117710 0.122302 0.962 0.336
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3109.5 on 459 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4382.3
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests in formula order: Water_In_House alone,
# then Moved_since_2005, then Urban_Type given both.
> anova(mod5, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
Water_In_House 1 8.89 465 4397.4 0.002865 **
Moved_since_2005 1 135.74 464 4261.7 < 2.2e-16 ***
as.factor(Urban_Type) 5 1152.11 459 3109.5 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated mod5 coefficients (first column) with 95% profile-likelihood
# confidence limits.
> cbind(exp(coef(mod5)), exp(confint(mod5)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.002278628 0.001885543 0.002747694
Water_In_House 0.992711517 0.984377577 1.001219601
Moved_since_2005 1.007180748 1.004252018 1.010065812
as.factor(Urban_Type)1 2.900312841 2.650005643 3.175056921
as.factor(Urban_Type)2 3.395814509 3.056823118 3.770175891
as.factor(Urban_Type)3 3.455990255 3.171391962 3.768498597
as.factor(Urban_Type)4 1.595735458 1.255465657 1.998183228
as.factor(Urban_Type)5 1.124917557 0.877796947 1.418753844
# Same terms as mod5, but with Urban_Type placed before Moved_since_2005. The
# fitted coefficients are identical to mod5; the reordering only matters for
# the sequential tests produced by anova().
> mod6 <- glm(All ~ Water_In_House + as.factor(Urban_Type) + Moved_since_2005, poisson, data, offset = log(nb_person_2014))
> summary(mod6)
Call:
glm(formula = All ~ Water_In_House + as.factor(Urban_Type) +
Moved_since_2005, family = poisson, data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-6.9514 -2.0786 -1.0327 0.6984 13.1186
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.084182 0.096056 -63.340 < 2e-16 ***
Water_In_House -0.007315 0.004328 -1.690 0.091 .
as.factor(Urban_Type)1 1.064819 0.046106 23.095 < 2e-16 ***
as.factor(Urban_Type)2 1.222544 0.053495 22.853 < 2e-16 ***
as.factor(Urban_Type)3 1.240109 0.044001 28.184 < 2e-16 ***
as.factor(Urban_Type)4 0.467335 0.118393 3.947 7.90e-05 ***
as.factor(Urban_Type)5 0.117710 0.122302 0.962 0.336
Moved_since_2005 0.007155 0.001473 4.859 1.18e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3109.5 on 459 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4382.3
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests in formula order: here Moved_since_2005 is
# tested last, i.e. after adjusting for Water_In_House and Urban_Type.
> anova(mod6, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
Water_In_House 1 8.89 465 4397.4 0.002865 **
as.factor(Urban_Type) 5 1265.23 460 3132.2 < 2.2e-16 ***
Moved_since_2005 1 22.63 459 3109.5 1.969e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated mod6 coefficients (first column) with 95% profile-likelihood
# confidence limits.
> cbind(exp(coef(mod6)), exp(confint(mod6)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.002278628 0.001885543 0.002747694
Water_In_House 0.992711517 0.984377577 1.001219601
as.factor(Urban_Type)1 2.900312841 2.650005643 3.175056921
as.factor(Urban_Type)2 3.395814509 3.056823118 3.770175891
as.factor(Urban_Type)3 3.455990255 3.171391962 3.768498597
as.factor(Urban_Type)4 1.595735458 1.255465657 1.998183228
as.factor(Urban_Type)5 1.124917557 0.877796947 1.418753844
Moved_since_2005 1.007180748 1.004252018 1.010065812
# Test Work_Mig as a numeric covariate, entered after the Urban_Type factor.
> mod7 <- glm(All ~ as.factor(Urban_Type) + Work_Mig, poisson, data, offset = log(nb_person_2014))
> summary(mod7)
Call:
glm(formula = All ~ as.factor(Urban_Type) + Work_Mig, family = poisson,
data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-7.2149 -2.0960 -1.0391 0.6351 12.3324
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.005180 0.034365 -174.749 < 2e-16 ***
as.factor(Urban_Type)1 1.077406 0.047042 22.903 < 2e-16 ***
as.factor(Urban_Type)2 1.201096 0.053119 22.611 < 2e-16 ***
as.factor(Urban_Type)3 1.291388 0.042976 30.049 < 2e-16 ***
as.factor(Urban_Type)4 0.485409 0.118250 4.105 4.04e-05 ***
as.factor(Urban_Type)5 0.087292 0.121946 0.716 0.474
Work_Mig -0.003513 0.004660 -0.754 0.451
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3134.7 on 460 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4405.5
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests in formula order: Work_Mig is tested after
# Urban_Type is already in the model.
> anova(mod7, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
as.factor(Urban_Type) 5 1271.04 461 3135.2 <2e-16 ***
Work_Mig 1 0.57 460 3134.7 0.4484
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (first column) with 95% profile-likelihood
# confidence limits for the Urban_Type + Work_Mig model.
> cbind(exp(coef(mod7)), exp(confint(mod7)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.002465945 0.002303626 0.002635865
as.factor(Urban_Type)1 2.937050482 2.678644509 3.221170238
as.factor(Urban_Type)2 3.323757964 2.994123580 3.687412169
as.factor(Urban_Type)3 3.637833298 3.345013989 3.958874289
as.factor(Urban_Type)4 1.624840217 1.278699319 2.034018048
as.factor(Urban_Type)5 1.091214833 0.852050648 1.375220178
Work_Mig 0.996492929 0.987301831 1.005506780
# Refit the Work_Mig model with Work_Mig entered before Urban_Type. This
# overwrites the previous `mod7`; the coefficients are unchanged, only the
# sequential anova() tests depend on the term order.
> mod7 <- glm(All ~ Work_Mig + as.factor(Urban_Type), poisson, data, offset = log(nb_person_2014))
> summary(mod7)
Call:
glm(formula = All ~ Work_Mig + as.factor(Urban_Type), family = poisson,
data = data, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-7.2149 -2.0960 -1.0391 0.6351 12.3324
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.005180 0.034365 -174.749 < 2e-16 ***
Work_Mig -0.003513 0.004660 -0.754 0.451
as.factor(Urban_Type)1 1.077406 0.047042 22.903 < 2e-16 ***
as.factor(Urban_Type)2 1.201096 0.053119 22.611 < 2e-16 ***
as.factor(Urban_Type)3 1.291388 0.042976 30.049 < 2e-16 ***
as.factor(Urban_Type)4 0.485409 0.118250 4.105 4.04e-05 ***
as.factor(Urban_Type)5 0.087292 0.121946 0.716 0.474
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3134.7 on 460 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4405.5
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests in formula order: Work_Mig is now tested
# first (unadjusted), then Urban_Type given Work_Mig.
> anova(mod7, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
Work_Mig 1 70.3 465 4336.0 < 2.2e-16 ***
as.factor(Urban_Type) 5 1201.3 460 3134.7 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (first column) with 95% profile-likelihood
# confidence limits for the Work_Mig + Urban_Type model.
> cbind(exp(coef(mod7)), exp(confint(mod7)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.002465945 0.002303626 0.002635865
Work_Mig 0.996492929 0.987301831 1.005506780
as.factor(Urban_Type)1 2.937050482 2.678644509 3.221170238
as.factor(Urban_Type)2 3.323757964 2.994123580 3.687412169
as.factor(Urban_Type)3 3.637833298 3.345013989 3.958874289
as.factor(Urban_Type)4 1.624840217 1.278699319 2.034018048
as.factor(Urban_Type)5 1.091214833 0.852050648 1.375220178
Final model, writing the residuals into a CSV file:
# Final model: Urban_Type factor plus Moved_since_2005, overwriting the earlier
# `mod7`. na.action = na.exclude still drops incomplete rows from the fit, but
# makes resid() return NA for them, so the residual vector keeps one entry per
# row of `data` and can be paired with columns of `data` later on.
> mod7 <- glm(All ~ as.factor(Urban_Type) + Moved_since_2005, poisson, data, offset = log(nb_person_2014), na.action = na.exclude)
> summary(mod7)
Call:
glm(formula = All ~ as.factor(Urban_Type) + Moved_since_2005,
family = poisson, data = data, na.action = na.exclude, offset = log(nb_person_2014))
Deviance Residuals:
Min 1Q Median 3Q Max
-6.9869 -2.0860 -1.0100 0.6855 13.2081
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.217870 0.054921 -113.216 < 2e-16 ***
as.factor(Urban_Type)1 1.059865 0.046013 23.034 < 2e-16 ***
as.factor(Urban_Type)2 1.210107 0.052948 22.854 < 2e-16 ***
as.factor(Urban_Type)3 1.230262 0.043613 28.209 < 2e-16 ***
as.factor(Urban_Type)4 0.460222 0.118308 3.890 0.0001 ***
as.factor(Urban_Type)5 0.101038 0.121884 0.829 0.4071
Moved_since_2005 0.007195 0.001473 4.886 1.03e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 4406.3 on 466 degrees of freedom
Residual deviance: 3112.4 on 460 degrees of freedom
(1 observation deleted due to missingness)
AIC: 4383.2
Number of Fisher Scoring iterations: 5
# Sequential likelihood-ratio tests for the final model: Urban_Type first, then
# Moved_since_2005 adjusted for Urban_Type.
> anova(mod7, test = "LRT")
Analysis of Deviance Table
Model: poisson, link: log
Response: All
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 466 4406.3
as.factor(Urban_Type) 5 1271.04 461 3135.2 < 2.2e-16 ***
Moved_since_2005 1 22.87 460 3112.4 1.732e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Exponentiated coefficients (first column) of the final model with 95%
# profile-likelihood confidence limits.
> cbind(exp(coef(mod7)), exp(confint(mod7)))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 0.001993486 0.001789998 0.002220011
as.factor(Urban_Type)1 2.885981086 2.637389267 3.158798935
as.factor(Urban_Type)2 3.353843973 3.022224590 3.719530064
as.factor(Urban_Type)3 3.422125225 3.142727522 3.728755754
as.factor(Urban_Type)4 1.584425015 1.246759589 1.983668507
as.factor(Urban_Type)5 1.106318731 0.863941169 1.394075325
Moved_since_2005 1.007221202 1.004292270 1.010106319
# Write one row per row of `data` to residuals.csv in the working directory:
# the code_sig column (presumably the village identifier -- TODO confirm) and
# the model residual. resid() on a glm returns deviance residuals by default.
# NOTE(review): quote = FALSE writes fields unquoted; this is only safe if
# code_sig never contains commas or quotes -- verify.
> write.csv(data.frame(code_sig = data$code_sig, residuals = resid(mod7)), "residuals.csv", quote = FALSE, row.names = FALSE)