## Required packages. library("bamlss") library("gamlss2") library("gamlss.dist") library("sf") ## Read the population density data d <- readRDS("AustriaSwissPop.rds") ## First overview of the data. summary(d) for(n in names(d)) { if(is.numeric(d[[n]])) { x11() hist(d[[n]], breaks = "Scott", freq = FALSE, xlab = n, main = "") lines(density(d[[n]])) } } ## Close all graphics. graphics.off() ## Log transform response and make strictly ## positive. d$pop_km2 <- log(d$pop_km2) amin <- abs(min(d$pop_km2)) d$pop_km2 <- d$pop_km2 + amin + 1 ## Square root transform all distances. vn <- grep("dist", names(d), value = TRUE) for(v in vn) d[[v]] <- sqrt(d[[v]]) ## Log-transform nighttime light. d$NTL <- log(d$NTL) ## Square root transform sealing density. d$IMD <- sqrt(d$IMD) ## Subset the data for training and testing. d1 <- subset(d, country == "Austria") d2 <- subset(d, country == "Switzerland") ## Find a suitable distributional model using intercepts only. families <- gamlss_distributions(type = "continuous") ll <- rep(NA, length(families)) ## Assign names names(ll) <- names(families) ## Simple for loop that tries out to fit ## a model, if there is no error, the logLik ## is saved in object ll. for(i in seq_along(families)) { b <- try(gamlss2(pop_km2 ~ 1, data = d1, family = families[[i]])) if(!inherits(b, "try-error")) { ll[i] <- logLik(b) } } ## The BCPE distribution seems to fit best, but when ## predicting parameters the log-density gves an error. ## Therefore, use the second best BCT family. b <- gamlss2(pop_km2 ~ 1, family = BCPE, data = d1) ## Visualize the estimated density. par <- predict(b, type = "parameter") dy <- family(b)$d(d1$pop_km2, par) i <- order(d1$pop_km2) hist(d1$pop_km2, freq = FALSE, breaks = "Scott") lines(dy[i] ~ d1$pop_km2[i], col = 4, lwd = 4) ## Set up a "base" formula. f <- ~ s(IMD) + s(NTL) + s(LC_veg) + s(CNT_dist) + s(WWY_dist) + s(WTR_dist) + s(UNI_dist) + s(SPM_dist) + s(HWY_dist) + s(STR_dist) + s(SCH_dist) + s(PLG_dist) + s(PRK_dist) + s(NHO_dist) + s(MWY_dist) + s(MAL_dist) + s(HSP_dist) + s(DOC_dist) + s(CLG_dist) + s(AIR_dist) ## Replicate formula for each parameter of the distribution. f <- rep(list(f), 4) ## Assign names. names(f) <- names(BCPE()$parameters[1:4]) ## Update the first formula to add the response. f[[1]] <- update(f[[1]], pop_km2 ~ .) ## Select covariates using stability selection. b <- stabsel(f, data = d1, family = BCPE(mu.link = "log"), q = 10, thr = 0.6, init = TRUE) ## Selection frequency plot. plot(b) ## Extract model formula of selected terms. nf <- formula(b) ## Refit model using full MCMC. m <- bamlss(nf, data = d1, family = BCPE(mu.link = "log")) ## Plot estimated effects. plot(m, pages = 1) plot(m, pages = 1, scale = 0) ## Predict the median for Switzerland. p <- predict(m, newdata = d2, type = "parameter") q50 <- family(m)$mean(0.5, p) ## Plot fitted values vs. response plot(q50, d2$pop_km2) abline(0, 1) ## Compute the map of estimated population density. Swiss <- readRDS("SwissMunicipal.rds") Swiss$fit <- rep(NA, nrow(Swiss)) ids <- as.integer(unique(d2$id)) for(i in ids) { Swiss$fit[Swiss$id == i] <- q50[d2$id == i][1L] ## Retransform to oiginal scale. ## Swiss$fit[Swiss$id == i] <- exp(q50[d2$id == i][1L] - amin - 1) } breaks <- seq(min(Swiss$fit), max(Swiss$fit), length = 100) plot(Swiss["fit"], breaks = breaks)