## Required packages.
library("bamlss")
library("gamlss2")
library("gamlss.dist")
library("sf")

## Read the population density data
d <- readRDS("AustriaSwissPop.rds")

## First overview of the data.
summary(d)
for(n in names(d)) {
  if(is.numeric(d[[n]])) {
    x11()
    hist(d[[n]], breaks = "Scott", freq = FALSE, xlab = n, main = "")
    lines(density(d[[n]]))
  }
}

## Close all graphics.
graphics.off()

## Log transform response and make strictly
## positive.
d$pop_km2 <- log(d$pop_km2)
amin <- abs(min(d$pop_km2))
d$pop_km2 <- d$pop_km2 + amin + 1

## Square root transform all distances.
vn <- grep("dist", names(d), value = TRUE)
for(v in vn)
  d[[v]] <- sqrt(d[[v]])

## Log-transform nighttime light.
d$NTL <- log(d$NTL)

## Square root transform sealing density.
d$IMD <- sqrt(d$IMD)

## Subset the data for training and testing.
d1 <- subset(d, country == "Austria")
d2 <- subset(d, country == "Switzerland")

## Find a suitable distributional model using intercepts only.
families <- gamlss_distributions(type = "continuous")
ll <- rep(NA, length(families))

## Assign names
names(ll) <- names(families)

## Simple for loop that tries out to fit
## a model, if there is no error, the logLik
## is saved in object ll.
for(i in seq_along(families)) {
  b <- try(gamlss2(pop_km2 ~ 1, data = d1, family = families[[i]]))
  if(!inherits(b, "try-error")) {
    ll[i] <- logLik(b)
  }
}

## The BCPE distribution seems to fit best, but when
## predicting parameters the log-density gves an error.
## Therefore, use the second best BCT family.
b <- gamlss2(pop_km2 ~ 1, family = BCPE, data = d1)

## Visualize the estimated density.
par <- predict(b, type = "parameter")
dy <- family(b)$d(d1$pop_km2, par)
i <- order(d1$pop_km2)
hist(d1$pop_km2, freq = FALSE, breaks = "Scott")
lines(dy[i] ~ d1$pop_km2[i], col = 4, lwd = 4)

## Set up a "base" formula.
f <- ~ s(IMD) + s(NTL) + s(LC_veg) + s(CNT_dist) + s(WWY_dist) + s(WTR_dist) +
  s(UNI_dist) + s(SPM_dist) + s(HWY_dist) + s(STR_dist) +
  s(SCH_dist) + s(PLG_dist) + s(PRK_dist) + s(NHO_dist) +
  s(MWY_dist) + s(MAL_dist) + s(HSP_dist) + s(DOC_dist) +
  s(CLG_dist) + s(AIR_dist)

## Replicate formula for each parameter of the distribution.
f <- rep(list(f), 4)

## Assign names.
names(f) <- names(BCPE()$parameters[1:4])

## Update the first formula to add the response.
f[[1]] <- update(f[[1]], pop_km2 ~ .)

## Select covariates using stability selection.
b <- stabsel(f, data = d1, family = BCPE(mu.link = "log"),
  q = 10, thr = 0.6, init = TRUE)

## Selection frequency plot.
plot(b)

## Extract model formula of selected terms.
nf <- formula(b)

## Refit model using full MCMC.
m <- bamlss(nf, data = d1, family = BCPE(mu.link = "log"))

## Plot estimated effects.
plot(m, pages = 1)
plot(m, pages = 1, scale = 0)

## Predict the median for Switzerland.
p <- predict(m, newdata = d2, type = "parameter")
q50 <- family(m)$mean(0.5, p)

## Plot fitted values vs. response
plot(q50, d2$pop_km2)
abline(0, 1)

## Compute the map of estimated population density.
Swiss <- readRDS("SwissMunicipal.rds")
Swiss$fit <- rep(NA, nrow(Swiss))
ids <- as.integer(unique(d2$id))
for(i in ids) {
  Swiss$fit[Swiss$id == i] <- q50[d2$id == i][1L]
  ## Retransform to oiginal scale.
  ## Swiss$fit[Swiss$id == i] <- exp(q50[d2$id == i][1L] - amin - 1)
}

breaks <- seq(min(Swiss$fit), max(Swiss$fit), length = 100)
plot(Swiss["fit"], breaks = breaks)