## Load the Munich rent data. d <- readRDS("MunichRent.rds") ## Response distribution. hist(d$rent, freq = FALSE, breaks = "Scott") ## Load the bamlss package, retrieve all available ## distributions from the gamlss.dist package. library("bamlss") families <- gamlss_distributions(type = "continuous") names(families) ## Estimate intercept only models to find ## a suitable distribution for the rent. ## Therefore, initialize a vector for saving ## log-likelihood values for each model. ll <- rep(NA, length(families)) ## Assign names names(ll) <- names(families) ## Simple for loop that tries out to fit ## a model, if there is no error, the logLik ## is saved in object ll. for(i in seq_along(families)) { b <- try(gamlss2(rent ~ 1, data = d, family = families[[i]])) if(!inherits(b, "try-error")) { ll[i] <- logLik(b) } } ## Plot the sorted values, only the top five using tail(). barplot(tail(sort(ll))) ## The pareto seems to be the best, refit. b <- gamlss2(rent ~ 1, family = PARETO1o, data = d) ## Compute the estimated density to check the model fit. ## Therefore, predict the estimated parameters of the ## distribution. par <- predict(b, type = "parameter") ## Calculate the density dy <- family(b)$d(d$rent, par) ## Order observations to draw the density line. i <- order(d$rent) hist(d$rent, freq = FALSE, breaks = "Scott") lines(dy[i] ~ d$rent[i], col = 4, lwd = 4) ## The pareto model does not fit the data at all! ## Therefore, use the JSU which definitely has the ## best log-likelihood value. b <- gamlss2(rent ~ 1, family = JSU, data = d) par <- predict(b, type = "parameter") dy <- family(b)$d(d$rent, par) i <- order(d$rent) hist(d$rent, freq = FALSE, breaks = "Scott") lines(dy[i] ~ d$rent[i], col = 4, lwd = 4) ## In the next step, estimate a distributional ## model including covariates. ## JSU has 4 parameters, we need a list of ## 4 formulas. Start with a "base" formula f <- ~ s(area) + s(yearc) ## Replicate the formula for all parameters ## of the response distribution. f <- rep(list(f), 4) ## Update the first formula for adding the response rent. f[[1]] <- update(f[[1]], rent ~ .) ## Estimate the model. b <- gamlss2(f, data = d, family = JSU) ## Maybe some fine tuning of the outer and inner iteration ## of the backfitting algorithm is needed. b <- gamlss2(f, data = d, family = JSU, maxit = c(100, 100)) ## Plot the estimated effects. plot(b)