################################################################################
################################## Chapter 2 ###################################
################################################################################
# fuzzy string matching
kfz.bestand = 
	"SELECT {vars}, kfz.kfz_nr, kfz.lfd_nr, LTRIM(RTRIM(kfz.f_id_nr)) AS f_id_nr, kfz.gross_ort, kfz.gross_straße, kfz.hausnr 
	FROM ((sdb_kfz.dbo.kfzk000_new kfz
	INNER JOIN (
		SELECT f_id_nr, MAX(dt_bis) AS MaxDTBIS
		FROM sdb_kfz.dbo.kfzk000_new
		where dt_bis >= {st_format1}
		and d_akz_zut <= {st_format2}
		GROUP BY f_id_nr) grouppedKFZ
	ON kfz.f_id_nr = grouppedKFZ.f_id_nr
	AND kfz.dt_bis = grouppedKFZ.MaxDTBIS
	AND kfz.gross_ort = 'DORTMUND')
	LEFT JOIN (
		SELECT kfz_nr, lfd_nr, f_id_nr, kmh, kw4, sitzm, co2, lamin, brmin, homin, tach1, tach2
		FROM sdb_kfz.dbo.kfzk004_new) technik
	ON LTRIM(RTRIM(kfz.f_id_nr)) = LTRIM(RTRIM(technik.f_id_nr)))"
	
adressen.sql = 
	"SELECT distinct STRNAME, HNR, HNRZ, RW, HW, UBZ
	from ADRESSEN.DBO.FB62AlleGebaeudeAdressen
	where abzug = {st_format3}
	and FLAG_HG = 1"

sanitize_string <- function(x){
	out <- x %>% tolower() %>%
	gsub(pattern = "ä", replacement="ae", x = .) %>%
	gsub(pattern = "ü", replacement="ue", x = .) %>%
	gsub(pattern = "ö", replacement="oe", x = .) %>%
	gsub(pattern = "ß", replacement="ss", x = .) %>%
	gsub(pattern = "-", replacement="", x = .) %>%
	gsub(pattern = " ", replacement="", x = .) %>% 
	gsub(pattern = "\\.", replacement="", x = .) %>%
	gsub(pattern = "*str$", replacement="strasse", x = .) 
	return(out)
}

# Hausnummern verarbeiten
clean_hausnr <- function(data){
	data <- gsub(pattern = " *$", replacement = "", x = data)
	data <- gsub(pattern = "^ *", replacement = "", x = data)
	data[which(data == 0)] <- NA
	data[which(data == "NULL")] <- NA
	zusatz <- regmatches(data, gregexpr("[a-zA-Z]+", data))
	zusatz <- unlist(lapply(zusatz, FUN = function(x){if(length(x) == 0) return("") else return(toupper(substr(x, 1, 1)))}))
	hausnr <- regmatches(data, gregexpr("[a-zA-Z-]+", data), invert = TRUE)
	hausnr <- unlist(lapply(hausnr, FUN = function(x){as.numeric(unlist(strsplit(x, "\\s"))[1])}))
	return(data.frame(Hausnummer = hausnr, Zusatz = zusatz))
}

# Distanzfunktion
l1dist <- function(x){
	if(sum(is.na(x)) == length(x)){return(rep(1, length(x)))}
	1 - abs(x) / max(c(1, abs(x)), na.rm = TRUE)
}
# Hausnummern matchen
match_hausnr <- function(record, weights = c(.9, .1)){
	valid <- subset(saubere_anschriften, STRNAME == unlist(record["gross_strasse_2"]), select = c(HNR, HNRZ))
	hausnr <- l1dist(as.numeric(unlist(record["hausnr_2"])) - as.numeric(valid$HNR)) * weights[1]
	zusatz <- l1dist(match(unlist(record["hausnrz_2"]), LETTERS) - match(valid$HNRZ, LETTERS)) * weights[2]
	if(unlist(record["hausnrz_2"]) == ""){
	zusatz[which(valid$HNRZ != "")] <- 0}#!important
	score <- hausnr + zusatz
	ids <- which(score == max(score, na.rm = TRUE))
	if(length(ids) > 1){
		ids <- sample(x = ids, size = 1)
		score <- .95 } else {
	if(abs(as.numeric(unlist(record["hausnr_2"])) - as.numeric(valid[ids, "HNR"])) > 4){
		ids <- sample(x = 1:nrow(valid), size = 1)
		score <- 0 }
	}
	cbind(score = max(score, na.rm = TRUE), valid[ids,])
}

s <- as.data.frame(as.character(seq(from = as.Date("2014-01-01"), to = as.Date("2023-01-01"), by="1 year")))
Bestand.alle <- apply(X = s, MARGIN = 1, 
	FUN = function(d) {
		print(as.character(d))
		#Daten abfragen
		pattern <- c("\\{vars\\}", "\\{st_format1\\}", "\\{st_format2\\}")
		vars <- c("LTRIM(RTRIM(kfz.anrede)) AS anrede", "kfz.s_geburt", "kfz.beruf", 
			"kfz.d_ezulass", "kfz.kren", "kfz.finfzart", "kfz.tgm", 
			"LTRIM(RTRIM(kfz.erm_schadstoffgruppe)) AS schadstoffgruppe", "kfz.her", "kfz.typ", 
			"LTRIM(RTRIM(kfz.kennzeichenart)) AS kennzeichenart", "kfz.bestandsart", 
			"technik.kmh", "technik.kw4", "technik.sitzm", "technik.co2", "technik.lamin", 
			"technik.brmin", "technik.homin", "technik.tach1", "technik.tach2")
		replacement <- c(paste(vars, collapse = ", "), 
		date_format(d = d, typ = 1), date_format(d = d, typ = 2))
		Daten <- processSQLQuery(stri_replace_all_regex(kfz.bestand, 
			pattern = pattern, replacement = replacement, vectorize = FALSE), 
			dbConnectionObject)
		#Daten bereinigen
		Daten$fahrzeugalter <- class_ezulass(Daten$d_ezulass, d)
		Daten$finfzart <- class_finfzart(Daten$finfzart)
		Daten$kren <- class_kren(Daten$kren)
		Daten$geschlecht <- class_anrede(Daten$anrede)
		Daten$halteralter <- class_geburt(Daten$s_geburt, d)
		options(warn = -1)
		Daten$kmh <- as.numeric(Daten$kmh)
		Daten$kw4 <- as.numeric(Daten$kw4)
		Daten$sitzm <- as.numeric(Daten$sitzm)
		Daten$co2 <- as.numeric(Daten$co2)
		Daten$lamin <- as.numeric(Daten$lamin)
		Daten$brmin <- as.numeric(Daten$brmin)
		Daten$homin <- as.numeric(Daten$homin)
		Daten$tach1 <- as.numeric(Daten$tach1)
		Daten$tach2 <- as.numeric(Daten$tach2)
		Daten$gross_ort <- trimws(Daten$gross_ort)
		Daten$gross_straße <- trimws(Daten$gross_straße)
		Daten$hausnr <- trimws(Daten$hausnr)
		options(warn = 0)
		Daten <- subset(Daten, select = -c(kfz_nr,lfd_nr,anrede,d_ezulass,s_geburt))
		#Daten zusammenfassen
		Daten <- Daten[!duplicated(Daten), ]
		Tab <- table(Daten$f_id_nr)
		doubletten <- names(Tab[which(Tab > 1)])
		Korrekturen <- lapply(doubletten, FUN = function(f_id_nr){
			S <- Daten[which(Daten$f_id_nr == f_id_nr),]
			n.NA <- apply(S, MARGIN = 1, FUN = function(x) sum(is.na(x)))
			S[sample(which(n.NA == min(n.NA)), 1),]})
		Korrekturen <- do.call(rbind, Korrekturen)
		Daten <- rbind(subset(Daten, !(f_id_nr %in% doubletten)), Korrekturen)
		Daten$id <- 1:nrow(Daten)
		#Adressen abfragen
		Adressen <- processSQLQuery(stri_replace_all_regex(adressen.sql, 
			pattern = "\\{st_format3\\}", replacement = date_format(d = d, typ = 3), vectorize = FALSE), 
			dbConnectionObject)
		Adressen$Ort <- "DORTMUND"
		## first pass
		fp <- merge(x = Daten, y = Adressen[which(is.na(Adressen$HNRZ)),], 
			by.x = c("gross_ort", "gross_straße", "hausnr"), 
			by.y = c("Ort", "STRNAME", "HNR"))
		fp$qAddress <- 0
		fp <- subset(fp, select = -c(gross_ort, gross_straße, hausnr, HNRZ))
		## second pass
		d2 <- Daten[setdiff(Daten$id, fp$id),]
		Adressen$clean_strname <- sanitize_string(Adressen$STRNAME)
		d2$clean_strname <- sanitize_string(d2$gross_straße)
		sp <- merge(x = d2, y = Adressen[which(is.na(Adressen$HNRZ)),], 
			by.x = c("gross_ort", "clean_strname", "hausnr"), 
			by.y = c("Ort", "clean_strname", "HNR"))
		sp$qAddress <- 1
		sp <- subset(sp, select = -c(gross_ort, clean_strname, hausnr, gross_straße, STRNAME, HNRZ))
		## third pass
		### DORTMUND
		d3 <- Daten[setdiff(Daten$id, c(fp$id, sp$id)),]
		res <- stringdist(a = trimws(tolower(d3$gross_ort)), b = "dortmund", method = "lcs")
		fuzzy <- which(res > 0 & res <= 6)
		d3$gross_ort[fuzzy] <- "DORTMUND"
		### Straßen
		d3$clean_strname <- sanitize_string(d3$gross_straße)
		clean_strname <- unique(Adressen$clean_strname)
		clean_strname <- clean_strname[!is.na(clean_strname)]
		res <- stringsimmatrix(a = clean_strname, b = d3$clean_strname,
			method = "osa", weight = c(d = 1, i = .5, s = 1, t = 1), nthread = 2)
		threshold <- which(apply(res, MARGIN = 2, FUN = max) > .9)
		d3$clean_strname[threshold] <- clean_strname[apply(res, MARGIN = 2, FUN = which.max)[threshold]]
		### Hausnummern
		Adressen <<- Adressen
		Hausnummern <- clean_hausnr(d3$hausnr)
		d3$hausnr <- Hausnummern$Hausnummer
		d3$hausnrz <- Hausnummern$Zusatz
		fuzzy <- which(!is.na(match(d3$clean_strname, clean_strname)) & d3$gross_ort == "DORTMUND")
		res <- apply(X = d3[fuzzy,], MARGIN = 1, FUN = match_hausnr) %>% do.call(rbind, .)
		d3[fuzzy, "hausnr"] <- res[, "HNR"]
		d3[fuzzy, "hausnrz"] <- res[, "HNRZ"]
		Adressen$HNRZ[which(is.na(Adressen$HNRZ))] <- ""
		d3$hausnrz[which(is.na(d3$hausnrz))] <- ""
		tp <- merge(x = d3, y = Adressen, 
			by.x = c("gross_ort", "clean_strname", "hausnr", "hausnrz"), 
			by.y = c("Ort", "clean_strname", "HNR", "HNRZ"))
		tp$qAddress <- 2
		tp <- subset(tp, select = -c(gross_ort, clean_strname, hausnr, hausnrz, gross_straße, STRNAME))
		# Merge
		unknown <- Daten[setdiff(Daten$id, c(fp$id, sp$id, tp$id)),]
		unknown <- subset(unknown, select = -c(gross_ort, gross_straße, hausnr))
		unknown$RW <- NA
		unknown$HW <- NA
		unknown$UBZ <- NA
		unknown$qAddress <- 3
		return(rbind(fp, sp, tp, unknown))
		})

# Imputation
library("rMIDAS")

# Apply rMIDAS preprocessing steps
rDaten <- subset(Daten, select = -c(her, typ, beruf))
cat_cols <- c('schadstoffgruppe', 'Fahrzeugklasse','Kraftstoffart', 'Sitzplätze')
bin_cols <- c('geschlecht', 'Achsen', 'Antriebsachsen')
Daten_conv <- convert(rDaten, 
                      bin_cols = bin_cols, 
                      cat_cols = cat_cols,
                      minmax_scale = TRUE)
Daten_validate <- overimpute(Daten_conv, seed = 1234, 
training_epochs = 21, report_ival = 3, layer_structure = c(50, 50), 
plot_vars = TRUE, save_path = "<PATH>")

Daten_train <- train(Daten_conv, seed = 1234, 
training_epochs = 18, layer_structure = c(50, 50))
Daten_complete <- complete(Daten_train, m = 1)
Daten_complete <- cbind(her = Daten$her, typ = Daten$typ, Daten_complete[[1]])
################################################################################
################################## Chapter 3 ###################################
################################################################################
allDaten <- merge(Daten, Segments, by.x = c("her", "typ"), by.y = c("Herstellerschlüssel", "Typschlüssel"), all.x = TRUE)
allDaten$Segmentcode <- NA
allDaten$Segmentcode[which(allDaten$Segment == "MINIS")] <- 0
allDaten$Segmentcode[which(allDaten$Segment == "KLEINWAGEN")] <- 1
allDaten$Segmentcode[which(allDaten$Segment == "KOMPAKTKLASSE")] <- 2
allDaten$Segmentcode[which(allDaten$Segment == "MITTELKLASSE")] <- 3
allDaten$Segmentcode[which(allDaten$Segment == "OBERE MITTELKLASSE")] <- 4
allDaten$Segmentcode[which(allDaten$Segment == "OBERKLASSE")] <- 5
allDaten$Segmentcode[which(allDaten$Segment == "SUV")] <- 6
allDaten$Segmentcode[which(allDaten$Segment == "GELÄNDEWAGEN")] <- 7
allDaten$Segmentcode[which(allDaten$Segment == "SPORTWAGEN")] <- 8
allDaten$Segmentcode[which(allDaten$Segment == "MINI-VANS")] <- 9
allDaten$Segmentcode[which(allDaten$Segment == "GROSSRAUM-VANS")] <- 10
allDaten$Segmentcode[which(allDaten$Segment == "UTILITIES")] <- 11
allDaten$Segmentcode[which(allDaten$Segment == "WOHNMOBILE")] <- 12
allDaten$Kraftstoffart[which(allDaten$Kraftstoffart == "Brennstoffzelle")] <- "andere"
allDaten$Sitzplätze <- as.numeric(allDaten$Sitzplätze)
classdata <- subset(allDaten, select = -c(her,typ,RW,HW,beruf,fahrzeugalter,halteralter,geschlecht,Segment,Fahrzeugklasse,co2,Hubraum,schadstoffgruppe,Kraftstoffart))

cDaten <- classdata[complete.cases(classdata),]
trainDaten <- subset(cDaten, Usage == "Train", select = -c(Segmentcode,Usage))
testDaten <- subset(cDaten, Usage == "Test", select = -c(Segmentcode,Usage))

# Data conversion steps
library(Matrix)
library(xgboost)

trainMatrix <- xgb.DMatrix(
	data = sparse.model.matrix(~ ., data = trainDaten)[,-1], 
	label = unlist(subset(cDaten, Usage == "Train", select = c(Segmentcode))))
testMatrix <- xgb.DMatrix(
	data = sparse.model.matrix(~ ., data = testDaten)[,-1], 
	label = unlist(subset(cDaten, Usage == "Test", select = c(Segmentcode))))

# xgboost
watchlist <- list(train = trainMatrix, eval = testMatrix)
params <- list(nthread = 2, objective = "multi:softmax",
		num_class = 13,  eval_metric = "merror")
bst <- xgb.train(params, trainMatrix, nrounds = 100, watchlist = watchlist, early_stopping_rounds = 3)
################################################################################
################################## Chapter 4 ###################################
################################################################################
#Andy Lyons ajlyons at berkeley.edu
#Fri Mar 25 01:23:00 CET 2011
######################################################################
   ## mvee()
   ## Uses the Khachiyan Algorithm to find the the minimum volume enclosing
   ## ellipsoid (MVEE) of a set of points. In two dimensions, this is just
   ## the bounding ellipse (this function is limited to two dimensions).
   ## Adapted by Andy Lyons from Matlab code by Nima Moshtagh.
   ## Copyright (c) 2009, Nima Moshtagh
   ##         [1]http://www.mathworks.com/matlabcentral/fileexchange/9542
   ##         [2]http://www.mathworks.com/matlabcentral/fileexchange/13844
   ##         [3]http://stackoverflow.com/questions/1768197/bounding-ellipse
   ##
   ## Parameters
   ## xy          a two-column data frame containing x and y coordinates
   ##              if  NULL then a random sample set of 10 points will be generated
   ## tolerance   a tolerance value (default = 0.005)
   ## plotme      FALSE/TRUE. Plots the points and ellipse. Default TRUE.
   ## max.iter    The maximum number of iterations. If the script tries this
   ##             number of iterations but still can't get to the tolerance
   ##             value, it displays an error message and returns NULL
   ## shiftxy     TRUE/FALSE. If True, will apply a shift to the coordinates to make them
   ##             smaller and speed up the matrix calculations, then reverse the shift
   ##             to the center point of the resulting ellipoid. Default TRUE
   ## Output:     A list containing the "center form" matrix equation of the
   ##              ellipse.  i.e.  a  2x2 matrix "A" and a 2x1 vector "C" representing
   ##             the center of the ellipse such that:
   ##             (x - C)' A (x - C) <= 1
   ##              Also in the list is a 2x1 vector elps.axes.lngth whose elements
   ##              are  one-half  the lengths of the major and minor axes (variables
   ##             a and b
   ##             Also in list is alpha, the angle of rotation
   ######################################################################
   mvee <- function(xy=NULL, tolerance = 0.005, max.iter = 500, shiftxy = TRUE) {
       ## Number of points
       n = nrow(xy)
       ## Dimension of the points (2)
       #d = ncol(xy)
	   d <- 2
       xy.min <- sapply(xy, FUN = "min")
       xy.use <- xy - rep(xy.min, each = n)
       ## Add a column of 1s to the (n x 2) matrix xy - so it is now (n x 3)
       Q <- t(cbind(xy.use, rep(1,n)))
       ## Initialize
       count <- 1
       err <- 1
       u <- rep(1/n, n)
       ## Khachiyan Algorithm
       while (err > tolerance)
       {
           ## see [4]http://stackoverflow.com/questions/1768197/bounding-ellipse
           ## for commented code
           X <- Q %*% diag(u) %*% t(Q)
           M <- diag(t(Q) %*% solve(X) %*% Q)
           maximum <- max(M)
           j <- which(M == maximum)
           step_size = (maximum - d -1) / ((d+1)*(maximum-1))
           new_u <- (1 - step_size) * u
           new_u[j] <- new_u[j] + step_size
           err <- sqrt(sum((new_u - u)^2))
           count <- count + 1
           if (count > max.iter) {
               warning(paste("Iterated", max.iter, "times and still can't find the bounding ellipse. \n", sep=""))
               warning("Either increase the tolerance or the maximum number of iterations")
               return(NULL)
           }
           u <- new_u
       }
       ## Put the elements of the vector u into the diagonal of a matrix
       U <- diag(u)
       ## Take the transpose of xy
       P <- t(xy.use)
       ## Compute the A-matrix
       A <- (1/d) * solve(P %*% U %*% t(P) - (P %*% u) %*% t(P %*% u))
       ## Find the Eigenvalues of matrix A which will be used to get the major and minor axes
       A.eigen <- eigen(A)
	   r <- A.eigen$values[1] / A.eigen$values[2]
	   V <- pi / sqrt(prod(A.eigen$values))
	   ellipse.params <- list("A" = A, "eigen" = A.eigen, "r" = r, "V" = V)
   }
plot_ellipse <- function(params)
		{
           ## Plotting commands adapted from code by Alberto Monteiro
           ## [5]https://stat.ethz.ch/pipermail/r-help/2006-October/114652.html
           ## Create the points for the ellipse
		   A.eigen <- params$eigen
		   c <- params$c
		   theta <- seq(0, 2 * pi, length = 72)
		   semi.axes <- sort(1 / sqrt(A.eigen$values), decreasing=TRUE)
           alpha <- atan2(A.eigen$vectors[2,1], A.eigen$vectors[1,1]) - pi/2
           a <- semi.axes[1]
           b <- semi.axes[2]
           elp.plot.xs <- c[1] + a * cos(theta) * cos(alpha) - b * sin(theta) * sin(alpha)
           elp.plot.ys <- c[2] + a * cos(theta) * sin(alpha) + b * sin(theta) * cos(alpha)
           ## Plot the ellipse with the same scale on each axis
           lines(elp.plot.xs, elp.plot.ys, type = "l", lty="solid")
       }

#References
#   1. http://www.mathworks.com/matlabcentral/fileexchange/9542
#   2. http://www.mathworks.com/matlabcentral/fileexchange/13844
#   3. http://stackoverflow.com/questions/1768197/bounding-ellipse
#   4. http://stackoverflow.com/questions/1768197/bounding-ellipse
#   5. https://stat.ethz.ch/pipermail/r-help/2006-October/114652.html

# Own code for Chapter 4 starts here: 
library(delaunay)
library(data.table)
library(parallel)
library(pbapply)
library(sf)

interval_borders <- function(limits, n){
	seq(from = limits[1], to = limits[2], length.out = n + 1)}

interval_midpoints <- function(limits, n){
	var_s <- interval_borders(limits, n)
	rowMeans(cbind(head(var_s, -1), tail(var_s, -1)))}
map <- read_sf(<PATH>)

clust <- makePSOCKcluster(2)
year <- c("2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023")
res_expansion_factor <- 1
do_prescaling <- FALSE

for(year_id in year){print(year_id)
PKW <- readRDS(paste0("~/Master/data/xgboost/", year_id, ".RDS"))
PKW <- PKW[which(PKW$beruf == "888" & PKW$Segment == "SUV"),]
PKW <- setDT(PKW[, c("RW", "HW")])
coords <- PKW
setnames(coords, old = c("RW", "HW"), new = c("x", "y"))

ucoords <- data.table::setDT(coords)[, .N, c("x","y")]
xres <- 100 * res_expansion_factor# in meters
yres <- 100 * res_expansion_factor# in meters
maxeigen <- 4 / (xres^2 + yres^2)
mineigen <- 4 / (max(xres, yres) * 20)^2

## Grid
xlim <- c(382290.4, 405244.8)
ylim <- c(5697385, 5717364)
xcount <- round(diff(range(xlim)) / xres)
ycount <- round(diff(range(ylim)) / yres)

borders_x <- interval_borders(xlim, xcount)
borders_y <- interval_borders(ylim, ycount)
mid_x <- interval_midpoints(xlim, xcount)
mid_y <- interval_midpoints(ylim, ycount)
mapcoords <- expand.grid(x = mid_x, y = mid_y)

## delaunay
del <- delaunay::delaunay(as.matrix(subset(ucoords, select = -c(N))))
Faces <- del$faces

## Global bandwidth selection (crossvalidation)

g.bw.x <- bw.nrd0(coords$x) * sqrt(5)
g.bw.y <- bw.nrd0(coords$y) * sqrt(5)
g.D <- diag(c(g.bw.x, g.bw.y))

## First pass local bandwidth selection (delaunay)

clusterEvalQ(cl = clust, expr = library(data.table))
clusterExport(clust, c("ucoords", "Faces", "mvee"))

res <- pblapply(cl = clust, X = 1:nrow(ucoords), FUN = function(id){
	data <- ucoords[unique(as.numeric(Faces[apply(Faces, MARGIN = 1, FUN = function(row) any(row == id)),])),]
	data <- data[, c("N"):=NULL]
	mvee(xy = data.frame(data))}
)

Nscaled <- lapply(1:nrow(ucoords), FUN = function(id){res[[id]]$V * (1 / as.numeric(ucoords[id, "N"]))})
med_l <- median(unlist(Nscaled))
med_g <- pi*prod(c(g.bw.x,g.bw.y) / 2)
g.bw.s <- med_g / med_l 
Gscaled <- unlist(Nscaled) * g.bw.s # Total area!!!

maxeigen <- 4 / (xres^2 + yres^2)
mineigen <- 4 / (max(xres, yres) * 20)^2

sres <- lapply(1:nrow(ucoords), FUN = function(id){ 
	x <- res[[id]]
	# global scaling
	l <- Gscaled[id] / res[[id]]$V
	lambda_2 <- (1 / l) * x$eigen$values[2]
	lambda_1 <- x$r * lambda_2
	lambda <- c(lambda_1, lambda_2)
	# maximum scaling
	if(any(lambda < mineigen)){
		lambda <- lambda / min(lambda / mineigen)
	}
	# minimum scaling (ok!)
	if(any(lambda > maxeigen)){
		lambda <- lambda / max(lambda / maxeigen)
	}
	# result
	x$eigen$vectors %*% diag(lambda) %*% solve(x$eigen$vectors)
	}
)


kernel_epanechnikov <- function(u){
	0.75 * (1 - u^2) * (abs(u) <= 1)
}
geo_ellipse <- function(p, c, A){
   crossprod((p-c), A) %*% (p-c)
}

M <- data.frame(t(mapcoords[,1:2]))
clusterExport(clust, c("ucoords", "sres", "geo_ellipse", "kernel_epanechnikov", "M"))

i.densities <- pbsapply(cl = clust, X = 1:nrow(ucoords), FUN = function(id){
	N <- as.numeric(ucoords[id, "N"])
	c <- t(as.matrix(ucoords[id, c("x", "y")]))
	A <- sres[[id]]
	z <- kernel_epanechnikov(vapply(M, geo_ellipse, numeric(1), USE.NAMES = FALSE, c = c, A = A))
	z / sum(z) * N
	}
)
r.densities <- apply(i.densities, MARGIN = 1, FUN = sum)
mapcoords$den <- r.densities
find_value <- function(x,y) mapcoords$den[which(mapcoords$x == x & mapcoords$y == y)]
z <- outer(mid_x, mid_y, Vectorize(find_value))

saveRDS(mapcoords, paste0(<PATH>, year_id, "_den.RDS"))
save.image(paste0(<PATH>, year_id, ".RData"))

# Graphics
pbreaks <- pretty(c(z))
png(filename = paste0(<PATH>, year_id, ".png"), width = 672, height = 672)
par(mar = c(3,3,1,1))
image(x = mid_x, y = mid_y, z = z, xlab = "", ylab = "", main = "", 
	col = hcl.colors(length(pbreaks) - 1, "YlOrRd", rev = TRUE), breaks = pbreaks)
title(ylab="y-coordinates", line = 2)
title(xlab="x-coordinates", line = 2)
mtext(side=3, line=0, at = mean(xlim), cex=1, paste0("01.01.", year_id))
legend("bottomleft", col = hcl.colors(length(pbreaks) - 1, "YlOrRd", rev = TRUE), 
	legend = pbreaks[-1], pch = 15, pt.cex = 2, title = "Cars per ha", ncol = 5)
plot(st_geometry(map), border =  hsv(245/360, 80/100, 89/100, alpha = .5), add = TRUE)
dev.off()
}
################################################################################
################################## Chapter 5 ###################################
################################################################################
# input
delta <- 1/3
R <- 1
# calculate epsilon
p <- (1 - delta) / 2
epsilon <- - log(p / (1 - p) * (1 / (delta + p) - 1)) / R
# sigma
library(lamW)
## sigma1
sigma1 <- - R / (lambertW0(- 1 / (2 * exp(1))) * exp(1) * epsilon)
## für welches i wird u == -1/exp(1)
u <- function(i){ ((- (2 * i * R * exp(- i * epsilon) * 
		exp(- (i * R * exp(- i * epsilon))/ sigma1)) / sigma1) - (- 1 / exp(1)))^2}
cp <- optimize(u, interval = c(0,10))$minimum
## sigma2
sigma2 <- function(i, sigma1, cp){
	u <- - (2 * i * R * exp(- i * epsilon) * 
		exp(- (i * R * exp(- i * epsilon))/ sigma1)) / sigma1
	( - (i * R * exp(i * epsilon) * sigma1) / 
			(lambertW0(u) * exp(i * epsilon) * sigma1 + i * R)) * 
		(i <= cp) + 
	( - (i * R * exp(i * epsilon) * sigma1) / 
			(lambertWm1(u) * exp(i * epsilon) * sigma1 + i * R)) * 
		(i > cp)
}
## sigmalim 
sigma_lim <- 1 / epsilon

qlap <- function(p, mu, b){
	mu - b * sign(p - 1/2) * log(1 - 2 * abs(p - 1/2))}
plap <- function(x, mu, b){
	1/2 + 1/2*sign(x - mu)*(1 - exp(- abs(x - mu) / b))}
rlap <- function(mu, b, n, branch){
	cp <- plap(x = 0, mu = mu, b = b)
	invalid <- isTRUE(all.equal(cp, 0))
	if(branch == "pos"){
		p <- runif(n = n, min = cp, max = 1)
	}
	if(branch == "neg" & !invalid){
		p <- runif(n = n, min = 0, max = cp)
	}
	r <- qlap(p = p, mu = mu, b = b)
	if(branch == "neg" & invalid){r <- 0}
	if(branch == "neg" & !invalid){r <- r * cp / (1 - cp)}
	r
}

mapcoords <- readRDS(<PATH>)
mapcoords$sigma <- NA
nullid <- which(mapcoords$den == 0)
otherid <- setdiff(1:nrow(mapcoords), nullid)
mapcoords$sigma[nullid] <- sigma1
mapcoords$sigma[otherid] <- sigma2(i = mapcoords$den[otherid], sigma1, cp)
set.seed(1234)
mapcoords$noise <- mapply(rlap, mu = mapcoords$den, b = mapcoords$sigma, n = 1, branch = "pos")
mapcoords$neg <- mapply(rlap, mu = mapcoords$den, b = mapcoords$sigma, n = 1, branch = "neg")
mapcoords$res <- mapcoords$noise + mapcoords$neg

# distribute negative noise
negid <- which(mapcoords$res < 0)
mapcoords$res[negid] <- mapcoords$noise[negid]
sampleids <- sample(negid)
d <- numeric() # minimum distances
for(i in sampleids){
	df <- data.frame(id = 1:nrow(mapcoords), D = sqrt((mapcoords[i, "x"] - mapcoords[, "x"])^2 + (mapcoords[i, "y"] - mapcoords[, "y"])^2))
	valid_ids <- which(mapcoords$res > abs(mapcoords[i, "neg"]))
	df <- df[valid_ids,]
	d[i] <- min(df$D)
	newid <- df$id[sample(which(df$D == d[i]),1)]
	mapcoords[newid,"res"] <- mapcoords[newid,"res"] + mapcoords[i, "neg"]
}
################################################################################
################################## Chapter 6 ###################################
################################################################################
library(SpatialKWD)
d2014 <- readRDS(<PATH>)
d2015 <- readRDS(<PATH>)
d2016 <- readRDS(<PATH>)
d2017 <- readRDS(<PATH>)
d2018 <- readRDS(<PATH>)
d2019 <- readRDS(<PATH>)
d2020 <- readRDS(<PATH>)
d2021 <- readRDS(<PATH>)
d2022 <- readRDS(<PATH>)
d2023 <- readRDS(<PATH>)
colnames(d2014)[3] <- "d2014"
colnames(d2015)[3] <- "d2015"
colnames(d2016)[3] <- "d2016"
colnames(d2017)[3] <- "d2017"
colnames(d2018)[3] <- "d2018"
colnames(d2019)[3] <- "d2019"
colnames(d2020)[3] <- "d2020"
colnames(d2021)[3] <- "d2021"
colnames(d2022)[3] <- "d2022"
colnames(d2023)[3] <- "d2023"

df_list <- list(d2014, d2015, d2016, d2017, d2018, d2019, d2020, d2021, d2022, d2023)
df <- Reduce(function(x, y) merge(x, y, by = c("x", "y")), df_list, accumulate=FALSE)

wasserstein_dist <- compareAll(Coordinates = as.matrix(df[,1:2]), Weights = as.matrix(df[,3:12])) 
M <- matrix(wasserstein_dist$distance, nrow = 10, ncol = 10)
colnames(M) <- as.character(c(2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023))
rownames(M) <- as.character(c(2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023))

par(mar=c(2,3,0,0))
image(t(M), xaxt = "n", yaxt = "n")
axis(side = 1, at = seq(from = 0, to = 1, length.out = 10),labels = colnames(M))
axis(side = 2, at = seq(from = 0, to = 1, length.out = 10),labels = rownames(M), las = 2)
abline(h = seq(from = 0, to = 1, length.out = 10) + 0.055)
abline(v = seq(from = 0, to = 1, length.out = 10) + 0.055)
text(x = rep(seq(from = 0, to = 1, length.out = 10), 10), 
y = sort(rep(seq(from = 0, to = 1, length.out = 10), 10)), 
labels = round(c(M), 2))