################################################################################ ################################## Chapter 2 ################################### ################################################################################ # fuzzy string matching kfz.bestand = "SELECT {vars}, kfz.kfz_nr, kfz.lfd_nr, LTRIM(RTRIM(kfz.f_id_nr)) AS f_id_nr, kfz.gross_ort, kfz.gross_straße, kfz.hausnr FROM ((sdb_kfz.dbo.kfzk000_new kfz INNER JOIN ( SELECT f_id_nr, MAX(dt_bis) AS MaxDTBIS FROM sdb_kfz.dbo.kfzk000_new where dt_bis >= {st_format1} and d_akz_zut <= {st_format2} GROUP BY f_id_nr) grouppedKFZ ON kfz.f_id_nr = grouppedKFZ.f_id_nr AND kfz.dt_bis = grouppedKFZ.MaxDTBIS AND kfz.gross_ort = 'DORTMUND') LEFT JOIN ( SELECT kfz_nr, lfd_nr, f_id_nr, kmh, kw4, sitzm, co2, lamin, brmin, homin, tach1, tach2 FROM sdb_kfz.dbo.kfzk004_new) technik ON LTRIM(RTRIM(kfz.f_id_nr)) = LTRIM(RTRIM(technik.f_id_nr)))" adressen.sql = "SELECT distinct STRNAME, HNR, HNRZ, RW, HW, UBZ from ADRESSEN.DBO.FB62AlleGebaeudeAdressen where abzug = {st_format3} and FLAG_HG = 1" sanitize_string <- function(x){ out <- x %>% tolower() %>% gsub(pattern = "ä", replacement="ae", x = .) %>% gsub(pattern = "ü", replacement="ue", x = .) %>% gsub(pattern = "ö", replacement="oe", x = .) %>% gsub(pattern = "ß", replacement="ss", x = .) %>% gsub(pattern = "-", replacement="", x = .) %>% gsub(pattern = " ", replacement="", x = .) %>% gsub(pattern = "\\.", replacement="", x = .) %>% gsub(pattern = "*str$", replacement="strasse", x = .) return(out) } # Hausnummern verarbeiten clean_hausnr <- function(data){ data <- gsub(pattern = " *$", replacement = "", x = data) data <- gsub(pattern = "^ *", replacement = "", x = data) data[which(data == 0)] <- NA data[which(data == "NULL")] <- NA zusatz <- regmatches(data, gregexpr("[a-zA-Z]+", data)) zusatz <- unlist(lapply(zusatz, FUN = function(x){if(length(x) == 0) return("") else return(toupper(substr(x, 1, 1)))})) hausnr <- regmatches(data, gregexpr("[a-zA-Z-]+", data), invert = TRUE) hausnr <- unlist(lapply(hausnr, FUN = function(x){as.numeric(unlist(strsplit(x, "\\s"))[1])})) return(data.frame(Hausnummer = hausnr, Zusatz = zusatz)) } # Distanzfunktion l1dist <- function(x){ if(sum(is.na(x)) == length(x)){return(rep(1, length(x)))} 1 - abs(x) / max(c(1, abs(x)), na.rm = TRUE) } # Hausnummern matchen match_hausnr <- function(record, weights = c(.9, .1)){ valid <- subset(saubere_anschriften, STRNAME == unlist(record["gross_strasse_2"]), select = c(HNR, HNRZ)) hausnr <- l1dist(as.numeric(unlist(record["hausnr_2"])) - as.numeric(valid$HNR)) * weights[1] zusatz <- l1dist(match(unlist(record["hausnrz_2"]), LETTERS) - match(valid$HNRZ, LETTERS)) * weights[2] if(unlist(record["hausnrz_2"]) == ""){ zusatz[which(valid$HNRZ != "")] <- 0}#!important score <- hausnr + zusatz ids <- which(score == max(score, na.rm = TRUE)) if(length(ids) > 1){ ids <- sample(x = ids, size = 1) score <- .95 } else { if(abs(as.numeric(unlist(record["hausnr_2"])) - as.numeric(valid[ids, "HNR"])) > 4){ ids <- sample(x = 1:nrow(valid), size = 1) score <- 0 } } cbind(score = max(score, na.rm = TRUE), valid[ids,]) } s <- as.data.frame(as.character(seq(from = as.Date("2014-01-01"), to = as.Date("2023-01-01"), by="1 year"))) Bestand.alle <- apply(X = s, MARGIN = 1, FUN = function(d) { print(as.character(d)) #Daten abfragen pattern <- c("\\{vars\\}", "\\{st_format1\\}", "\\{st_format2\\}") vars <- c("LTRIM(RTRIM(kfz.anrede)) AS anrede", "kfz.s_geburt", "kfz.beruf", "kfz.d_ezulass", "kfz.kren", "kfz.finfzart", "kfz.tgm", "LTRIM(RTRIM(kfz.erm_schadstoffgruppe)) AS schadstoffgruppe", "kfz.her", "kfz.typ", "LTRIM(RTRIM(kfz.kennzeichenart)) AS kennzeichenart", "kfz.bestandsart", "technik.kmh", "technik.kw4", "technik.sitzm", "technik.co2", "technik.lamin", "technik.brmin", "technik.homin", "technik.tach1", "technik.tach2") replacement <- c(paste(vars, collapse = ", "), date_format(d = d, typ = 1), date_format(d = d, typ = 2)) Daten <- processSQLQuery(stri_replace_all_regex(kfz.bestand, pattern = pattern, replacement = replacement, vectorize = FALSE), dbConnectionObject) #Daten bereinigen Daten$fahrzeugalter <- class_ezulass(Daten$d_ezulass, d) Daten$finfzart <- class_finfzart(Daten$finfzart) Daten$kren <- class_kren(Daten$kren) Daten$geschlecht <- class_anrede(Daten$anrede) Daten$halteralter <- class_geburt(Daten$s_geburt, d) options(warn = -1) Daten$kmh <- as.numeric(Daten$kmh) Daten$kw4 <- as.numeric(Daten$kw4) Daten$sitzm <- as.numeric(Daten$sitzm) Daten$co2 <- as.numeric(Daten$co2) Daten$lamin <- as.numeric(Daten$lamin) Daten$brmin <- as.numeric(Daten$brmin) Daten$homin <- as.numeric(Daten$homin) Daten$tach1 <- as.numeric(Daten$tach1) Daten$tach2 <- as.numeric(Daten$tach2) Daten$gross_ort <- trimws(Daten$gross_ort) Daten$gross_straße <- trimws(Daten$gross_straße) Daten$hausnr <- trimws(Daten$hausnr) options(warn = 0) Daten <- subset(Daten, select = -c(kfz_nr,lfd_nr,anrede,d_ezulass,s_geburt)) #Daten zusammenfassen Daten <- Daten[!duplicated(Daten), ] Tab <- table(Daten$f_id_nr) doubletten <- names(Tab[which(Tab > 1)]) Korrekturen <- lapply(doubletten, FUN = function(f_id_nr){ S <- Daten[which(Daten$f_id_nr == f_id_nr),] n.NA <- apply(S, MARGIN = 1, FUN = function(x) sum(is.na(x))) S[sample(which(n.NA == min(n.NA)), 1),]}) Korrekturen <- do.call(rbind, Korrekturen) Daten <- rbind(subset(Daten, !(f_id_nr %in% doubletten)), Korrekturen) Daten$id <- 1:nrow(Daten) #Adressen abfragen Adressen <- processSQLQuery(stri_replace_all_regex(adressen.sql, pattern = "\\{st_format3\\}", replacement = date_format(d = d, typ = 3), vectorize = FALSE), dbConnectionObject) Adressen$Ort <- "DORTMUND" ## first pass fp <- merge(x = Daten, y = Adressen[which(is.na(Adressen$HNRZ)),], by.x = c("gross_ort", "gross_straße", "hausnr"), by.y = c("Ort", "STRNAME", "HNR")) fp$qAddress <- 0 fp <- subset(fp, select = -c(gross_ort, gross_straße, hausnr, HNRZ)) ## second pass d2 <- Daten[setdiff(Daten$id, fp$id),] Adressen$clean_strname <- sanitize_string(Adressen$STRNAME) d2$clean_strname <- sanitize_string(d2$gross_straße) sp <- merge(x = d2, y = Adressen[which(is.na(Adressen$HNRZ)),], by.x = c("gross_ort", "clean_strname", "hausnr"), by.y = c("Ort", "clean_strname", "HNR")) sp$qAddress <- 1 sp <- subset(sp, select = -c(gross_ort, clean_strname, hausnr, gross_straße, STRNAME, HNRZ)) ## third pass ### DORTMUND d3 <- Daten[setdiff(Daten$id, c(fp$id, sp$id)),] res <- stringdist(a = trimws(tolower(d3$gross_ort)), b = "dortmund", method = "lcs") fuzzy <- which(res > 0 & res <= 6) d3$gross_ort[fuzzy] <- "DORTMUND" ### Straßen d3$clean_strname <- sanitize_string(d3$gross_straße) clean_strname <- unique(Adressen$clean_strname) clean_strname <- clean_strname[!is.na(clean_strname)] res <- stringsimmatrix(a = clean_strname, b = d3$clean_strname, method = "osa", weight = c(d = 1, i = .5, s = 1, t = 1), nthread = 2) threshold <- which(apply(res, MARGIN = 2, FUN = max) > .9) d3$clean_strname[threshold] <- clean_strname[apply(res, MARGIN = 2, FUN = which.max)[threshold]] ### Hausnummern Adressen <<- Adressen Hausnummern <- clean_hausnr(d3$hausnr) d3$hausnr <- Hausnummern$Hausnummer d3$hausnrz <- Hausnummern$Zusatz fuzzy <- which(!is.na(match(d3$clean_strname, clean_strname)) & d3$gross_ort == "DORTMUND") res <- apply(X = d3[fuzzy,], MARGIN = 1, FUN = match_hausnr) %>% do.call(rbind, .) d3[fuzzy, "hausnr"] <- res[, "HNR"] d3[fuzzy, "hausnrz"] <- res[, "HNRZ"] Adressen$HNRZ[which(is.na(Adressen$HNRZ))] <- "" d3$hausnrz[which(is.na(d3$hausnrz))] <- "" tp <- merge(x = d3, y = Adressen, by.x = c("gross_ort", "clean_strname", "hausnr", "hausnrz"), by.y = c("Ort", "clean_strname", "HNR", "HNRZ")) tp$qAddress <- 2 tp <- subset(tp, select = -c(gross_ort, clean_strname, hausnr, hausnrz, gross_straße, STRNAME)) # Merge unknown <- Daten[setdiff(Daten$id, c(fp$id, sp$id, tp$id)),] unknown <- subset(unknown, select = -c(gross_ort, gross_straße, hausnr)) unknown$RW <- NA unknown$HW <- NA unknown$UBZ <- NA unknown$qAddress <- 3 return(rbind(fp, sp, tp, unknown)) }) # Imputation library("rMIDAS") # Apply rMIDAS preprocessing steps rDaten <- subset(Daten, select = -c(her, typ, beruf)) cat_cols <- c('schadstoffgruppe', 'Fahrzeugklasse','Kraftstoffart', 'Sitzplätze') bin_cols <- c('geschlecht', 'Achsen', 'Antriebsachsen') Daten_conv <- convert(rDaten, bin_cols = bin_cols, cat_cols = cat_cols, minmax_scale = TRUE) Daten_validate <- overimpute(Daten_conv, seed = 1234, training_epochs = 21, report_ival = 3, layer_structure = c(50, 50), plot_vars = TRUE, save_path = "") Daten_train <- train(Daten_conv, seed = 1234, training_epochs = 18, layer_structure = c(50, 50)) Daten_complete <- complete(Daten_train, m = 1) Daten_complete <- cbind(her = Daten$her, typ = Daten$typ, Daten_complete[[1]]) ################################################################################ ################################## Chapter 3 ################################### ################################################################################ allDaten <- merge(Daten, Segments, by.x = c("her", "typ"), by.y = c("Herstellerschlüssel", "Typschlüssel"), all.x = TRUE) allDaten$Segmentcode <- NA allDaten$Segmentcode[which(allDaten$Segment == "MINIS")] <- 0 allDaten$Segmentcode[which(allDaten$Segment == "KLEINWAGEN")] <- 1 allDaten$Segmentcode[which(allDaten$Segment == "KOMPAKTKLASSE")] <- 2 allDaten$Segmentcode[which(allDaten$Segment == "MITTELKLASSE")] <- 3 allDaten$Segmentcode[which(allDaten$Segment == "OBERE MITTELKLASSE")] <- 4 allDaten$Segmentcode[which(allDaten$Segment == "OBERKLASSE")] <- 5 allDaten$Segmentcode[which(allDaten$Segment == "SUV")] <- 6 allDaten$Segmentcode[which(allDaten$Segment == "GELÄNDEWAGEN")] <- 7 allDaten$Segmentcode[which(allDaten$Segment == "SPORTWAGEN")] <- 8 allDaten$Segmentcode[which(allDaten$Segment == "MINI-VANS")] <- 9 allDaten$Segmentcode[which(allDaten$Segment == "GROSSRAUM-VANS")] <- 10 allDaten$Segmentcode[which(allDaten$Segment == "UTILITIES")] <- 11 allDaten$Segmentcode[which(allDaten$Segment == "WOHNMOBILE")] <- 12 allDaten$Kraftstoffart[which(allDaten$Kraftstoffart == "Brennstoffzelle")] <- "andere" allDaten$Sitzplätze <- as.numeric(allDaten$Sitzplätze) classdata <- subset(allDaten, select = -c(her,typ,RW,HW,beruf,fahrzeugalter,halteralter,geschlecht,Segment,Fahrzeugklasse,co2,Hubraum,schadstoffgruppe,Kraftstoffart)) cDaten <- classdata[complete.cases(classdata),] trainDaten <- subset(cDaten, Usage == "Train", select = -c(Segmentcode,Usage)) testDaten <- subset(cDaten, Usage == "Test", select = -c(Segmentcode,Usage)) # Data conversion steps library(Matrix) library(xgboost) trainMatrix <- xgb.DMatrix( data = sparse.model.matrix(~ ., data = trainDaten)[,-1], label = unlist(subset(cDaten, Usage == "Train", select = c(Segmentcode)))) testMatrix <- xgb.DMatrix( data = sparse.model.matrix(~ ., data = testDaten)[,-1], label = unlist(subset(cDaten, Usage == "Test", select = c(Segmentcode)))) # xgboost watchlist <- list(train = trainMatrix, eval = testMatrix) params <- list(nthread = 2, objective = "multi:softmax", num_class = 13, eval_metric = "merror") bst <- xgb.train(params, trainMatrix, nrounds = 100, watchlist = watchlist, early_stopping_rounds = 3) ################################################################################ ################################## Chapter 4 ################################### ################################################################################ #Andy Lyons ajlyons at berkeley.edu #Fri Mar 25 01:23:00 CET 2011 ###################################################################### ## mvee() ## Uses the Khachiyan Algorithm to find the the minimum volume enclosing ## ellipsoid (MVEE) of a set of points. In two dimensions, this is just ## the bounding ellipse (this function is limited to two dimensions). ## Adapted by Andy Lyons from Matlab code by Nima Moshtagh. ## Copyright (c) 2009, Nima Moshtagh ## [1]http://www.mathworks.com/matlabcentral/fileexchange/9542 ## [2]http://www.mathworks.com/matlabcentral/fileexchange/13844 ## [3]http://stackoverflow.com/questions/1768197/bounding-ellipse ## ## Parameters ## xy a two-column data frame containing x and y coordinates ## if NULL then a random sample set of 10 points will be generated ## tolerance a tolerance value (default = 0.005) ## plotme FALSE/TRUE. Plots the points and ellipse. Default TRUE. ## max.iter The maximum number of iterations. If the script tries this ## number of iterations but still can't get to the tolerance ## value, it displays an error message and returns NULL ## shiftxy TRUE/FALSE. If True, will apply a shift to the coordinates to make them ## smaller and speed up the matrix calculations, then reverse the shift ## to the center point of the resulting ellipoid. Default TRUE ## Output: A list containing the "center form" matrix equation of the ## ellipse. i.e. a 2x2 matrix "A" and a 2x1 vector "C" representing ## the center of the ellipse such that: ## (x - C)' A (x - C) <= 1 ## Also in the list is a 2x1 vector elps.axes.lngth whose elements ## are one-half the lengths of the major and minor axes (variables ## a and b ## Also in list is alpha, the angle of rotation ###################################################################### mvee <- function(xy=NULL, tolerance = 0.005, max.iter = 500, shiftxy = TRUE) { ## Number of points n = nrow(xy) ## Dimension of the points (2) #d = ncol(xy) d <- 2 xy.min <- sapply(xy, FUN = "min") xy.use <- xy - rep(xy.min, each = n) ## Add a column of 1s to the (n x 2) matrix xy - so it is now (n x 3) Q <- t(cbind(xy.use, rep(1,n))) ## Initialize count <- 1 err <- 1 u <- rep(1/n, n) ## Khachiyan Algorithm while (err > tolerance) { ## see [4]http://stackoverflow.com/questions/1768197/bounding-ellipse ## for commented code X <- Q %*% diag(u) %*% t(Q) M <- diag(t(Q) %*% solve(X) %*% Q) maximum <- max(M) j <- which(M == maximum) step_size = (maximum - d -1) / ((d+1)*(maximum-1)) new_u <- (1 - step_size) * u new_u[j] <- new_u[j] + step_size err <- sqrt(sum((new_u - u)^2)) count <- count + 1 if (count > max.iter) { warning(paste("Iterated", max.iter, "times and still can't find the bounding ellipse. \n", sep="")) warning("Either increase the tolerance or the maximum number of iterations") return(NULL) } u <- new_u } ## Put the elements of the vector u into the diagonal of a matrix U <- diag(u) ## Take the transpose of xy P <- t(xy.use) ## Compute the A-matrix A <- (1/d) * solve(P %*% U %*% t(P) - (P %*% u) %*% t(P %*% u)) ## Find the Eigenvalues of matrix A which will be used to get the major and minor axes A.eigen <- eigen(A) r <- A.eigen$values[1] / A.eigen$values[2] V <- pi / sqrt(prod(A.eigen$values)) ellipse.params <- list("A" = A, "eigen" = A.eigen, "r" = r, "V" = V) } plot_ellipse <- function(params) { ## Plotting commands adapted from code by Alberto Monteiro ## [5]https://stat.ethz.ch/pipermail/r-help/2006-October/114652.html ## Create the points for the ellipse A.eigen <- params$eigen c <- params$c theta <- seq(0, 2 * pi, length = 72) semi.axes <- sort(1 / sqrt(A.eigen$values), decreasing=TRUE) alpha <- atan2(A.eigen$vectors[2,1], A.eigen$vectors[1,1]) - pi/2 a <- semi.axes[1] b <- semi.axes[2] elp.plot.xs <- c[1] + a * cos(theta) * cos(alpha) - b * sin(theta) * sin(alpha) elp.plot.ys <- c[2] + a * cos(theta) * sin(alpha) + b * sin(theta) * cos(alpha) ## Plot the ellipse with the same scale on each axis lines(elp.plot.xs, elp.plot.ys, type = "l", lty="solid") } #References # 1. http://www.mathworks.com/matlabcentral/fileexchange/9542 # 2. http://www.mathworks.com/matlabcentral/fileexchange/13844 # 3. http://stackoverflow.com/questions/1768197/bounding-ellipse # 4. http://stackoverflow.com/questions/1768197/bounding-ellipse # 5. https://stat.ethz.ch/pipermail/r-help/2006-October/114652.html # Own code for Chapter 4 starts here: library(delaunay) library(data.table) library(parallel) library(pbapply) library(sf) interval_borders <- function(limits, n){ seq(from = limits[1], to = limits[2], length.out = n + 1)} interval_midpoints <- function(limits, n){ var_s <- interval_borders(limits, n) rowMeans(cbind(head(var_s, -1), tail(var_s, -1)))} map <- read_sf() clust <- makePSOCKcluster(2) year <- c("2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023") res_expansion_factor <- 1 do_prescaling <- FALSE for(year_id in year){print(year_id) PKW <- readRDS(paste0("~/Master/data/xgboost/", year_id, ".RDS")) PKW <- PKW[which(PKW$beruf == "888" & PKW$Segment == "SUV"),] PKW <- setDT(PKW[, c("RW", "HW")]) coords <- PKW setnames(coords, old = c("RW", "HW"), new = c("x", "y")) ucoords <- data.table::setDT(coords)[, .N, c("x","y")] xres <- 100 * res_expansion_factor# in meters yres <- 100 * res_expansion_factor# in meters maxeigen <- 4 / (xres^2 + yres^2) mineigen <- 4 / (max(xres, yres) * 20)^2 ## Grid xlim <- c(382290.4, 405244.8) ylim <- c(5697385, 5717364) xcount <- round(diff(range(xlim)) / xres) ycount <- round(diff(range(ylim)) / yres) borders_x <- interval_borders(xlim, xcount) borders_y <- interval_borders(ylim, ycount) mid_x <- interval_midpoints(xlim, xcount) mid_y <- interval_midpoints(ylim, ycount) mapcoords <- expand.grid(x = mid_x, y = mid_y) ## delaunay del <- delaunay::delaunay(as.matrix(subset(ucoords, select = -c(N)))) Faces <- del$faces ## Global bandwidth selection (crossvalidation) g.bw.x <- bw.nrd0(coords$x) * sqrt(5) g.bw.y <- bw.nrd0(coords$y) * sqrt(5) g.D <- diag(c(g.bw.x, g.bw.y)) ## First pass local bandwidth selection (delaunay) clusterEvalQ(cl = clust, expr = library(data.table)) clusterExport(clust, c("ucoords", "Faces", "mvee")) res <- pblapply(cl = clust, X = 1:nrow(ucoords), FUN = function(id){ data <- ucoords[unique(as.numeric(Faces[apply(Faces, MARGIN = 1, FUN = function(row) any(row == id)),])),] data <- data[, c("N"):=NULL] mvee(xy = data.frame(data))} ) Nscaled <- lapply(1:nrow(ucoords), FUN = function(id){res[[id]]$V * (1 / as.numeric(ucoords[id, "N"]))}) med_l <- median(unlist(Nscaled)) med_g <- pi*prod(c(g.bw.x,g.bw.y) / 2) g.bw.s <- med_g / med_l Gscaled <- unlist(Nscaled) * g.bw.s # Total area!!! maxeigen <- 4 / (xres^2 + yres^2) mineigen <- 4 / (max(xres, yres) * 20)^2 sres <- lapply(1:nrow(ucoords), FUN = function(id){ x <- res[[id]] # global scaling l <- Gscaled[id] / res[[id]]$V lambda_2 <- (1 / l) * x$eigen$values[2] lambda_1 <- x$r * lambda_2 lambda <- c(lambda_1, lambda_2) # maximum scaling if(any(lambda < mineigen)){ lambda <- lambda / min(lambda / mineigen) } # minimum scaling (ok!) if(any(lambda > maxeigen)){ lambda <- lambda / max(lambda / maxeigen) } # result x$eigen$vectors %*% diag(lambda) %*% solve(x$eigen$vectors) } ) kernel_epanechnikov <- function(u){ 0.75 * (1 - u^2) * (abs(u) <= 1) } geo_ellipse <- function(p, c, A){ crossprod((p-c), A) %*% (p-c) } M <- data.frame(t(mapcoords[,1:2])) clusterExport(clust, c("ucoords", "sres", "geo_ellipse", "kernel_epanechnikov", "M")) i.densities <- pbsapply(cl = clust, X = 1:nrow(ucoords), FUN = function(id){ N <- as.numeric(ucoords[id, "N"]) c <- t(as.matrix(ucoords[id, c("x", "y")])) A <- sres[[id]] z <- kernel_epanechnikov(vapply(M, geo_ellipse, numeric(1), USE.NAMES = FALSE, c = c, A = A)) z / sum(z) * N } ) r.densities <- apply(i.densities, MARGIN = 1, FUN = sum) mapcoords$den <- r.densities find_value <- function(x,y) mapcoords$den[which(mapcoords$x == x & mapcoords$y == y)] z <- outer(mid_x, mid_y, Vectorize(find_value)) saveRDS(mapcoords, paste0(, year_id, "_den.RDS")) save.image(paste0(, year_id, ".RData")) # Graphics pbreaks <- pretty(c(z)) png(filename = paste0(, year_id, ".png"), width = 672, height = 672) par(mar = c(3,3,1,1)) image(x = mid_x, y = mid_y, z = z, xlab = "", ylab = "", main = "", col = hcl.colors(length(pbreaks) - 1, "YlOrRd", rev = TRUE), breaks = pbreaks) title(ylab="y-coordinates", line = 2) title(xlab="x-coordinates", line = 2) mtext(side=3, line=0, at = mean(xlim), cex=1, paste0("01.01.", year_id)) legend("bottomleft", col = hcl.colors(length(pbreaks) - 1, "YlOrRd", rev = TRUE), legend = pbreaks[-1], pch = 15, pt.cex = 2, title = "Cars per ha", ncol = 5) plot(st_geometry(map), border = hsv(245/360, 80/100, 89/100, alpha = .5), add = TRUE) dev.off() } ################################################################################ ################################## Chapter 5 ################################### ################################################################################ # input delta <- 1/3 R <- 1 # calculate epsilon p <- (1 - delta) / 2 epsilon <- - log(p / (1 - p) * (1 / (delta + p) - 1)) / R # sigma library(lamW) ## sigma1 sigma1 <- - R / (lambertW0(- 1 / (2 * exp(1))) * exp(1) * epsilon) ## für welches i wird u == -1/exp(1) u <- function(i){ ((- (2 * i * R * exp(- i * epsilon) * exp(- (i * R * exp(- i * epsilon))/ sigma1)) / sigma1) - (- 1 / exp(1)))^2} cp <- optimize(u, interval = c(0,10))$minimum ## sigma2 sigma2 <- function(i, sigma1, cp){ u <- - (2 * i * R * exp(- i * epsilon) * exp(- (i * R * exp(- i * epsilon))/ sigma1)) / sigma1 ( - (i * R * exp(i * epsilon) * sigma1) / (lambertW0(u) * exp(i * epsilon) * sigma1 + i * R)) * (i <= cp) + ( - (i * R * exp(i * epsilon) * sigma1) / (lambertWm1(u) * exp(i * epsilon) * sigma1 + i * R)) * (i > cp) } ## sigmalim sigma_lim <- 1 / epsilon qlap <- function(p, mu, b){ mu - b * sign(p - 1/2) * log(1 - 2 * abs(p - 1/2))} plap <- function(x, mu, b){ 1/2 + 1/2*sign(x - mu)*(1 - exp(- abs(x - mu) / b))} rlap <- function(mu, b, n, branch){ cp <- plap(x = 0, mu = mu, b = b) invalid <- isTRUE(all.equal(cp, 0)) if(branch == "pos"){ p <- runif(n = n, min = cp, max = 1) } if(branch == "neg" & !invalid){ p <- runif(n = n, min = 0, max = cp) } r <- qlap(p = p, mu = mu, b = b) if(branch == "neg" & invalid){r <- 0} if(branch == "neg" & !invalid){r <- r * cp / (1 - cp)} r } mapcoords <- readRDS() mapcoords$sigma <- NA nullid <- which(mapcoords$den == 0) otherid <- setdiff(1:nrow(mapcoords), nullid) mapcoords$sigma[nullid] <- sigma1 mapcoords$sigma[otherid] <- sigma2(i = mapcoords$den[otherid], sigma1, cp) set.seed(1234) mapcoords$noise <- mapply(rlap, mu = mapcoords$den, b = mapcoords$sigma, n = 1, branch = "pos") mapcoords$neg <- mapply(rlap, mu = mapcoords$den, b = mapcoords$sigma, n = 1, branch = "neg") mapcoords$res <- mapcoords$noise + mapcoords$neg # distribute negative noise negid <- which(mapcoords$res < 0) mapcoords$res[negid] <- mapcoords$noise[negid] sampleids <- sample(negid) d <- numeric() # minimum distances for(i in sampleids){ df <- data.frame(id = 1:nrow(mapcoords), D = sqrt((mapcoords[i, "x"] - mapcoords[, "x"])^2 + (mapcoords[i, "y"] - mapcoords[, "y"])^2)) valid_ids <- which(mapcoords$res > abs(mapcoords[i, "neg"])) df <- df[valid_ids,] d[i] <- min(df$D) newid <- df$id[sample(which(df$D == d[i]),1)] mapcoords[newid,"res"] <- mapcoords[newid,"res"] + mapcoords[i, "neg"] } ################################################################################ ################################## Chapter 6 ################################### ################################################################################ library(SpatialKWD) d2014 <- readRDS() d2015 <- readRDS() d2016 <- readRDS() d2017 <- readRDS() d2018 <- readRDS() d2019 <- readRDS() d2020 <- readRDS() d2021 <- readRDS() d2022 <- readRDS() d2023 <- readRDS() colnames(d2014)[3] <- "d2014" colnames(d2015)[3] <- "d2015" colnames(d2016)[3] <- "d2016" colnames(d2017)[3] <- "d2017" colnames(d2018)[3] <- "d2018" colnames(d2019)[3] <- "d2019" colnames(d2020)[3] <- "d2020" colnames(d2021)[3] <- "d2021" colnames(d2022)[3] <- "d2022" colnames(d2023)[3] <- "d2023" df_list <- list(d2014, d2015, d2016, d2017, d2018, d2019, d2020, d2021, d2022, d2023) df <- Reduce(function(x, y) merge(x, y, by = c("x", "y")), df_list, accumulate=FALSE) wasserstein_dist <- compareAll(Coordinates = as.matrix(df[,1:2]), Weights = as.matrix(df[,3:12])) M <- matrix(wasserstein_dist$distance, nrow = 10, ncol = 10) colnames(M) <- as.character(c(2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)) rownames(M) <- as.character(c(2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)) par(mar=c(2,3,0,0)) image(t(M), xaxt = "n", yaxt = "n") axis(side = 1, at = seq(from = 0, to = 1, length.out = 10),labels = colnames(M)) axis(side = 2, at = seq(from = 0, to = 1, length.out = 10),labels = rownames(M), las = 2) abline(h = seq(from = 0, to = 1, length.out = 10) + 0.055) abline(v = seq(from = 0, to = 1, length.out = 10) + 0.055) text(x = rep(seq(from = 0, to = 1, length.out = 10), 10), y = sort(rep(seq(from = 0, to = 1, length.out = 10), 10)), labels = round(c(M), 2))