计算分布拟合函数 R 的 y 值

[英]Calculate y value for distribution fitting functions R

``````library(plyr)
library(dplyr)
library(fitdistrplus)
library(evd)
library(gamlss)

fdistr <- function(d) {

#  Uncomment to try  run line by line
# d <- data_to_plot

TLT <- d\$TLT
if (sum(TLT<=0)) {TLT[TLT<=0] <- 0.001} # removing value < 0 for log clculation
gev <- fgev(TLT, std.err=FALSE)
distr <- c('norm', 'lnorm', 'weibull', 'gamma')
fit <- lapply(X=distr, FUN=fitdist, data=TLT)
fit[[5]] <- gev
distr[5] <- 'gev'
names(fit) <- distr
Loglike <- sapply(X=fit, FUN=logLik)
Loglike_Best <- which(Loglike == max(Loglike))

#  Uncomment to try  run line by line
# max <- which.max(density(d\$TLT)\$y)
# max_density <- stats::density(d\$TLT)\$y[max]
# max_y <- max_density

x_data <- max(d\$TLT)

hist(TLT, prob=TRUE, breaks= x_data,
main=paste(d\$DLT_Code[1],
'- best :',
names(Loglike[Loglike_Best])),
col='lightgrey',
border='white'
# ylim=  c(0,max_y)
)

lines(density(TLT),
col='darkgrey',
lty=2,
lwd=2)

grid(nx = NA, ny = NULL, col = "gray", lty = "dotted",
lwd = .5, equilogs = TRUE)

curve(dnorm(x,
mean=fit[['norm']]\$estimate[1],
sd=fit[['norm']]\$estimate[2]),

curve(dlnorm(x,
meanlog=fit[['lnorm']]\$estimate[1],
sdlog=fit[['lnorm']]\$estimate[2]),

curve(dweibull(x,
shape=fit[['weibull']]\$estimate[1],
scale=fit[['weibull']]\$estimate[2]),

curve(dgamma(x,
shape=fit[['gamma']]\$estimate[1],
rate=fit[['gamma']]\$estimate[2]),

curve(dgev(x,
loc=fit[['gev']]\$estimate[1],
scale=fit[['gev']]\$estimate[2],
shape=fit[['gev']]\$estimate[3]),

legend_loglik <- paste(c('Norm', 'LogNorm', 'Weibull', 'Gamma','GEV'), c(':'),
round(Loglike, digits=2))

legend("topright", legend=legend_loglik,
col=c('blue', 'darkgreen', 'purple', 'gold', 'red'),
lty=1, lwd=2,
bty='o', bg='white', box.lty=2, box.lwd = 1, box.col='white')

return(data.frame(DLT_Code = d\$DLT_Code[1],
n = length(d\$TLT),
Best = names(Loglike[Loglike_Best]),
lnorm = Loglike[1],
norm = Loglike[2],
weibul = Loglike[3],
gamma = Loglike[4],
GEV = Loglike[5]))

}

#  Creating data set
TLT <- c(rep(0,32), rep(1,120), rep(2,10), rep(3,67), rep(4,14),  rep(5,7), 6)
DLT_Code <- c(rep('DLT_Code',251))

data_to_plot <- data.frame(cbind(DLT_Code,TLT))
data_to_plot\$TLT <- as.numeric(as.character(data_to_plot\$TLT ))

DLT_Distr <- do.call(rbind, by(data = data_to_plot, INDICES = data_to_plot\$DLT_Code, FUN=fdistr))
``````

``````library(purrr)

mynorm <- function(x) dnorm(x, mean = 0, sd = 1)
mygamma <- function(x) dgamma(x, rate = .5, shape = 1)

# get largest maximum over interval
ymax <- max(purrr::map_dbl(c(mynorm, mygamma), ~ optimize(., interval = c(0, 3), maximum = T)\$objective))

# 0.4999811

# plot data
curve(mynorm, col = "blue", lwd = 2, xlim = c(0, 3), ylim = c(0, ymax * 1.1))
curve(mygamma, col = "red", lwd = 2, add = T)
``````

``````library(plyr)
library(dplyr)
library(fitdistrplus)
library(evd)
library(gamlss)
library(purrr) # <- add this library

fdistr <- function(d) {

#  Uncomment to try  run line by line
# d <- data_to_plot

TLT <- d\$TLT
if (sum(TLT<=0)) {TLT[TLT<=0] <- 0.001} # removing value < 0 for log clculation
gev <- fgev(TLT, std.err=FALSE)
distr <- c('norm', 'lnorm', 'weibull', 'gamma')
fit <- lapply(X=distr, FUN=fitdist, data=TLT)
fit[[5]] <- gev
distr[5] <- 'gev'
names(fit) <- distr
Loglike <- sapply(X=fit, FUN=logLik)
Loglike_Best <- which(Loglike == max(Loglike))

#  Uncomment to try  run line by line
# max <- which.max(density(d\$TLT)\$y)
# max_density <- stats::density(d\$TLT)\$y[max]
# max_y <- max_density

x_data <- max(d\$TLT)

# parameterize your densities before plotting
mynorm <- function(x) {
dnorm(x,
mean=fit[['norm']]\$estimate[1],
sd=fit[['norm']]\$estimate[2])
}

mylnorm <- function(x){
dlnorm(x,
meanlog=fit[['lnorm']]\$estimate[1],
sdlog=fit[['lnorm']]\$estimate[2])
}

myweibull <- function(x) {
dweibull(x,
shape=fit[['weibull']]\$estimate[1],
scale=fit[['weibull']]\$estimate[2])
}

mygamma <- function(x) {
dgamma(x,
shape=fit[['gamma']]\$estimate[1],
rate=fit[['gamma']]\$estimate[2])
}

mygev <- function(x){
dgev(x,
loc=fit[['gev']]\$estimate[1],
scale=fit[['gev']]\$estimate[2],
shape=fit[['gev']]\$estimate[3])
}

distributions <- c(mynorm, mylnorm, myweibull, mygamma, mygev)

# get the max of each density
y <- purrr::map_dbl(distributions, ~ optimize(., interval = c(0, x_data), maximum = T)\$objective)

# find the max (excluding infinity)
ymax <- max(y[abs(y) < Inf])

hist(TLT, prob=TRUE, breaks= x_data,
main=paste(d\$DLT_Code[1],
'- best :',
names(Loglike[Loglike_Best])),
col='lightgrey',
border='white',
ylim=  c(0, ymax)
)

lines(density(TLT),
col='darkgrey',
lty=2,
lwd=2)

grid(nx = NA, ny = NULL, col = "gray", lty = "dotted",
lwd = .5, equilogs = TRUE)

curve(mynorm,
add=TRUE, col='blue', lwd=2, n = 1E5) # <- increase x grid

curve(mylnorm,
add=TRUE, col='darkgreen', lwd=2, n = 1E5) # <- increase x grid

curve(myweibull,
add=TRUE, col='purple', lwd=2, n = 1E5) # <- increase x grid

curve(mygamma,
add=TRUE, col='Gold', lwd=2, n = 1E5) # <- increase x grid

curve(mygev,
add=TRUE, col='red', lwd=2, n = 1E5) # <- increase x grid

legend_loglik <- paste(c('Norm', 'LogNorm', 'Weibull', 'Gamma','GEV'), c(':'),
round(Loglike, digits=2))

legend("topright", legend=legend_loglik,
col=c('blue', 'darkgreen', 'purple', 'gold', 'red'),
lty=1, lwd=2,
bty='o', bg='white', box.lty=2, box.lwd = 1, box.col='white')

return(data.frame(DLT_Code = d\$DLT_Code[1],
n = length(d\$TLT),
Best = names(Loglike[Loglike_Best]),
lnorm = Loglike[1],
norm = Loglike[2],
weibul = Loglike[3],
gamma = Loglike[4],
GEV = Loglike[5]))

}

#  Creating data set
TLT <- c(rep(0,32), rep(1,120), rep(2,10), rep(3,67), rep(4,14),  rep(5,7), 6)
DLT_Code <- c(rep('DLT_Code',251))

data_to_plot <- data.frame(cbind(DLT_Code,TLT))
data_to_plot\$TLT <- as.numeric(as.character(data_to_plot\$TLT ))

DLT_Distr <- do.call(rbind, by(data = data_to_plot, INDICES = data_to_plot\$DLT_Code, FUN=fdistr))
``````

``````library(purrr)

mygamma <- function(x) {
dgamma(x,
shape=fit[['gamma']]\$estimate[1], # 0.6225622
rate=fit[['gamma']]\$estimate[2]) # 0.3568242
}

number_of_x <- c(5, 10, 101, 75000)
purrr::imap_dfr(number_of_x, ~ curve(mygamma, xlim = c(0, 6), n = .), .id = "n") %>%
dplyr::mutate_at(1, ~ sprintf("n = %i", number_of_x[as.numeric(.)])) %>%
dplyr::mutate(n = factor(n, unique(n))) %>%
dplyr::filter(x > 0) %>%
dplyr::group_by(n) %>%
dplyr::slice_min(order_by = x, n = 5)

n                 x       y
<fct>         <dbl>   <dbl>
1 n = 5     1.5        0.184
2 n = 5     3          0.0828
3 n = 5     4.5        0.0416
4 n = 5     6          0.0219
5 n = 10    0.667      0.336
6 n = 10    1.33       0.204
7 n = 10    2          0.138
8 n = 10    2.67       0.0975
9 n = 10    3.33       0.0707
10 n = 101   0.06       1.04
11 n = 101   0.12       0.780
12 n = 101   0.18       0.655
13 n = 101   0.24       0.575
14 n = 101   0.3        0.518
15 n = 75000 0.0000800 12.9
16 n = 75000 0.000160   9.90
17 n = 75000 0.000240   8.50
18 n = 75000 0.000320   7.62
19 n = 75000 0.000400   7.01
``````

``````mygamma <- function(x) dgamma(x, shape=fit[['gamma']]\$estimate[1],
rate=fit[['gamma']]\$estimate[2])
get_curve_values <- function(fn, x_data){
res <- curve(fn, from=0, to=x_data)
dev.off()
res
}
curve_val <- get_curve_values(mygamma, x_data)
ylim <- max(curve_val\$y,na.rm = TRUE)

``````