perl/etc/justonce/sperm/myfit.R

   1 D__myfit <- 1
   2
   3 print.myfit <- function(x, ...)
   4 {
   5     cat(paste("\nObserved and fitted values for", x$type, "distribution\n"))
   6     if(x$method[1] == "fixed")
   7       cat("with fixed parameters \n\n")
   8     else
   9       cat(paste("with parameters estimated by `", paste(x$method, collapse = " "), "' \n\n", sep = ""))
  10     RVAL <- cbind(x$count, x$observed, x$fitted)
  11     colnames(RVAL) <- c("count", "observed", "fitted")
  12     rownames(RVAL) <- rep("", nrow(RVAL))
  13     print(RVAL)
  14     invisible(x)
  15 }
  16 fitted.myfit <- function(object, ...)
  17 {
  18   object$fitted
  19 }
  20 summary.myfit <- function(object, ...)
  21 {
  22     df <- object$df
  23     obsrvd <- object$observed
  24     count <- object$count
  25     expctd <- fitted(object)
  26
  27     G2 <- sum(ifelse(obsrvd == 0, 0, obsrvd * log(obsrvd/expctd))) * 2
  28
  29     n <- length(obsrvd)
  30     switch(object$type,
  31     "poisson" = { pfun <- "ppois" },
  32     "binomial" = { pfun <- "pbinom" },
  33     "nbinomial" = { pfun <- "pnbinom" })
  34     p.hat <- diff(c(0, do.call(pfun, c(list(q = count[-n]), object$par)), 1))
  35     #expctd <- p.hat * sum(obsrvd)
  36     X2 <- sum((obsrvd - expctd)^2/expctd)
  37
  38     names(G2) <- "Likelihood Ratio"
  39     names(X2) <- "Pearson"
  40     if(any(expctd < 5) & object$method[1] != "ML")
  41         warning("Chi-squared approximation may be incorrect")
  42
  43     RVAL <- switch(object$method[1],
  44       "ML" = { G2 },
  45       "MinChisq" = { X2 },
  46       "fixed" = { c(X2, G2) }
  47     )
  48
  49     RVAL <- cbind(RVAL, df, pchisq(RVAL, df = df, lower.tail = FALSE))
  50     colnames(RVAL) <- c("X^2", "df", "P(> X^2)")
  51
  52     cat(paste("\n\t Goodness-of-fit test for", object$type, "distribution\n\n"))
  53     print(RVAL)
  54     invisible(RVAL)
  55 }
  56
  57 myfit <- function(x, method = c("ML", "MinChisq"), par = NULL) {
  58         if (is.vector(x)) {
  59                 x <- table(x)
  60         }
  61         if (is.table(x)) {
  62                 if (length(dim(x)) > 1)
  63                         stop("x must be a 1-way table")
  64                 freq <- as.vector(x)
  65                 count <- as.numeric(names(x))
  66         } else {
  67                 if (!(!is.null(ncol(x)) && ncol(x) == 2))
  68                         stop("x must be a 2-column matrix or data.frame")
  69                 freq <- as.vector(x[, 1])
  70                 count <- as.vector(x[, 2])
  71         }
  72         #ncount <- 0:max(count)
  73         n <- length(count)
  74         df <- -1
  75         method <- match.arg(method)
  76         if (!is.null(par)) {
  77                 if (!is.list(par)) stop("`par' must be a named list")
  78                 if (!(names(par) == "lambda")) stop("`par' must specify `lambda'")
  79                 par <- par$lambda
  80                 method <- "fixed"
  81         } else if (method == "ML") {
  82                 df <- df - 1
  83                 par <- weighted.mean(count, freq)
  84         } else if (method == "MinChisq") {
  85                 df <- df - 1
  86                 chi2 <- function(x) {
  87                         p.hat <- diff(c(0, ppois(count[-n], lambda = x),1)) # DO NOT USE THIS NOW.
  88                         expected <- sum(freq) * p.hat
  89                         sum((freq - expected)^2/expected)
  90                 }
  91                 par <- optimize(chi2, range(count))$minimum
  92         }
  93         par <- list(lambda = par)
  94         p.hat <- dpois(count, lambda = par$lambda)
  95
  96         zoomratio <- sum(freq) / sum(p.hat)
  97         expected <- p.hat * zoomratio
  98         print(c(sum(freq),sum(p.hat),zoomratio))
  99         #print(cbind(p.hat,expected))
 100
 101         df <- switch(method[1], MinChisq = {
 102                 length(freq) + df
 103         }, ML = {
 104                 sum(freq > 0) + df
 105         }, fixed = {
 106                 c(length(freq), sum(freq > 0)) + df
 107         })
 108         RVAL <- list(observed = freq, count = count, fitted = expected,
 109                 type = "poisson", method = method, df = df, par = par, zoom = zoomratio)
 110         class(RVAL) <- "myfit"
 111         RVAL
 112 }
 113
 114
 115
 116 scale2pois <- function(x, lambda, cntmin = NULL, cntmax = NULL) {
 117         if (is.vector(x)) {
 118                 x <- table(x)
 119         }
 120         if (is.table(x)) {
 121                 if (length(dim(x)) > 1)
 122                         stop("x must be a 1-way table")
 123                 freq <- as.vector(x)
 124                 count <- as.numeric(names(x))
 125         } else {
 126                 if (!(!is.null(ncol(x)) && ncol(x) == 2))
 127                         stop("x must be a 2-column matrix or data.frame")
 128                 freq <- as.vector(x[, 1])
 129                 count <- as.vector(x[, 2])
 130         }
 131         #ncount <- 0:max(count)
 132         n <- length(count)
 133
 134         p.hat <- dpois(count, lambda = lambda)
 135         if (is.null(cntmin) | is.na(cntmin))
 136                 cntmin <- min(count)
 137         if (is.null(cntmax) | is.na(cntmax))
 138                 cntmax <- max(count)
 139         flag <- (count >= cntmin) & (count <= cntmax)
 140         #print(c(cntmin,cntmax,count,flag))
 141         zoomratio <- sum(p.hat[flag]) / sum(freq[flag])
 142         scaled <- freq * zoomratio
 143         #expected <- p.hat / zoomratio
 144         #print(c(sum(freq),sum(scaled),sum(p.hat),sum(expected),zoomratio))
 145         #print(cbind(freq,scaled, p.hat,expected))
 146
 147         RVAL <- list(ret = cbind(scaled,count), zoom = zoomratio, cntmin = cntmin, cntmax = cntmax)
 148         RVAL
 149 }
 150
 151 cdf2pdf <- function(x, rescale = FALSE) {
 152         mylen <- length(x) - 1
 153         #RVAL <- diff(c(0, x, 1))[1:mylen]
 154         RVAL <- diff(c(0, x), 1)[1:mylen]
 155         if (rescale) {
 156                 s <- sum(RVAL)
 157                 RVAL <- RVAL/s
 158         }
 159         RVAL
 160 }
 161
 162 # http://www.csee.ogi.edu/~gormanky/papers/handbook/ky.R
 163 pdf2cdf <- function(x, complete = FALSE, rescale = FALSE) {
 164     # make a sorted PDF into a sorted CDF
 165         mylen <- length(x)
 166         RVAL <- x
 167         if (complete | rescale) {
 168                 RVAL <- c(x,x[mylen])
 169                 mylen <- 1 + mylen
 170                 #length(RVAL) <- mylen
 171         }
 172         for (i in 2:mylen) {
 173                 RVAL[i] <- RVAL[i] + RVAL[i-1]
 174         }
 175         if (rescale) {
 176                 RVAL <- RVAL/RVAL[mylen]
 177         }
 178         return(RVAL)
 179 }