% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.R
\name{predict.glmGamPoi}
\alias{predict.glmGamPoi}
\title{Predict 'link' or 'response' values for Gamma-Poisson GLMs}
\usage{
\method{predict}{glmGamPoi}(
  object,
  newdata = NULL,
  type = c("link", "response"),
  se.fit = FALSE,
  offset = mean(object$Offset),
  on_disk = NULL,
  verbose = FALSE,
  ...
)
}
\arguments{
\item{object}{a \code{glmGamPoi} fit object (produced by \code{glm_gp()}).}

\item{newdata}{a specification of the new data for which the
expression for each gene is predicted. \code{newdata} should be a
\describe{
\item{data.frame}{if the original fit was specified with
a \code{formula}, provide a \code{data.frame} with one column for
each variable in the formula.
For example, if the fit was created with \code{glm_gp(se, design = ~ age + batch + treatment)},
then the data.frame needs an \code{age}, a \code{batch}, and a \code{treatment} column that
contain the same data types as in the original fit.}
\item{vector}{if the original fit was specified using a vector, you
need to again provide a vector with the same format.}
\item{matrix}{if \code{newdata} is a matrix, it is applied directly
as \code{Mu <- exp(object$Beta \%*\% t(newdata) + object$offset_matrix)}.
So make sure that it is constructed correctly.}
\item{NULL}{if \code{newdata} is \code{NULL}, the predicted values for the original
input data are returned.}
}}

\item{type}{either 'link' or 'response'. The default is 'link', which returns the predicted values
before the inverse link function (\code{exp()}) is applied. Thus, the values can be
positive and negative numbers. However, often the predicted values are easier to interpret
\strong{after} the inverse link function is applied (i.e., \code{type = "response"}), because then the
values are on the same scale as the original counts.}

\item{se.fit}{a boolean that indicates whether, in addition to the mean, the standard error of the
mean is returned.}

\item{offset}{count models (in particular for sequencing experiments) usually have a sample
specific size factor (\verb{offset = log(size factor)}). It defines how big we expect the predicted
results to be. If \code{newdata} is \code{NULL}, the \code{offset} is ignored, because \code{predict()} returns
a result based on the pre-calculated \code{object$Mu}. If \code{newdata} is not \code{NULL}, by default the
\code{offset} is \code{mean(object$Offset)}, which puts the predictions on the same scale as the average sample.}

\item{on_disk}{a boolean that indicates if the results are \code{HDF5Matrix} objects from the \code{HDF5Array}
package. If \code{newdata} is \code{NULL}, \code{on_disk} is ignored. Otherwise, if \code{on_disk = NULL},
the result is calculated on disk depending on whether \code{offset} is stored on disk.}

\item{verbose}{a boolean that indicates if information about the individual steps is
printed while predicting. Default: \code{FALSE}.}

\item{...}{currently ignored.}
}
\value{
If \code{se.fit == FALSE}, a matrix with dimensions \verb{nrow(object$data) x nrow(newdata)}. \cr
If \code{se.fit == TRUE}, a list with three entries
\describe{
\item{fit}{the predicted values as a matrix with dimensions \verb{nrow(object$data) x nrow(newdata)}.
This is what would be returned if \code{se.fit == FALSE}.}
\item{se.fit}{the associated standard errors for each \code{fit}. Also a matrix with
dimensions \verb{nrow(object$data) x nrow(newdata)}.}
\item{residual.scale}{Currently fixed to 1. In the future, this might become the values from
\code{object$overdispersion_shrinkage_list$ql_disp_shrunken}.}
}
}
\description{
Predict \code{mu} (i.e., \code{type = "response"}) or \code{log(mu)} (i.e., \code{type = "link"})
from a 'glmGamPoi' fit (created by \code{glm_gp(...)}) with the corresponding
estimate of the standard error. If \code{newdata} is \code{NULL}, \code{mu} is returned
for the original input data.
}
\details{
For \code{se.fit = TRUE}, the function sticks very close to the behavior of \code{stats::predict.glm()} for
fits from \code{MASS::glm.nb()}.

\strong{Note}: If \code{type = "link"}, the results are computed using the natural logarithm as the
link function. This differs from the \code{lfc} estimates provided by \code{\link{test_de}}, which are on the
log2 scale.
}
\examples{

 set.seed(1)
 # The simplest example: a single vector of counts
 y <- rnbinom(n = 10, mu = 3, size = 1/2.4)
 fit <- glm_gp(y, size_factors = FALSE)
 # Predictions on the scale of the counts
 predict(fit, type = "response")
 # Predictions on the natural log scale, together with their standard errors
 predict(fit, type = "link", se.fit = TRUE)


 # Fitting a whole matrix with 30 rows (genes) and 5 columns (samples)
 model_matrix <- cbind(1, rnorm(5))
 true_Beta <- cbind(rnorm(n = 30), rnorm(n = 30, mean = 3))
 sf <- exp(rnorm(n = 5, mean = 0.7))
 model_matrix
 # The size factors are sample specific, so each column of the mean
 # matrix is scaled by the matching entry of sf. A plain sf * matrix
 # would recycle sf down the rows and scale the wrong dimension.
 Mu <- sweep(exp(true_Beta \%*\% t(model_matrix)), 2, sf, FUN = "*")
 Y <- matrix(rnbinom(n = 30 * 5, mu = Mu, size = 1/2.4),
             nrow = 30, ncol = 5)
 fit <- glm_gp(Y, design = model_matrix, size_factors = sf, verbose = TRUE)

 head(predict(fit, type = "response"))
 # With se.fit = TRUE the result is a list; fit and se.fit are matrices
 pred <- predict(fit, type = "link", se.fit = TRUE, verbose = TRUE)
 head(pred$fit)
 head(pred$se.fit)


 # Fitting a model with covariates
 data <- data.frame(fav_food = sample(c("apple", "banana", "cherry"), size = 50, replace = TRUE),
                    city = sample(c("heidelberg", "paris", "new york"), size = 50, replace = TRUE),
                    age = rnorm(n = 50, mean = 40, sd = 15))
 Y <- matrix(rnbinom(n = 4 * 50, mu = 3, size = 1/3.1), nrow = 4, ncol = 50)
 fit <- glm_gp(Y, design = ~ fav_food + city + age, col_data = data)
 # Without newdata, the predictions for the original input data are returned
 predict(fit)[, 1:3]

 # Predict for a single new observation; the column names match the
 # variables of the design formula
 nd <- data.frame(fav_food = "banana", city = "paris", age = 29)
 predict(fit, newdata = nd)

 # Predict for twelve new observations that only differ in age
 nd <- data.frame(fav_food = "banana", city = "paris", age = 29:40)
 predict(fit, newdata = nd, se.fit = TRUE, type = "response")


}
\seealso{
\code{\link[stats:predict.lm]{stats::predict.lm()}} and \code{\link[stats:predict.glm]{stats::predict.glm()}}
}
