\name{gIndex}
\alias{gIndex}
\alias{print.gIndex}
\alias{plot.gIndex}
\alias{GiniMd}
\title{
Calculate Total and Partial g-indexes for an rms Fit
}
\description{
  \code{gIndex} computes the total \eqn{g}-index for a model based on
  the vector of linear predictors, and the partial \eqn{g}-index for
  each predictor in a model.  The latter is computed by summing all the
  terms involving each variable, weighted by their regression
  coefficients, then computing Gini's mean difference on this sum.  For
  example, a regression model having age and sex and age*sex on the
  right hand side, with corresponding regression coefficients \eqn{b_{1},
	b_{2}, b_{3}}{b1, b2, b3} will have the \eqn{g}-index for age
  computed from Gini's mean 
  difference on the product of age \eqn{\times (b_{1} + b_{3}w)}{times
	(b1 + b3*w)} where
  \eqn{w} is an indicator set to one for observations with sex not equal
  to the reference value.  When there are nonlinear terms associated
  with a predictor, these terms will also be combined.

  A \code{print}
  method is defined, and there is a \code{plot} method for displaying
  \eqn{g}-indexes using a dot chart.

  A basic function \code{GiniMd} computes Gini's mean difference on a
  numeric vector.  This index is defined as the mean absolute difference
  between any two distinct elements of a vector.  For a Bernoulli
  (binary) variable with proportion of ones equal to \eqn{p} and sample
  size \eqn{n}, Gini's mean difference is
  \eqn{2\frac{n}{n-1}p(1-p)}{2np(1-p)/(n-1)}.  For a 
  trinomial variable (e.g., predicted values for a 3-level categorical
  predictor using two dummy variables) having (predicted)
  values \eqn{A, B, C} with corresponding proportions \eqn{a, b, c},
  Gini's mean difference is
  \eqn{2\frac{n}{n-1}[ab|A-B|+ac|A-C|+bc|B-C|]}{2n[ab|A-B|+ac|A-C|+bc|B-C|]/(n-1)}.
}
\usage{
gIndex(object, partials = TRUE,
lplabel = if (length(object$scale)) object$scale[1] else "X*Beta",
fun,
funlabel = if (missing(fun)) character(0) else deparse(substitute(fun)),
postfun = if (length(object$scale) == 2) exp else NULL,
postlabel = if (length(postfun))
 ifelse(missing(postfun), if (length(object$scale) > 1) object$scale[2]
  else "Anti-log", deparse(substitute(postfun))) else character(0), ...)

\method{print}{gIndex}(x, digits=4, abbrev=FALSE,
 vnames=c("names","labels"), \dots)

\method{plot}{gIndex}(x, what=c('pre', 'post'),
 xlab=NULL, pch=16, rm.totals=FALSE,
sort=c('descending', 'ascending', 'none'), \dots)

GiniMd(x, na.rm=FALSE)
}
\arguments{
  \item{object}{result of an \code{rms} fitting function}
  \item{partials}{set to \code{FALSE} to suppress computation of partial
	\eqn{g}s}
  \item{lplabel}{a replacement for default values such as
	\code{"X*Beta"} or \code{"log odds"}}
  \item{fun}{an optional function to transform the linear predictors
	before computing the total (only) \eqn{g}.  When this is present, a
	new component \code{gtrans} is added to the attributes of the object
	resulting from \code{gIndex}.}
  \item{funlabel}{a character string label for \code{fun}, otherwise
	taken from the function name itself}
  \item{postfun}{a function to transform \eqn{g} such as \code{exp}
	(anti-log), which is the default for certain models such as the
	logistic and Cox models}
  \item{postlabel}{a label for \code{postfun}}
\item{\dots}{
  For \code{gIndex}, passed to \code{predict.rms}.
  Ignored for \code{print}.  Passed to \code{\link[Hmisc]{dotchart2}}
  for \code{plot}.
}
\item{x}{
  an object created by \code{gIndex} (for \code{print} or \code{plot})
  or a numeric vector (for \code{GiniMd})
}
\item{digits}{causes rounding to the \code{digits} decimal place}
\item{abbrev}{set to \code{TRUE} to abbreviate labels if
  \code{vnames="labels"}}
\item{vnames}{set to \code{"labels"} to print predictor labels instead
  of names}
\item{what}{set to \code{"post"} to plot the transformed \eqn{g}-index
  if there is one (e.g., ratio scale)}
\item{xlab}{\eqn{x}-axis label; constructed by default}
\item{pch}{plotting character for point}
\item{rm.totals}{set to \code{TRUE} to remove the total \eqn{g}-index
  when plotting}
\item{sort}{specifies how to sort predictors by \eqn{g}-index; default
  is in descending order going down the dot chart}
\item{na.rm}{set to \code{TRUE} if you suspect there may be \code{NA}s
  in \code{x}; these will then be removed.  Otherwise an error will
  result.}
}
\details{
  For stratification factors in a Cox proportional hazards model, there is
  no contribution of variation towards computing a partial \eqn{g}
  except from terms that interact with the stratification variable.
}
\value{
  \code{gIndex} returns a matrix of class \code{"gIndex"} with auxiliary
  information stored as attributes, such as variable labels.
  \code{GiniMd} returns a scalar.
}
\references{
David HA (1968): Gini's mean difference rediscovered.  Biometrika 55:573--575.
}
\author{
Frank Harrell\cr
Department of Biostatistics\cr
Vanderbilt University\cr
\email{f.harrell@vanderbilt.edu}
}
\seealso{\code{\link{predict.rms}}}
\examples{
# Fix the random seed so the simulated data are reproducible
set.seed(1)
n <- 100
# One continuous predictor and two randomly assigned two-level factors
x <- 1:n
w <- factor(sample(c('a','b'), n, TRUE))
u <- factor(sample(c('A','B'), n, TRUE))
# Response: linear in x, shifts for w and u, a w-by-u interaction, plus
# Gaussian noise
y <- .01*x + .2*(w=='b') + .3*(u=='B') + .2*(w=='b' & u=='B') + rnorm(n)/5
# Build the datadist object for the predictors and register it by name
dd <- datadist(x,w,u); options(datadist='dd')
# Linear model with all interactions among x, w and u; x=TRUE and y=TRUE
# are passed so the design matrix and response are kept in the fit
f <- ols(y ~ x*w*u, x=TRUE, y=TRUE)
f
anova(f)

# Combined terms from predict; columns 1 to 3 are used below
# (presumably one column per predictor; see predict.rms)
zc <- predict(f, type='cterms')

# Brute-force reference for checking GiniMd: sum |x_i - x_j| over all
# ordered pairs (the diagonal contributes zero) and divide by n(n-1)
gmd <- function(x) {
  len <- length(x)
  pairwise <- abs(outer(x, x, "-"))
  sum(pairwise) / len / (len - 1)
}
# Brute-force value and GiniMd for the first column of zc, for comparison
gmd(zc[, 1])
GiniMd(zc[, 1])
# GiniMd for the remaining two columns and for the linear predictor
GiniMd(zc[, 2])
GiniMd(zc[, 3])
GiniMd(f$linear.predictors)
# g-indexes for the fit (partials computed by default)
g <- gIndex(f)
g
# Extract the row labelled Total
g['Total',]
# Skip the partial g computations
gIndex(f, partials=FALSE)

# Binary check: compare GiniMd with the closed form 2np(1-p)/(n-1),
# where p is the proportion of ones
z <- c(rep(0,17), rep(1,6))
n <- length(z)
GiniMd(z)
2*mean(z)*(1-mean(z))*n/(n-1)

# Three-valued check: here a, b, c are counts rather than proportions,
# so the closed form divides by n*(n-1)
a <- 12; b <- 13; c <- 7; n <- a + b + c
A <- -.123; B <- -.707; C <- 0.523
xx <- c(rep(A, a), rep(B, b), rep(C, c))
GiniMd(xx)
2*(a*b*abs(A-B) + a*c*abs(A-C) + b*c*abs(B-C))/n/(n-1)

# Dichotomize y and refit with lrm; fun=plogis additionally transforms
# the linear predictors before computing the total g
y <- y > .8
f <- lrm(y ~ x * w * u, x=TRUE, y=TRUE)
gIndex(f, fun=plogis, funlabel='Prob[y=1]')
# Clear the datadist option set earlier in this example
options(datadist=NULL)
}
\keyword{predictive accuracy}
\keyword{robust}
\keyword{univar}

