% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rfcca.R
\name{rfcca}
\alias{rfcca}
\title{Random Forest with Canonical Correlation Analysis}
\usage{
rfcca(
  X,
  Y,
  Z,
  ntree = 200,
  mtry = NULL,
  nodesize = NULL,
  nodedepth = NULL,
  nsplit = 10,
  importance = FALSE,
  finalcca = c("cca", "scca", "rcca"),
  bootstrap = TRUE,
  samptype = c("swor", "swr"),
  sampsize = if (samptype == "swor") function(x) {     x * 0.632 } else function(x) {  
      x },
  forest = TRUE,
  membership = FALSE,
  bop = TRUE,
  Xcenter = TRUE,
  Ycenter = TRUE,
  ...
)
}
\arguments{
\item{X}{The first multivariate data set which has \eqn{n} observations and
\eqn{px} variables. A data.frame of numeric values.}

\item{Y}{The second multivariate data set which has \eqn{n} observations and
\eqn{py} variables. A data.frame of numeric values.}

\item{Z}{The set of subject-related covariates which has \eqn{n} observations
and \eqn{pz} variables. Used in random forest growing. A data.frame with
numeric values and factors.}

\item{ntree}{Number of trees.}

\item{mtry}{Number of z-variables randomly selected as candidates for
splitting a node. The default is \eqn{pz/3} where \eqn{pz} is the number of
z-variables. Values are always rounded up.}

\item{nodesize}{Forest average number of unique data points in a terminal
node. The default is \eqn{3 * (px+py)} where \eqn{px} and \eqn{py} are
the number of x and y variables, respectively.}

\item{nodedepth}{Maximum depth to which a tree should be grown. In the
default, this parameter is ignored.}

\item{nsplit}{Non-negative integer value for the number of random splits to
consider for each candidate splitting variable. When zero or \code{NULL},
all possible splits are considered.}

\item{importance}{Should variable importance of z-variables be assessed? The
default is \code{FALSE}.}

\item{finalcca}{Which CCA should be used for final canonical correlation
estimation? Choices are \code{cca}, \code{scca} and \code{rcca}, see below
for details. The default is \code{cca}.}

\item{bootstrap}{Should the data be bootstrapped? The default value is
\code{TRUE} which bootstraps the data by sampling without replacement.
If \code{FALSE} is chosen, the data is not bootstrapped. It is not possible
to return OOB predictions and variable importance measures if \code{FALSE}
is chosen.}

\item{samptype}{Type of bootstrap. Choices are \code{swor} (sampling without
replacement/sub-sampling) and \code{swr} (sampling with
replacement/bootstrapping). The default action here (as in
\code{randomForestSRC}) is sampling without replacement.}

\item{sampsize}{Size of sample to draw. For sampling without replacement, by
default it is .632 times the sample size. For sampling with replacement, it
is the sample size.}

\item{forest}{Should the forest object be returned? It is used for prediction
on new data. The default is \code{TRUE}.}

\item{membership}{Should terminal node membership and inbag information be
returned? The default is \code{FALSE}.}

\item{bop}{Should the Bag of Observations for Prediction (BOP) for training
observations be returned? The default is \code{TRUE}.}

\item{Xcenter}{Should the columns of X be centered? The default is
\code{TRUE}.}

\item{Ycenter}{Should the columns of Y be centered? The default is
\code{TRUE}.}

\item{...}{Optional arguments to be passed to other methods.}
}
\value{
An object of class \code{(rfcca,grow)} which is a list with the
following components:

\item{call}{The original call to \code{rfcca}.}
\item{n}{Sample size of the data (\code{NA}'s are omitted).}
\item{ntree}{Number of trees grown.}
\item{mtry}{Number of variables randomly selected for splitting at each
node.}
\item{nodesize}{Minimum forest average number of unique data points in a
terminal node.}
\item{nodedepth}{Maximum depth to which a tree is allowed to be grown.}
\item{nsplit}{Number of randomly selected split points.}
\item{xvar}{Data frame of x-variables.}
\item{xvar.names}{A character vector of the x-variable names.}
\item{yvar}{Data frame of y-variables.}
\item{yvar.names}{A character vector of the y-variable names.}
\item{zvar}{Data frame of z-variables.}
\item{zvar.names}{A character vector of the z-variable names.}
\item{leaf.count}{Number of terminal nodes for each tree in the forest.
Vector of length \code{ntree}.}
\item{bootstrap}{Was the data bootstrapped?}
\item{forest}{If \code{forest=TRUE}, the \code{rfcca} forest object is
returned. This object is used for prediction with new data.}
\item{membership}{A matrix recording terminal node membership where each
cell represents the node number that an observation falls in for that
tree.}
\item{importance}{Variable importance measures (VIMP) for each z-variable.}
\item{inbag}{A matrix recording inbag membership where each cell represents
whether the observation is in the bootstrap sample in the corresponding
tree.}
\item{predicted.oob}{OOB predicted canonical correlations for training
observations based on the selected final canonical correlation estimation
method.}
\item{predicted.coef}{Predicted canonical weight vectors for x- and
y-variables.}
\item{bop}{If \code{bop=TRUE}, a list containing BOP for each training
observation is returned.}
\item{finalcca}{The selected CCA used for final canonical correlation
estimations.}
\item{rfsrc.grow}{An object of class \code{(rfsrc,grow)} is returned. This
object is used for prediction with training or new data.}
}
\description{
Estimates the canonical correlations between two sets of variables depending
on the subject-related covariates.
}
\section{Details}{
 \describe{

\item{\emph{Final canonical correlation estimation:}}{Final canonical
correlation can be computed with CCA (Hotelling, 1936), Sparse CCA (Witten
et al., 2009) or Regularized CCA (Vinod, 1976; Leurgans et al., 1993). If
Regularized CCA is used, \eqn{\lambda_1} and \eqn{\lambda_2} should be
specified.}

}
}

\examples{
\donttest{
## load generated example data
data(data, package = "RFCCA")
set.seed(2345)

## define train/test split
smp <- sample(1:nrow(data$X), size = round(nrow(data$X) * 0.7),
  replace = FALSE)
train.data <- lapply(data, function(x) {x[smp, ]})
test.Z <- data$Z[-smp, ]

## train rfcca
rfcca.obj <- rfcca(X = train.data$X, Y = train.data$Y, Z = train.data$Z,
  ntree = 100, importance = TRUE)

## print the grow object
print(rfcca.obj)

## get the OOB predictions
pred.oob <- rfcca.obj$predicted.oob

## predict with new test data
pred.obj <- predict(rfcca.obj, newdata = test.Z)
pred <- pred.obj$predicted

## get the variable importance measures
z.vimp <- rfcca.obj$importance

## train rfcca and estimate the final canonical correlations with "scca"
rfcca.obj2 <- rfcca(X = train.data$X, Y = train.data$Y, Z = train.data$Z,
  ntree = 100, finalcca = "scca")
}

}
\references{
Hotelling, H. (1936). Relations between two sets of variates.
Biometrika, 28(3/4), 321–377.

Leurgans, S. E., Moyeed, R. A., & Silverman, B. W. (1993).
Canonical correlation analysis when the data are curves. Journal of the
Royal Statistical Society: Series B (Methodological), 55(3), 725-740.

Vinod, H.D. (1976). Canonical ridge and econometrics of joint
production. Journal of Econometrics, 4(2), 147–166.

Witten, D. M., Tibshirani, R., & Hastie, T. (2009). A penalized
matrix decomposition, with applications to sparse principal components and
canonical correlation analysis. Biostatistics, 10(3), 515-534.
}
\seealso{
\code{\link{predict.rfcca}}
\code{\link{global.significance}}
\code{\link{vimp.rfcca}}
\code{\link{print.rfcca}}
}
