% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/by_split.R
\name{by_split}
\alias{by_split}
\title{Calculate split scores per participant}
\usage{
by_split(
  data,
  participants,
  fn_score,
  stratification = NULL,
  replications = 1,
  method = c("random", "odd_even", "first_second"),
  replace = FALSE,
  split_p = 0.5,
  subsample_p = 1,
  subsample_n = NULL,
  careful = TRUE,
  match_participants = FALSE,
  ncores = detectCores(),
  seed = NULL,
  verbose = TRUE
)
}
\arguments{
\item{data}{(data frame) data frame containing data to score. Data should be
in long format, with one row per combination of participant and trial or
item.}

\item{participants}{(vector) Vector that identifies participants in
\code{data}.}

\item{fn_score}{(function) receives full or split sets, should return a
single number.}

\item{stratification}{(vector). Vector that identifies which subsets of
\code{data} should be split separately (denoted as strata in splitting
functions) in order to ensure they are evenly distributed between parts.
By default, the dataset of a participant forms a single stratum.}

\item{replications}{(numeric) Number of replications for which split scores
are calculated.}

\item{method}{(character) Splitting method. Note that the \code{first_second}
and \code{odd_even} splitting methods will only deliver a valid split with
default settings for other arguments (\code{split_p = 0.5, replace = FALSE,
subsample_p = 1}).}

\item{replace}{(logical) If TRUE, stratum is sampled with replacement.}

\item{split_p}{(numeric) Desired length of both parts, expressed as a
proportion of the length of the data per participant. If \code{split_p}
is larger than 1 and \code{careful} is FALSE, then parts are automatically
sampled with replacement.}

\item{subsample_p}{(numeric) Subsample a proportion of \code{stratum} before
splitting.}

\item{subsample_n}{(numeric) Subsample a number of participants before
splitting.}

\item{careful}{(logical) If TRUE, stop with an error when called with
arguments that may yield unexpected splits.}

\item{match_participants}{(logical) Default FALSE. If FALSE, the split-halves
are newly randomized for each iteration and participant. If TRUE, the
split-halves are newly randomized for each replication, but within a
replication the same randomization is applied across participants. If the
order of rows of datasets per participant denotes similar observations
(such as items in a questionnaire), \code{match_participants} can be set to
TRUE to ensure that per iteration, the same items are assigned to each part
of the split-halves across participants. If \code{method} is "odd_even" or
"first_second", splits are based on row number, so
\code{match_participants} generally has little effect. If TRUE, each stratum
should have the same number of rows, as checked via
\code{\link{check_strata}}.}

\item{ncores}{(integer). By default, all available CPU cores are used. If 1,
split replications are executed serially (via \code{\link{lapply}}). If
greater than 1, split replications are executed in parallel (via
\code{\link{parLapply}}).}

\item{seed}{(integer). When split replications are executed in parallel,
\code{seed} can be used to specify a random seed from which the random seeds
for each worker are generated via \code{\link{clusterSetRNGStream}}.}

\item{verbose}{(logical) If TRUE, reports progress. Note that progress across
split replications is not displayed when these are executed in parallel.}
}
\value{
(data frame) Returns a data frame with a column for
  \code{participant}, a column \code{replication} that counts split
  replications, and \code{score_1} and \code{score_2} for the scores
  calculated for each part via \code{fn_score}.
}
\description{
Calculates split scores, by applying \code{fn_score} to subsets of
\code{data} as specified via \code{participants}. It provides a range of
additional arguments for different splitting methods and to support parallel
processing. To learn more about writing scoring algorithms for use with the
\code{\link{splithalfr}} package, see the included vignettes. \code{\link{by_split}}
is modeled after the \code{\link{by}} function, accepting similar values for
the first three arguments (\code{data}, \code{INDICES}, \code{FUN}). For more
information about different methods for splitting data, see
\code{\link{get_split_indexes_from_stratum}}. For more information about
stratification, see \code{\link{split_df}}.
}
\examples{
# Note: this example reuses R code from the "rapi_sum" vignette
data("ds_rapi", package = "splithalfr")
# Reshape from wide to long: one row per participant (twnr) and item
ds_long <- reshape(
  ds_rapi,
  direction = "long",
  idvar = "twnr",
  timevar = "item",
  varying = paste0("V", 1:23),
  v.names = "answer"
)
# Scoring function: RAPI sum score over the answers in the rows it receives
rapi_fn_score <- function(data) {
  sum(data$answer)
}
# Scores on the full data, one per participant
by(
  data = ds_long,
  INDICES = ds_long$twnr,
  FUN = rapi_fn_score
)
# Permutation split, one iteration, items matched across participants
split_scores <- by_split(
  data = ds_long,
  participants = ds_long$twnr,
  fn_score = rapi_fn_score,
  match_participants = TRUE,
  ncores = 1
)
# Mean Flanagan-Rulon coefficient across splits
fr <- mean(split_coefs(split_scores, flanagan_rulon))
}
