% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summ_roc.R
\name{summ_roc}
\alias{summ_roc}
\alias{summ_rocauc}
\alias{roc_plot}
\alias{roc_lines}
\title{Summarize distributions with ROC curve}
\usage{
summ_roc(f, g, n_grid = 1001)

summ_rocauc(f, g, method = "expected")

roc_plot(roc, ..., add_bisector = TRUE)

roc_lines(roc, ...)
}
\arguments{
\item{f}{A pdqr-function of any \link[=meta_type]{type} and
\link[=meta_class]{class}. Represents "true" distribution of "negative" values.}

\item{g}{A pdqr-function of any type and class. Represents "true"
distribution of "positive" values.}

\item{n_grid}{Number of points of ROC curve to be computed.}

\item{method}{Method of computing ROC AUC. Should be one of "expected",
"pessimistic", "optimistic" (see Details).}

\item{roc}{A data frame representing ROC curve. Typically an output of
\code{summ_roc()}.}

\item{...}{Other arguments to be passed to \code{plot()} or \code{lines()}.}

\item{add_bisector}{If \code{TRUE} (default), \code{roc_plot()} adds a bisector line as
a reference for the "random guess" classifier.}
}
\value{
\code{summ_roc()} returns a data frame with \code{n_grid} rows and columns
"threshold" (grid of classification thresholds, ordered decreasingly), "fpr",
and "tpr" (corresponding false and true positive rates, ordered
non-decreasingly by "fpr").

\code{summ_rocauc()} returns a single number representing the area under the ROC curve.

\code{roc_plot()} and \code{roc_lines()} create plotting side effects.
}
\description{
These functions help you perform a ROC ("Receiver Operating Characteristic")
analysis for a one-dimensional linear classifier: values not greater than some
threshold are classified as "negative", and values greater than the threshold
are classified as "positive". Here the input pair of pdqr-functions represents
"true" distributions of values with "negative" (\code{f}) and "positive" (\code{g})
labels.
}
\details{
ROC curve describes how well classifier performs under different
thresholds. For all possible thresholds two classification metrics are
computed which later form x and y coordinates of a curve:
\itemize{
\item \strong{False positive rate (FPR)}: proportion of "negative" distribution which
was (incorrectly) classified as "positive". This is the same as one minus
"specificity" (proportion of "negative" values classified as "negative").
\item \strong{True positive rate (TPR)}: proportion of "positive" distribution which
was (correctly) classified as "positive". This is also called "sensitivity".
}

\code{summ_roc()} creates a uniform grid of \code{n_grid} decreasing values (so that
output points of the ROC curve are ordered from left to right) covering the
range of all meaningful thresholds. This range is computed as a slightly
extended range of \code{f} and \code{g} supports (the extension is needed to achieve
extreme values of "fpr" in the presence of "discrete" type). Then FPR and TPR
are computed for every threshold.

\code{summ_rocauc()} computes a common general (without any particular threshold
in mind) diagnostic value of a classifier, \strong{area under ROC curve} ("ROC AUC"
or "AUROC"). Numerically it is equal to the probability of a random variable
with distribution \code{g} being \emph{strictly greater} than a random variable with
distribution \code{f}, plus a \emph{possible correction for them being equal}, with
multiple ways to account for it. Method "pessimistic" doesn't add a
correction, "expected" (the default) adds half of the probability of \code{f} and
\code{g} being equal, and "optimistic" adds the full probability. \strong{Note} that
this means that a correction might be done only if both input pdqr-functions
have "discrete" type. See
\link[=methods-group-generic]{pdqr methods for "Ops" group generic family} for
more details on comparing functions.

\code{roc_plot()} and \code{roc_lines()} perform plotting (with
\link[graphics:plot.default]{plot()}) and adding (with \link[graphics:lines]{lines()})
ROC curves respectively.
}
\examples{
# Compute ROC curve data: the first argument represents "negative" values,
# the second one represents "positive" values
d_norm_1 <- as_d(dnorm)
d_norm_2 <- as_d(dnorm, mean = 1)
roc <- summ_roc(d_norm_1, d_norm_2)
head(roc)

# `summ_rocauc()` is equivalent to probability of `g > f`
summ_rocauc(d_norm_1, d_norm_2)
summ_prob_true(d_norm_2 > d_norm_1)

# Plotting
## Create a new plot with ROC curve
roc_plot(roc)
## Add to it ROC curve computed with swapped input distributions
roc_lines(summ_roc(d_norm_2, d_norm_1), col = "blue")

# For "discrete" functions `summ_rocauc()` can produce different outputs
# depending on `method`
d_dis_1 <- new_d(1:2, "discrete")
d_dis_2 <- new_d(2:3, "discrete")
## Default method is "expected"
summ_rocauc(d_dis_1, d_dis_2)
summ_rocauc(d_dis_1, d_dis_2, method = "pessimistic")
summ_rocauc(d_dis_1, d_dis_2, method = "optimistic")
## These methods correspond to different ways of plotting ROC curves
roc <- summ_roc(d_dis_1, d_dis_2)
## Default line plot for "expected" method
roc_plot(roc, main = "Different type of plotting ROC curve")
## Method "pessimistic" (drawn with `type = "s"`)
roc_lines(roc, type = "s", col = "blue")
## Method "optimistic" (drawn with `type = "S"`)
roc_lines(roc, type = "S", col = "green")
}
\seealso{
\code{\link[=summ_separation]{summ_separation()}} for computing optimal separation threshold.

Other summary functions: 
\code{\link{summ_center}()},
\code{\link{summ_classmetric}()},
\code{\link{summ_distance}()},
\code{\link{summ_entropy}()},
\code{\link{summ_hdr}()},
\code{\link{summ_interval}()},
\code{\link{summ_moment}()},
\code{\link{summ_order}()},
\code{\link{summ_prob_true}()},
\code{\link{summ_pval}()},
\code{\link{summ_quantile}()},
\code{\link{summ_separation}()},
\code{\link{summ_spread}()}
}
\concept{summary functions}
