Polygon methods


param.df <- data.frame(
  mean=c(0, 0, 2),
  sd=c(1, 2, 1))
density.df.list <- list()
for(param.i in 1:nrow(param.df)){
  one.param <- param.df[param.i,]
  observation <- seq(-4, 4, by=0.1)
  density.df.list[[param.i]] <- data.frame(
    param.i,
    param.fac=factor(param.i),
    one.param,
    observation,
    density=dnorm(observation, one.param$mean, one.param$sd),
    row.names=NULL)
}
density.df <- do.call(rbind, density.df.list)

library(ggplot2)
#> Warning: replacing previous import 'vctrs::data_frame' by 'tibble::data_frame'
#> when loading 'dplyr'
gg <- ggplot()+
  geom_line(aes(
    observation, density, color=param.fac),
    data=density.df)
directlabels::direct.label(gg, "top.polygons")

plot of chunk unnamed-chunk-1


density.df$mean.lab <- paste0("mean=", density.df$mean)
gg <- ggplot()+
  geom_line(aes(
    observation, density, color=param.fac),
    data=density.df)+
  directlabels::geom_dl(aes(
    observation, density,
    color=param.fac,
    label.group=param.fac,
    label=mean.lab),
    method="top.polygons",
    data=density.df)
gg

plot of chunk unnamed-chunk-1


gg <- ggplot()+
  geom_line(aes(
    observation, density, color=mean.lab, group=param.fac),
    data=density.df)
directlabels::direct.label(gg, "top.polygons")

plot of chunk unnamed-chunk-1


data(BodyWeight, package="nlme")
gg <- ggplot()+
  geom_line(aes(
    Time, weight, color=Rat),
    data=BodyWeight)+
  facet_grid(. ~ Diet)
gg

plot of chunk unnamed-chunk-1


directlabels::direct.label(gg, "right.polygons")

plot of chunk unnamed-chunk-1


gg.wider <- gg+xlim(-10, 70)
directlabels::direct.label(gg.wider, "right.polygons")

plot of chunk unnamed-chunk-1


directlabels::direct.label(gg.wider, "left.polygons")

plot of chunk unnamed-chunk-1

SO post about stats

https://github.com/tdhock/directlabels/issues/24


library("ggplot2")
library(directlabels)
set.seed(124234345)
# Generate data
df.2 <- data.frame("n_gram" = c("word1"),
                   "year" = rep(100:199),
                   "match_count" = runif(100 ,min = 1000 , max = 2000))
df.2 <- rbind(df.2, data.frame("n_gram" = c("word2"),
                               "year" = rep(100:199),
                               "match_count" = runif(100 ,min = 1000 , max = 2000)) )
# use stat smooth with geom_dl to get matching direct labels.
span <- 0.3
ggplot(df.2, aes(year, match_count, group=n_gram, color=n_gram)) +
  geom_line(alpha = I(7/10), color="grey") +
  stat_smooth(size=2, span=span, se=F) +
  geom_dl(aes(
    label=n_gram),
    ## method should be passed to geom_dl but ggplot2 (mistakenly?)
    ## passes it to stat_smooth, which rightly raises a warning about
    ## an unknown smoothing function.
    method = "last.qp", 
    stat="smooth", span=span) +
  xlim(c(100,220))+
  guides(colour="none")
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> Warning: Computation failed in `stat_smooth()`:
#> object 'last.qp' of mode 'function' was not found
#> Warning in grid.Call.graphics(C_lines, x$x, x$y, index, x$arrow): semi-
#> transparency is not supported on this device: reported only once per page

plot of chunk unnamed-chunk-2

serialize issue

https://github.com/tdhock/directlabels/issues/6


library("dplyr")
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library("ggplot2")
library("directlabels")
library("ggthemes")

## create data
aaa <- structure(
  list(x = c(28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 
             18, 17, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17),
       count = c(2344L, 
                 4088L, 3247L, 2808L, 2046L, 1669L, 1315L, 951L, 610L, 543L, 469L, 
                 370L, 937L, 1116L, 550L, 379L, 282L, 204L, 174L, 160L, 136L, 
                 132L, 128L, 122L),
       term = c("aaa", "aaa", "aaa", "aaa", "aaa", 
                "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "aaa", "bbb", "bbb", 
                "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", "bbb", 
                "bbb")),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, 
                -24L),
  .Names = c("x", "count", "term"))
## have a look
aaa
#> # A tibble: 24 x 3
#>        x count term 
#>    <dbl> <int> <chr>
#>  1    28  2344 aaa  
#>  2    27  4088 aaa  
#>  3    26  3247 aaa  
#>  4    25  2808 aaa  
#>  5    24  2046 aaa  
#>  6    23  1669 aaa  
#>  7    22  1315 aaa  
#>  8    21   951 aaa  
#>  9    20   610 aaa  
#> 10    19   543 aaa  
#> # … with 14 more rows

## initial plot
p2 <- aaa %>% ggplot(aes(x = x, y = count, group = term, colour = term)) + geom_line()
## have a look
p2

plot of chunk unnamed-chunk-3


## works
direct.label(p2)

plot of chunk unnamed-chunk-3




## plot with theme
p2 <- aaa %>% ggplot(aes(x = x, y = count, group = term, colour = term)) + geom_line() + theme_fivethirtyeight()
## have a look
p2

plot of chunk unnamed-chunk-3


## used to fail but should be OK as of 19 June 2020.
direct.label(p2)

plot of chunk unnamed-chunk-3

changepoint cost minima

This is a test for polygon.method with only one unaligned point per group as input, in particular the new bottom.polygons method.


data(LOPART100, package="directlabels")
abbrev.vec <- c(
  data="data and models",
  cost="cost of last change")
yfac <- function(l){
  factor(abbrev.vec[[l]], abbrev.vec)
}
COST <- function(dt){
  data.frame(y.var=yfac("cost"), dt)
}
DATA <- function(dt){
  data.frame(y.var=yfac("data"), dt)
}
sig.color <- "grey50"
tau <- 99
up.to.t <- 100
change.dt <- data.frame(tau, change=tau+0.5)
t.dt <- data.frame(up.to.t)
my.hjust <- function(x)ifelse(x < nrow(LOPART100$signal)/2, 0, 1)
min.dt <- do.call(rbind, by(
  LOPART100$cost,
  LOPART100$cost$Algorithm,
  function(df)df[which.min(df$cost_candidates),]))
cost.range <- range(LOPART100$cost$cost_candidates)
cost.h <- cost.range[2]-cost.range[1]
blank.dt <- data.frame(
  position=1, cost=cost.range[1]-cost.h/4)
label.colors <- c(
  "1"="#ff7d7d",
  "0"="#f6c48f")
library(ggplot2)
gg <- ggplot()+
  geom_blank(aes(
    position, cost),
    data=COST(blank.dt))+
  geom_vline(aes(
    xintercept=up.to.t),
    color=sig.color,
    data=t.dt)+
  geom_text(aes(
    up.to.t, 13,
    hjust=my.hjust(up.to.t),
    label=sprintf(
      "$t=%s$", up.to.t)),
    color=sig.color,
    data=DATA(t.dt))+
  geom_rect(aes(
    xmin=start, xmax=end,
    fill=paste(changes),
    ymin=-Inf, ymax=Inf),
    alpha=0.5,
    data=LOPART100$labels)+
  scale_fill_manual("label", values=label.colors)+
  theme_bw()+
  theme(panel.spacing=grid::unit(0, "lines"))+
  facet_grid(y.var ~ ., scales="free")+
  geom_text(aes(
    change, 1,
    hjust=my.hjust(change),
    label=sprintf(
      "$\\tau = %d$", tau)),
    vjust=0,
    data=DATA(change.dt))+
  geom_vline(aes(
    xintercept=change),
    data=change.dt)+
  geom_segment(aes(
    start-0.5, mean,
    size=Algorithm,
    color=Algorithm,
    xend=end+0.5, yend=mean),
    data=DATA(LOPART100$segments))+
  geom_point(aes(
    position, signal),
    color=sig.color,
    shape=1,
    data=DATA(LOPART100$signal))+
  scale_size_manual(values=c(
    OPART=1.5,
    LOPART=0.5),
    drop=FALSE)+
  scale_shape_manual(values=c(
    OPART=1,
    LOPART=2),
    drop=FALSE)+
  scale_color_manual(values=c(
    OPART="deepskyblue",
    LOPART="black"),
    drop=FALSE)+
  ylab("")+
  scale_x_continuous(
    "position $t,\\tau$",
    breaks=seq(0, 100, by=10))+
  geom_point(aes(
    change, cost_candidates,
    color=Algorithm, shape=Algorithm),
    data=COST(LOPART100$cost))+
  geom_point(aes(
    change, cost_candidates,
    color=Algorithm),
    data=COST(min.dt))
print(gg)
#> Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
#> resolveHJust(x$just, : semi-transparency is not supported on this device:
#> reported only once per page

plot of chunk unnamed-chunk-4


label.cost <- function(df){  
  gg+
    directlabels::geom_dl(aes(
      change, cost_candidates,
      color=Algorithm,
      label.group=Algorithm,
      label=sprintf("$\\tau^*_{%d} = %d$", up.to.t, tau)),
      method="bottom.polygons",
      data=COST(df))
}
label.cost(LOPART100$cost)
#> Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
#> resolveHJust(x$just, : semi-transparency is not supported on this device:
#> reported only once per page

plot of chunk unnamed-chunk-4


## to make sure it works when there is only one point to label.
label.cost(min.dt)
#> Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
#> resolveHJust(x$just, : semi-transparency is not supported on this device:
#> reported only once per page

plot of chunk unnamed-chunk-4

LOPART ROC curve

This is a test for polygon.method with only one unaligned point per group as input, in particular with right.polygons.


data(LOPART.ROC, package="directlabels")
algo.colors <- c(
  OPART="#0077CC",
  LOPART="black",
  SegAnnot="#22CC22")
library(ggplot2)
ggplot()+
  theme_bw()+
  scale_color_manual(values=algo.colors)+
  scale_size_manual(values=c(
    LOPART=1.5,
    OPART=1))+
  directlabels::geom_dl(aes(
    FPR, TPR,
    color=model.name,
    label=paste0(model.name, ifelse(is.na(auc), "", sprintf(
      " AUC=%.3f", auc
    )))),
    method=list(
      cex=0.8,
      directlabels::polygon.method(
        "right",
        offset.cm=0.5,
        padding.cm=0.05)),
    data=LOPART.ROC$points)+
  geom_path(aes(
    FPR, TPR,
    color=model.name,
    size=model.name,
    group=paste(model.name, test.fold)),
    data=LOPART.ROC$roc)+
  geom_point(aes(
    FPR, TPR,
    color=model.name),
    size=3,
    shape=21,
    fill="white",
    data=LOPART.ROC$points)+
  theme(
    panel.spacing=grid::unit(0, "lines"),
    legend.position="none"
  )+
  facet_grid(test.fold ~ Penalty + Parameters, labeller=label_both)+
  coord_equal()+
  scale_x_continuous(
    "False Positive Rate (test set labels)",
    breaks=c(0, 0.5, 1),
    labels=c("0", "0.5", "1"))+
  scale_y_continuous(
    "True Positive Rate (test set labels)",
    breaks=c(0, 0.5, 1),
    labels=c("0", "0.5", "1"))

plot of chunk unnamed-chunk-5

white or black text on colored background

The weighted method for rgb to grayscale conversion is used for the default text.color in polygon.method, and explained here https://www.tutorialspoint.com/dip/grayscale_to_rgb_conversion.htm

m <- RColorBrewer::brewer.pal.info
brewer.dt.list <- list()
for(brewer.row in 1:nrow(m)){
  brewer.name <- rownames(m)[[brewer.row]]
  brewer.info <- m[brewer.name, ]
  col.vec <- RColorBrewer::brewer.pal(brewer.info[, "maxcolors"], brewer.name)
  rgb.mat <- col2rgb(col.vec)
  hsv.mat <- rgb2hsv(rgb.mat)
  brewer.dt.list[[brewer.name]] <- data.frame(
    brewer.name,
    brewer.fac=factor(brewer.name, rownames(m)),
    brewer.row,
    category=factor(brewer.info[, "category"], c("seq", "qual", "div")),
    column=seq_along(col.vec),
    color=col.vec,
    t(rgb.mat),
    t(hsv.mat))
}
brewer.dt <- do.call(rbind, brewer.dt.list)
library(ggplot2)
ggplot()+
  theme_bw()+
  theme(panel.spacing=grid::unit(0, "lines"))+
  facet_grid(category ~ ., scales="free", space="free")+
  geom_tile(aes(
    factor(column), brewer.fac, fill=color),
    data=brewer.dt)+
  geom_text(aes(
    factor(column), brewer.fac, label=brewer.fac, color=ifelse(
      ((0.3 * red) + (0.59 * green) + (0.11 * blue))/255 < 0.5, "white", "black")),
    data=brewer.dt)+
  scale_fill_identity()+
  scale_color_identity()

plot of chunk unnamed-chunk-6

odd qp labels for timings figure

In the image below the strange thing in the labels is that the end of the pointer of nc::capture_melt_single is inside of the pointer for cdata::unpivot_to_blocks – this is ok, but we could probably avoid this by switching the order. we should be able to detect/avoid this using a linear inequality constraint: bottom of label box must be greater than next target down, etc. But if targets are too close together this could lead to no feasible solution.


data(odd_timings, package="directlabels")
odd4 <- subset(odd_timings, captures==4)
library(ggplot2)
gg <- ggplot()+
  geom_line(aes(
    N.col, median.seconds, color=fun),
    data=odd4)+
  scale_x_log10(limits=c(10, 1e6))+
  scale_y_log10()
directlabels::direct.label(gg, "right.polygons")

plot of chunk unnamed-chunk-7

TODO edit polygon.method so that the right panel labels do not cross – can this be added as a constraint in the qp, or do we just need to re-order?

two dlgrobs

This example has two geom_dl with the same method, but the grobs need different names to render correctly https://github.com/tdhock/directlabels/issues/30

data(odd_timings, package="directlabels")
zero <- subset(odd_timings, captures==0)
on.right <- with(zero, N.col==max(N.col))
funs.right <- unique(zero[on.right, "fun"])
is.right <- zero$fun %in% funs.right
timings.right <- zero[is.right,]
timings.left <- zero[!is.right,]
library(ggplot2)
gg <- ggplot()+
  geom_line(aes(
    N.col, median.seconds, color=fun),
    data=zero)+
  directlabels::geom_dl(aes(
    N.col, median.seconds, color=fun, label=fun),
    method="right.polygons",
    data=timings.left)+
  directlabels::geom_dl(aes(
    N.col, median.seconds, color=fun, label=fun),
    method="right.polygons",
    data=timings.right)+
  scale_x_log10(limits=c(10, 1e6))+
  scale_y_log10()
gg

plot of chunk unnamed-chunk-8