Comparing interaction effect plots involving continuous variables from ggplot2 vs. base R using the effects package

I want to plot interaction effects on my data set. I cannot share the full data set due to confidentiality reasons, but I have added the output of the effect() function, which can be substituted into the plot functions to reproduce my results

The interaction effect is between two continuous variables, varP and varL. The model is a binomial probit glm. I am using the effects() package.

Calling base R plot() yields the following chart:

plot(effect("varL:varP", hx.x), multiline = TRUE)

Calling ggplot yields an error, because ggplot2 cannot deal with continuous variables (Error: A continuous variable can not be mapped to linetype). So I decided to use cut() to transform the continuous into a categorical variable and redo the regression. Calling ggplot now yields the following plot:

ggplot(data.frame(effect("varL:varP_range", hx.x1))) + 
 geom_line(aes(varL,fit,linetype=varP_range)) + 
 theme_bw() + 
 geom_point(aes(varL,fit, shape = varP_range), size = 4)

The effect() call results in the following data frame:

structure(list(varL = c(0, 4900000, 9800000, 1.5e+07, 2e+07, 
0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 
2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 
1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07, 0, 4900000, 
9800000, 1.5e+07, 2e+07, 0, 4900000, 9800000, 1.5e+07, 2e+07), 
    varP_range = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
    5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 
    8L, 8L), .Label = c("(0,0.1]", "(0.1,0.2]", "(0.2,0.3]", 
    "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]", "(0.7,0.8]", 
    NA), class = "factor"), fit = c(0.0493753018091432, 0.0674980435061065, 
    0.0903776022141578, 0.120475525568566, 0.155489425305936, 
    0.0137978311572348, 0.0146948327184384, 0.0156415523658014, 
    0.0167030042368772, 0.0177811545669692, 0.0283241226002015, 
    0.0320688611660079, 0.0362132772188478, 0.0410808356583781, 
    0.0462488565292175, 0.0376893470992477, 0.0434689290038736, 
    0.0499436899670642, 0.0576329205782917, 0.0658761148111993, 
    0.0461027343009466, 0.0491516310196363, 0.0523594626652533, 
    0.0559432088894986, 0.0595689358953948, 0.0271243286792884, 
    0.0288090021916312, 0.0305797680715237, 0.0325565879397662, 
    0.0345556259933517, 0.0585874566843392, 0.0695486360371181, 
    0.0820248510196267, 0.0970343941431089, 0.113280038210583, 
    0.0209075509863267, 0.0315251673839061, 0.046253681947139, 
    0.0674584535698224, 0.0942769896468577), se = c(0.026293040668674, 
    0.0305781859107515, 0.0540612604209606, 0.0833019722980397, 
    0.112487542810938, 0.0338534463222995, 0.0398771851800222, 
    0.0731653210576088, 0.113913900015802, 0.15437353282528, 
    0.0409108254231718, 0.0511475671899819, 0.0972355260310069, 
    0.152187250282481, 0.206403037601197, 0.0130040830190118, 
    0.0163131582968098, 0.03036212612247, 0.0472104967827754, 
    0.0638665398985353, 0.00832820196917705, 0.0121268883535081, 
    0.0232740805148284, 0.0361535949146859, 0.0487814483071773, 
    0.0113362224665359, 0.0147098330114556, 0.027542020584634, 
    0.0427882020481342, 0.057830481511906, 0.01601220706873, 
    0.0215340145342622, 0.040911260287911, 0.0636748713064177, 
    0.0860732173940343, 0.0419361989774607, 0.0474273262485864, 
    0.085507397635156, 0.132979171779259, 0.180309829530109), 
    lower = c(0.0443332267166309, 0.0600183728153565, 0.0743144400165266, 
    0.0907939516369112, 0.108668209187463, 0.0116238722892657, 
    0.012023490450893, 0.0108135425488573, 0.00937570172867644, 
    0.00809811278621186, 0.0235056239743921, 0.025500658500369, 
    0.0234608868503482, 0.0208470526400479, 0.0184481609874694, 
    0.0356438216686479, 0.0406013691901053, 0.0441057429680943, 
    0.0477084891588961, 0.0512971927819841, 0.0445466712620454, 
    0.0467806376034436, 0.0476567944606461, 0.048393808464277, 
    0.0490625016612481, 0.0257635103540086, 0.0269634191042765, 
    0.0270321095726769, 0.0269061995593561, 0.0267392100653416, 
    0.0550074959164803, 0.0640838876027606, 0.0705422656571929, 
    0.0773047979652944, 0.0841065921246788, 0.017106637503393, 
    0.025481438787035, 0.0321633667203674, 0.0395762352261805, 
    0.0476311065775831), upper = c(0.0548650233126245, 0.0756784493915628, 
    0.108885802402636, 0.156404820719125, 0.213976690522108, 
    0.0163138248731072, 0.0178616891841985, 0.0222128708020368, 
    0.0284614557616305, 0.0359832219741959, 0.0339378335208811, 
    0.039975201853988, 0.0541535055907615, 0.0749340625856128, 
    0.100655675600276, 0.0398297048294564, 0.0464977933470156, 
    0.0563819985882714, 0.0691131684577491, 0.0834778881744663, 
    0.0477021593328273, 0.0516176358055639, 0.0574233019439704, 
    0.0643924091986213, 0.0717620710757599, 0.0285446055381808, 
    0.0307584171780848, 0.0345046440925246, 0.0391516285886827, 
    0.0441582201173176, 0.062347826961093, 0.0753653653841884, 
    0.0948621519669373, 0.120230399782042, 0.149038245196167, 
    0.0254001895885684, 0.0387079476679479, 0.0649209421145592, 
    0.108535107340218, 0.16815843130232)), .Names = c("varL", 
"varP_range", "fit", "se", "lower", "upper"), row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40"), class = "data.frame")

My questions are:

1) How does effect() from the effects package transform the continuous into a categorical variable? How would this be replicated with ggplot()?

2) What is the reason that the lines in the base R plot all intersect at the same coordinate?

Answer

To answer your first question: To define the factors, Effects uses

nice(seq(min(var), max(var), length.out=5))

where nice is defined like so (from here):

nice <- function (x, direction = c("round", "down", "up"), lead.digits = 1) {
  direction <- match.arg(direction)
  if (length(x) > 1){
    result <- sapply(x, nice, direction = direction, lead.digits = lead.digits)
    if (anyDuplicated(result)) result <- nice(x, direction=direction, lead.digits = lead.digits + 1)
    return(result)
  }
  if (x == 0) 
    return(0)
  power.10 <- floor(log(abs(x), 10))
  if (lead.digits > 1) 
    power.10 <- power.10 - lead.digits + 1
  lead.digit <- switch(direction, round = round(abs(x)/10^power.10), 
                       down = floor(abs(x)/10^power.10), up = ceiling(abs(x)/10^power.10))
  sign(x) * lead.digit * 10^power.10
}

An example of using it:

library(effects)

set.seed(123)
x = rnorm(100)
z = rexp(100)
y = factor(sample(1:2, 100, replace=T))

test = glm(y~x+z+x*z, family = binomial(link = "probit"))

preddat <- matrix('', 25, 100)
preddat <- expand.grid(nice(seq(min(x), max(x), length.out=5)), nice(seq(min(z), max(z), length.out=5)))
colnames(preddat) <- c("x", "z")
predicts <- predict(test, preddat, type = "response")
dim(predicts) <- c(5,5)

effectspred <- pnorm(effect("x:z", test)$fit)
dim(effectspred) <- c(5,5)

all.equal(effectspred, predicts)

[1] TRUE

This is straight forward to use with ggplot:

library(tidyverse)

predicts %>% as.data.frame() %>%
  gather() %>% ggplot() + geom_line(aes(x = rep(nice(seq(min(x), max(x), length.out=5)), 5), y = value, color=key))

And regarding question 2, an intuitive way to think about it might be that, since the mean of Yhat doesn't change (predicted with fixed z), all the standard normal CDFs of the partial effects share the same intersection. E.g.

sapply(seq(0, 5, length.out = 15), function(k) {
  predict(test, data.frame(x = seq(-20, 20, length.out = 200), z = k), type = "response")}) %>%
  as.data.frame() %>% gather() %>% ggplot() + 
  geom_line(aes(x = rep(seq(-20, 20, length.out = 200), 15), y = value, color = key))

Blog

Thursday, 8 September 2016

Comparing interaction effect plots involving continuous variables from ggplot2 vs. base R using the effects package

No comments:

Post a Comment

c++ - Does curly brackets matter for empty constructor?