# Simple Logit with glmnet

library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-4
### load data
# Read the data set over HTTP; column `y` is the response used below and the
# remaining columns are used as predictors.
ddf = read.csv(file="http://rob-mcculloch.org/data/swe8there.csv")
### pull off x and y
# Select the response by exact name and treat every other column as a
# predictor, so building x never depends on `y` being the last column.
stopifnot("y" %in% names(ddf))
y = ddf[["y"]]
nc = ncol(ddf)
# drop = FALSE keeps a data frame even if only one predictor column remains.
x = as.matrix(ddf[, setdiff(names(ddf), "y"), drop = FALSE])
print(dim(x))
## [1] 6166  200
print(length(y))
## [1] 6166
### fit cv Lasso
# Cross-validated logistic regression (binomial family) with the lasso penalty.
# NOTE: cv.glmnet picks its folds at random, so lambda.min / lambda.1se (and
# everything derived from them below) can change between runs unless a seed is
# set before this call.
cvfit = cv.glmnet(x, y,
                    family = "binomial",
                    alpha = 1,                        # lasso - 1, ridge - 0
                    nfolds = 10                       # exact argument name; `nfold` only worked via partial matching
                 )
### plot Lasso
# Stack the two diagnostic plots (cross-validation result on top, the
# underlying glmnet fit below) in a 2 x 1 layout. par() returns the previous
# settings, which are restored afterwards so later plots are not forced into
# the same two-row grid.
old_par = par(mfrow = c(2, 1))
plot(cvfit)
plot(cvfit$glmnet.fit)
par(old_par)

### look at lasso coefs
p = ncol(x)   # number of predictor columns
# lambda vs the number of non-zero coefficients
# Use the exact element name `glmnet.fit`; `cvfit$glmnet` only resolved through
# partial matching of `$` and would break if another element ever started with
# "glmnet".
plot(cvfit$glmnet.fit$lambda, cvfit$glmnet.fit$df,
     xlab = "lambda", ylab = "number of non-zero coefficients")
# Mark the two lambda values selected by cross-validation.
abline(v = cvfit$lambda.1se, col = "red")    # lambda.1se
abline(v = cvfit$lambda.min, col = "blue")   # lambda.min

# look at coefficients at lambda.1se: which are pos/neg, interpret
coefL = coef(cvfit$glmnet.fit, s=cvfit$lambda.1se)
# coef() returns a sparse one-column matrix. Flatten it to a plain numeric
# vector (plus a parallel vector of row names) before sorting: calling order()
# on the sparse object works, but triggers the Matrix message
# "<sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient".
coef_names = rownames(coefL)
coef_vals = as.numeric(as.matrix(coefL))
# Indices of the coefficients sorted from most positive to most negative.
oo = order( coef_vals, decreasing = TRUE )
# NOTE(review): coefL includes the intercept row, so "(Intercept)" is ranked
# alongside the phrase terms (it is 11th in the recorded output below).
# positive coefficients
ncoef=30
cat("Big positive coefficients:\n")
## Big positive coefficients:
# head() (rather than oo[1:ncoef]) avoids NA indices if there are fewer than
# ncoef coefficients.
print(coef_names[head(oo, ncoef)])
##  [1] "can.wait"        "melt.mouth"      "out.world"       "best.meal"      
##  [5] "cozi.atmospher"  "great.price"     "best.food"       "serv.great"     
##  [9] "price.reason"    "servic.alway"    "(Intercept)"     "great.experi"   
## [13] "great.place"     "breakfast.lunch" "white.wine"      "friend.servic"  
## [17] "great.time"      "top.notch"       "cook.perfect"    "food.wonder"    
## [21] "veri.tasti"      "year.now"        "chicken.beef"    "definit.go"     
## [25] "daili.special"   "good.too"        "food.enjoy"      "servic.friend"  
## [29] "larg.portion"    "like.famili"
print(coef_vals[head(oo, ncoef)])
##  [1] 1.6705504 1.4533918 1.2438039 1.2287070 1.1655835 1.0843139 1.0809273
##  [8] 0.9735067 0.9705231 0.9095727 0.9082489 0.8775476 0.8512984 0.8494758
## [15] 0.7490832 0.7446157 0.7348737 0.7228204 0.6858694 0.6203141 0.6058231
## [22] 0.6054181 0.5812378 0.5539081 0.5181082 0.5149701 0.5068833 0.4821377
## [29] 0.4818745 0.4810275
# negative coefficients
# tail() takes the last ncoef entries of the descending order, so these are
# listed from least negative to most negative.
cat("Big negative coefficients:\n")
## Big negative coefficients:
print(coef_names[tail(oo, ncoef)])
##  [1] "out.restaur"     "never.heard"     "still.pretti"    "hang.out"       
##  [5] "half.hour"       "time.ate"        "too.bad"         "didn.want"      
##  [9] "order.food"      "don.think"       "anoth.tabl"      "just.becaus"    
## [13] "stood.out"       "sat.down"        "after.got"       "dri.out"        
## [17] "waitress.took"   "food.wasn"       "ran.out"         "minut.befor"    
## [21] "wait.minut"      "custom.servic"   "tourist.trap"    "didn.even"      
## [25] "mess.up"         "after.wait"      "never.again"     "veri.disappoint"
## [29] "servic.terribl"  "mediocr.food"
print(coef_vals[tail(oo, ncoef)])
##  [1] -0.4313684 -0.4535332 -0.4701116 -0.5346012 -0.5684762 -0.5846042
##  [7] -0.6445708 -0.6755352 -0.6762567 -0.7257156 -0.7293355 -0.7308214
## [13] -0.7627184 -0.7916123 -0.8125207 -0.8317084 -0.8710503 -0.9589081
## [19] -1.1022563 -1.1275869 -1.2401135 -1.2842805 -1.3810230 -1.5072303
## [25] -1.5663067 -1.7470438 -1.7498940 -2.5819852 -2.6992290 -2.8558035