################################################################################ # R CODE USED IN THE SEMINAR: # 'Shape-constrained splines: Applications and examples in R # Antonio Gasparrini and Zaid Chalabi # Centre for Statistical Methodology # London School of Hygiene and Tropical Medicine # 30 October 2015 ################################################################################ # GENERATE THE DATA set.seed(1) x <- sort(c(runif(98,0,10),0,10)) y <- (x-3)^2 + rnorm(100,0,10) # PLOT plot(x,y,main="Scatterplot of x and y",col=grey(0.7),pch=19) lines(x,(x-3)^2,lty=2,lwd=2) # NB: SET par(mar=c(4,4,2,1)), EXCLUDE TITLE AND SAVE PDF 6*5 # GENERATE B-SPLINES BASIS TRANSFORMATIONS source("ps.R") X <- ps(x,int=T) # GRAPHICAL REPRESENTATION matplot(x,X,type="l",ylim=c(0,1),lwd=2,main="Splines basis terms", ylab=expression(b[kr](x))) #segments(attr(X,"knots"),0,attr(X,"knots"),1,lty=3,col=grey(0.7)) # REGRESSION MODEL lm <- lm(y~0+X) text(x[apply(X,2,which.max)],c(0.78,0.92), formatC(coef(lm),digits=2,format="f"),cex=0.7) # FITTED CURVE AS LINEAR CONTRIBUTION OF SPLINE TERMS coefmat <- matrix(coef(lm),nrow(X),ncol(X),byrow=T) plot(x,y,main="Fitted curve",col=grey(0.7),pch=19) matlines(x,X*coefmat,type="l",ylim=c(0,1)) lines(x,rowSums(X*coefmat),col=2,lwd=2) # COMPARISON plot(x,y,main="Fitted and true",col=grey(0.7),pch=19) lines(x,(x-3)^2,lty=2,lwd=2) lines(x,predict(lm),col=2,lwd=2) legend("topleft",c("Fitted","True"),col=2:1,lty=1:2,inset=0.05) ################################################################################ # (MINUS) LOG-LIKELIHOOD FUNCTION ll <- function(beta,y,X) { n <- length(y) p <- length(beta) mu <- X%*%beta sigma2 <- sum((y-mu)^2)/(n-p) ll <- -0.5*log(2*pi*sigma2)-1/(2*sigma2)*(y-mu)^2 return(-sum(ll)) } # (MINUS) PARTIAL DERIVATIVES OF LOG-LIKELIHOOD FUNCTION dll <- function(beta,y,X) { n <- length(y) p <- length(beta) mu <- X%*%beta sigma2 <- sum((y-mu)^2)/(n-p) dll <- apply(X,2,function(x) sum(x*(y-mu)/sigma2)) return(-dll) } # LOG-LIKELIHOOD AND PARTIAL DERIVATIVES OF THE FITTED MODEL ll(coef(lm),y,X) dll(coef(lm),y,X) # REPLICATE THE MODEL ABOVE USING A GENERAL-PURPOSE OPTIMIZATION FUNCTION # STARTING VALUES init <- coef(lm) + rnorm(ncol(X)) # OPTIMIZATION opt1 <- optim(init,ll,dll,y=y,X=X,method="BFGS", control=list(trace=T,REPORT=1)) # COMPARISON plot(x,y,main="Fitted curve",col=grey(0.7),pch=19) lines(x,predict(lm),col=2,lwd=2) lines(x,X%*%opt1$par,col=4,lty=2,lwd=2) legend("topleft",c("lm","optim"),col=c(2,4),lty=1:2,inset=0.05) ################################################################################ # MONOTONIC INCREASING # STARTING VALUES IN THE FEASIBLE REGION coef <- coef(lm) uci <- coef(lm)+1.96*sqrt(diag(vcov(lm))) lci <- coef(lm)-1.96*sqrt(diag(vcov(lm))) plot(coef(lm),ylim=c(-40,100),pch=19) arrows(1:10,uci,1:10,lci,code=3,angle=90,length=0.1) init <- predict(lm(coef(lm)~I(1:10),weights=1/diag(vcov(lm)))) points(init,col=2,pch=19,cex=1.5) # DEFINE THE CONSTRAINTS MATRIX AND CHECK THE INITIAL VALIES (D <- diff(diag(ncol(X)))) D %*% init > 0 # OPTIMIZATION USING constrOptim opt2 <- constrOptim(init,ll,dll,y=y,X=X,ui=D,ci=rep(0,nrow(D)),method="BFGS", control=list(trace=T,REPORT=1)) # COMPARISON plot(x,y,main="Monotonic increasing",col=grey(0.7),pch=19) lines(x,predict(lm),col=2,lwd=2) lines(x,X%*%opt2$par,col=4,lty=2,lwd=2) legend("topleft",c("Unconstrained","Constrained"),col=c(2,4),lty=1:2,inset=0.05) ################################################################################ # CONVEX # STARTING VALUES IN THE FEASIBLE REGION plot(coef(lm),ylim=c(-40,100),pch=19) arrows(1:10,uci,1:10,lci,code=3,angle=90,length=0.1) init <- predict(lm(coef(lm)~I(1:10)+I((1:10)^2),weights=1/diag(vcov(lm)))) points(init,col=2,pch=19,cex=1.5) # DEFINE THE CONSTRAINTS MATRIX (D <- diff(diag(ncol(X)),diff=2)) D %*% init > 0 # OPTIMIZATION USING constrOptim opt3 <- constrOptim(init,ll,dll,y=y,X=X,ui=D,ci=rep(0,nrow(D)),method="BFGS", control=list(trace=T,REPORT=1)) # COMPARISON plot(x,y,main="Convex",col=grey(0.7),pch=19) lines(x,predict(lm),col=2,lwd=2) lines(x,X%*%opt3$par,col=4,lty=2,lwd=2) legend("topleft",c("Unconstrained","Constrained"),col=c(2,4),lty=1:2,inset=0.05) ################################################################################ # MONOTONIC INCREASING AND CONVEX # STARTING VALUES IN THE FEASIBLE REGION plot(coef(lm),ylim=c(-40,100),pch=19) arrows(1:10,uci,1:10,lci,code=3,angle=90,length=0.1) init <- predict(lm(coef(lm)~0+I((1:10)^2),weights=1/diag(vcov(lm)))) points(init,col=2,pch=19,cex=1.5) # DEFINE THE CONSTRAINTS MATRIX (D <- rbind(diff(diag(ncol(X)),diff=2),diff(diag(ncol(X))))) D %*% init > 0 # OPTIMIZATION USING constrOptim: 2 METHODS opt4a <- constrOptim(init,ll,dll,y=y,X=X,ui=D,ci=rep(0,nrow(D)),method="BFGS", control=list(trace=T,REPORT=1)) opt4b <- constrOptim(init,ll,grad=NULL,y=y,X=X,ui=D,ci=rep(0,nrow(D)), control=list(trace=T,REPORT=1)) # COMPARISON plot(x,y,main="Monotonic increasing and convex",col=grey(0.7),pch=19) lines(x,predict(lm),col=2,lwd=2) lines(x,X%*%opt4a$par,col=3,lty=2,lwd=2) lines(x,X%*%opt4b$par,col=4,lty=2,lwd=2) legend("topleft",c("Unconstrained","Constrained (BFGS)","Constrained (NM)"), col=2:4,lty=c(1,2,2),inset=0.05) # CHECK ll(opt4a$par,y,X) ll(opt4b$par,y,X) D %*% opt4a$par ################################################################################ # USING THE FUNCTIONS IN THE scam PACKAGE library(scam) # MONOTONIC INCREASING scam1 <- scam(y~s(x,bs="mpi")) plot(x,y,main="Comparison with the scam R package",col=grey(0.7),pch=19) lines(x,predict(lm),col=2,lwd=2) lines(x,X%*%opt2$par,col=3,lty=2,lwd=2) lines(x,predict(scam1),col=4,lty=2,lwd=2) legend("topleft",c("Unconstrained","Constrained (NM)","Constrained (SCAM)"), col=2:4,lty=c(1,2,2),inset=0.05) # MONOTONIC INCREASING AND CONVEX scam2 <- scam(y~s(x,bs="micx")) plot(x,y,main="Comparison with the scam R package",col=grey(0.7),pch=19) lines(x,predict(lm),col=2,lwd=2) lines(x,X%*%opt4b$par,col=3,lty=2,lwd=2) lines(x,predict(scam2),col=4,lty=2,lwd=2) legend("topleft",c("Unconstrained","Constrained (NM)","Constrained (SCAM)"), col=2:4,lty=c(1,2,2),inset=0.05) #