#
# EPage 313
#
# Written by:
# -- 
# John L. Weatherwax                2009-04-21
# 
# email: wax@alum.mit.edu
# 
# Please send comments and especially bug reports to the
# above email address.
# 
#-----

save_plots = F

library(gbm)

set.seed(0)

# Part (1):
#
x1 = rnorm(1000)
x12 = x1^2
ysys = 1 + (-5*x12)
y = ysys + (5*rnorm(500))
dta = data.frame(y,x1,x12)

x1_sorted = sort(x1)
y_truth = 1 - 5*(x1_sorted^2)

max_x = max(x1_sorted)
min_x = min(x1_sorted)
max_y = max(y_truth)
min_y = min(y_truth)

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter6/prob_1_f_of_X.eps", onefile=FALSE, horizontal=FALSE) }
plot( x1_sorted, y_truth, col="green", type="l", xlab="x1", ylab="y" )
if( save_plots ){ dev.off() }

# Part (2): Train a gbm: 
#
dta1 = data.frame(y,x1)
boosts = gbm( y ~ x1, distribution="gaussian", n.trees=50000, data=dta1, cv.folds=5 )

if( save_plots ){ postscript("../../WriteUp/Graphics/Chapter6/prob_1_gbm_perf.eps", onefile=FALSE, horizontal=FALSE) }
opt_ntrees = gbm.perf( boosts, method="cv" )
if( save_plots ){ dev.off() }

# View the partial dependence plot for various number of trees: 
#
n_trees = c(100,500,1000,5000,10000,opt_ntrees)
for( nt in n_trees ){
  if( save_plots ){
    fn = sprintf( "../../WriteUp/Graphics/Chapter6/prob_1_gbm_partial_dependence_w_ntrees_%d.eps", nt )
    postscript(fn, onefile=FALSE, horizontal=FALSE)
  }
  plot( boosts, n.trees=nt, xlim=c(max_x,min_x), ylim=c(min_y,max_y) )
  lines( x1_sorted, y_truth, type='l', col='green' )
  if( save_plots ){ dev.off() }
}