## Load the data from a CSV file with a header row, keeping text columns as
## character vectors rather than factors:
##
DF <- read.csv(
  file = "../../Data/HOT.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

## Scatter plot of Latitude against the negated Longitude column, drawn with
## large filled points and overlaid with a reference grid:
##
plot(
  x = -DF$Longitude,
  y = DF$Latitude,
  xlim = c(-170, -50),
  type = "p",
  pch = 19,
  cex = 1.5
)
grid()

## Annotate a few cities on the scatter plot. Every row whose Name matches the
## city string is labelled, with the text placed to the right of the point
## (pos = 4):
##
for (nm in c("Miami FL", "New York NY", "Juneau AK", "Honolulu HI")) {
  ext <- grepl(nm, DF$Name)
  text(-DF$Longitude[ext], DF$Latitude[ext], labels = nm, pos = 4)
}


## First model: regress MaxJanTemp on Latitude, Longitude and Altitude, then
## print the fit summary:
##
m1 <- lm(formula = MaxJanTemp ~ Latitude + Longitude + Altitude, data = DF)
print(summary(m1))

## Residuals-versus-fitted diagnostic plot (the first of the standard lm
## diagnostic plots):
##
plot(m1, which = 1)


## Print rows 3, 30 and 16 of the data.
## NOTE(review): these hard-coded row numbers are presumably the observations
## with the largest residuals under m1 -- confirm against the residual plot.
##
print(DF[c(3, 30, 16), , drop = FALSE])

## Plot the hat matrix diagonal (leverage) of m1 against its fitted values and
## label the four observations with the largest hat values:
##
inf <- influence(m1)
plot(m1$fitted.values, inf$hat, type = 'p', pch = 19, xlab = 'Fitted values', ylab = 'Hat diagonals')

## Positions of the (up to) four largest hat values, largest first. order()
## gives one distinct position per observation, so tied hat values are each
## labelled once, and it depends only on the length of inf$hat, so it stays
## valid when lm() used fewer rows than DF contains (e.g. rows dropped for NAs).
##
ext <- head(order(inf$hat, decreasing = TRUE), 4)

## Label with the observation names carried by the hat vector itself, so the
## labels stay aligned with the plotted points even if DF has extra rows.
##
text(m1$fitted.values[ext], inf$hat[ext], labels = names(inf$hat)[ext], pos = c(1, 4, 4, 4))
grid()

## Print rows 16, 3, 60 and 61 of the data.
## NOTE(review): these hard-coded row numbers are presumably the observations
## with the largest hat values under m1 -- confirm against the leverage plot.
##
print(DF[c(16, 3, 60, 61), , drop = FALSE])


## Second model: extend the first-order fit with a squared term for each of
## Latitude, Longitude and Altitude, then print the fit summary:
##
m2 <- lm(
  formula = MaxJanTemp ~ Latitude + Longitude + Altitude +
    I(Latitude^2) + I(Longitude^2) + I(Altitude^2),
  data = DF
)
print(summary(m2))