function [beta,sC,s_hat,t_stats,SSE,SSR,SSTO,R2,MSR,MSE,F_ratio,MI] = wwx_regression(X,Y,verbose) % WWX_REGRESSION - Returns the regression coefficients and various statistics on them. % % This is a "home grown" function to do this ... more robust versions probably exist. % % Input: % X: [n,p] matrix of regressands ... note the user should augment the first column to be ones if regression to a constant is desired. % Y: [n,1] matrix of regressors % % Outputs: % betas : estimates of beta the linear regression coefficients % sC : estimate of the standard error of each beta % s_hat : estimate of the variance of the residuals % t_stats : estimate of the t statistics for each value of beta % SSE : sum of squared error % SSR : sum of squared regression % SSTO : sum of squared total % % % Written by: % -- % John L. Weatherwax 2009-04-21 % % email: wax@alum.mit.edu % % Please send comments and especially bug reports to the % above email address. % %----- alpha = 0.05; if( nargin<3 ) verbose = 0; end [n,p] = size(X); n_betas = p; if( length(find(X(:,1)==1))==n ) constantTerm = 1; p = p-1; %fprintf( 'We are expecting an intial column of ones\n' ); else constantTerm = 0; end M = (X' * X); xpy = X' * Y; beta = M \ xpy; % our estimates of beta if( verbose ) for pi=1:n_betas if( constantTerm ) fprintf( 'p=%10d variable betas(%10d)=%10.6f\n', p, pi-1, beta(pi) ); else fprintf( 'p=%10d variable betas(%10d)=%10.6f\n', p, pi, beta(pi) ); end end end % Lets consider the t-statistics for these ... first an estimate of the variance of our errors \sigma^2: % yhat = X * beta; e_res = Y - yhat; SSE = sum( e_res .^2 ); s2 = SSE / ( n - p - 1 ); s_hat = sqrt(s2); % next estimate the standard error: % MI = inv(M); dM = diag(MI); sC = s_hat * sqrt(dM); if( verbose ) for pi=1:n_betas, if( constantTerm ) fprintf( 'p=%10d variable betas(%10d) standard error=%10.6f\n', p, pi-1, sC(pi) ); else fprintf( 'p=%10d variable betas(%10d) standard error=%10.6f\n', p, pi, sC(pi) ); end end end % next estimate the t-statistics for each: % t_stats = beta ./ sC; if( verbose ) for pi=1:n_betas, if( constantTerm ) fprintf( 'p=%10d variable t stats(%10d)=%10.6f\n', p, pi-1, t_stats(pi) ); else fprintf( 'p=%10d variable t stats(%10d)=%10.6f\n', p, pi, t_stats(pi) ); end end end % compare these values to if( verbose ) fprintf('tinv( 1 - alpha/2, n-p-1 ) = tinv( %10.4f, %10d ) = %10.6f\n', 1 - alpha/2, n-p-1, tinv( 1 - alpha/2, n-p-1 )); end % some metrics on how well this regression is performing: % y_bar = mean( Y ); SSR = sum( ( yhat - y_bar ).^2 ); % sum squared regression SSTO = SSR + SSE; % sum squared total R2 = SSR / SSTO; % the R^2 statstic MSR = SSR / p; MSE = SSE / (n-p-1); F_ratio = MSR / MSE; %fprintf('F_ratio = %10.6f\n', F_ratio);