## Analysis of glove use before and after a training intervention.
##
## Input: '../../Data/glove.txt', whitespace-delimited, no header row, with
## '.' marking a missing value.  The code below treats the rows as blocks of
## four consecutive observation periods per nurse.
##
## Output: printed model summaries and cross-tabulations.  The script leaves
## `gu`, `resp`, `logit_model`, `n_nurses`, `tgu`, `tgu_wide` and `mask` in the
## calling environment, as it always has.

## NOTE(review): the reshape() calls in this script are stats::reshape() from
## base R; nothing visible here seems to need this package.  Confirm before
## dropping it, since other code may rely on it being attached.
library(reshape)

## Read in the data:
gu <- read.table("../../Data/glove.txt", header = FALSE, na.strings = ".")
colnames(gu) <- c("period", "observed", "gloved", "years_of_experience")
print(head(gu, 10))

## Tag the periods as pre/post training: period 1 is pre-training (0), every
## later period is post-training (1).
gu$post_training <- 0
gu$post_training[gu$period > 1] <- 1
print(head(gu, 10))

## Rows with missing values are deliberately kept: glm() drops incomplete rows
## on its own, and the nurse index built further down needs every nurse to
## keep all of its rows.
## gu <- gu[complete.cases(gu), ]  ## drop missing data

## Two-column binomial response: (times gloved, times observed without gloves).
resp <- cbind(gu$gloved, gu$observed - gu$gloved)

## Predictors are supplied through `data = gu` rather than as `gu$...` terms,
## so coefficients are labelled by column name and the fits work with
## predict().  `resp` is not a column of `gu`; it is picked up from the
## formula's environment.

## Does training change the proportion of times gloves are used:
logit_model <- glm(resp ~ post_training, family = binomial(), data = gu)
print(summary(logit_model))

## How does years of experience matter:
logit_model <- glm(
  resp ~ post_training + years_of_experience,
  family = binomial(),
  data = gu
)
print(summary(logit_model))

## How does the period affect things:
logit_model <- glm(
  resp ~ post_training + period,
  family = binomial(),
  data = gu
)
print(summary(logit_model))

## Combine all three predictors:
logit_model <- glm(
  resp ~ post_training + period + years_of_experience,
  family = binomial(),
  data = gu
)
print(summary(logit_model))

## Is there an interaction between experience and training:
logit_model <- glm(
  resp ~ post_training + period + years_of_experience * post_training,
  family = binomial(),
  data = gu
)
print(summary(logit_model))

## Indicator for strictly more than five years of experience.
## NOTE(review): the stratified tables further down split at `< 5` / `>= 5`,
## so a nurse with exactly 5 years is 0 here but falls in the upper group
## there.  Confirm which cut-off is intended.
gu$gt_five_years_experience <- 0
gu$gt_five_years_experience[gu$years_of_experience > 5] <- 1
print(head(gu, 10))

## Look at the correlation:
print(cor.test(gu$gt_five_years_experience, gu$observed))

## For a transitional approach we need the data in a different shape, so that
## each row corresponds to an individual nurse.

## Insert a "nurse" index.  This assumes the file lists each nurse's periods
## as one consecutive block of `periods_per_nurse` rows.
periods_per_nurse <- 4L
stopifnot(nrow(gu) %% periods_per_nurse == 0)
n_nurses <- nrow(gu) / periods_per_nurse  ## number of nurses in the study
gu$nurse_index <- rep(seq_len(n_nurses), each = periods_per_nurse)

## Take a long (one row per nurse per period) data frame to wide (one row per
## nurse), producing observed.<period> / gloved.<period> columns.
widen_by_nurse <- function(long) {
  stats::reshape(
    long,
    idvar = c("nurse_index", "years_of_experience"),
    timevar = "period",
    direction = "wide"
  )
}

## Drop the derived columns so that only the per-period counts are spread out.
tgu <- gu
tgu$post_training <- NULL
tgu$gt_five_years_experience <- NULL
tgu_wide <- widen_by_nurse(tgu)

## Replace NAs with zero so that a nurse who was not observed in some
## post-training period still contributes the other periods to the sums below.
tgu_wide[is.na(tgu_wide)] <- 0
## print(head(tgu_wide))

## Proportion of observations with gloves, pre training (period 1) and post
## training (periods 2-4 pooled).  With the zero fill above, a nurse with no
## observations in a window gets 0 / 0 = NaN.
tgu_wide$pre_training_pct <- tgu_wide$gloved.1 / tgu_wide$observed.1
post_gloved <- tgu_wide$gloved.2 + tgu_wide$gloved.3 + tgu_wide$gloved.4
post_observed <- tgu_wide$observed.2 + tgu_wide$observed.3 +
  tgu_wide$observed.4
tgu_wide$post_training_pct <- post_gloved / post_observed

## Drop rows without a pre or post training proportion (the nurse was not
## observed then): complete.cases() treats the NaN from 0 / 0 as missing.
tgu_wide <- tgu_wide[complete.cases(tgu_wide), ]

## Despite the `_gt_` in the column names, the comparison includes exactly 50%.
tgu_wide$pre_pct_gt_50 <- tgu_wide$pre_training_pct >= 0.5
tgu_wide$post_pct_gt_50 <- tgu_wide$post_training_pct >= 0.5
## print(head(tgu_wide))

## Cross-tabulate pre vs. post status; the dimensions are named so the printed
## table says which axis is which.
print("All nurses pre vs. post percent of glove use at least 50%")
print(table(pre = tgu_wide$pre_pct_gt_50, post = tgu_wide$post_pct_gt_50))

print("Less than 5 years of experience:")
mask <- tgu_wide$years_of_experience < 5
print(table(
  pre = tgu_wide$pre_pct_gt_50[mask],
  post = tgu_wide$post_pct_gt_50[mask]
))

print("5 or more years of experience:")
mask <- tgu_wide$years_of_experience >= 5
print(table(
  pre = tgu_wide$pre_pct_gt_50[mask],
  post = tgu_wide$post_pct_gt_50[mask]
))

## Let's see if the missing data is related to years of experience: rebuild
## the wide table without the zero fill so the NAs stay visible.  `tgu` has
## not been modified since it was built above, so it is reused as is.
tgu_wide <- widen_by_nurse(tgu)
print(head(tgu_wide))