# child protection indicators # 2015-16 library(haven) library(readr) library(dplyr) library(stringr) library(glmnet) library(class) source("https://raw.githubusercontent.com/robertgrant/html-reports/master/html-reports.R") set.seed(1599) #cp<-read_spss("CIN SPSS_13 year_310715.sav") #cp<-read_spss("CIN SPSS_13 year.sav") cp<-read_csv("child-protection.csv") # corrections to Ofsted following Rick's checks March 2016: cp[cp[,'LA']=='Slough','OfstedRating2013']<-4 cp[cp[,'LA']=='Hartlepool','OfstedRating2013']<-2 cp[cp[,'LA']=='Hillingdon','OfstedRating2012']<-3 ofsted4<-rep(FALSE,dim(cp)[1]) ofsted4[cp$OfstedRating2012==4 | cp$OfstedRating2013==4]<-TRUE cp2<-data.frame(cp$LA, cp$CINrAssessmentInitial10Days2012, cp$CINrPriorReferral2012, cp$WFrAgencyWorkerRate2012, cp$OfstedRating2009, cp$OfstedRating2010, cp$OfstedRating2011, cp$OfstedRating2012, cp$OfstedRating2013, ofsted4) colnames(cp2)<-c('LA','initial','prior','agency', 'ofsted09','ofsted10','ofsted11','ofsted12','ofsted13','ofsted4') # make assessments as percentage of referrals attach(cp) for(i in 2001:2012) { assign(paste0('AssessmentCorePercOfReferrals',i), 100*get(paste0('CINrAssesmentCore',i))/get(paste0('CINrReferrals',i))) } detach(cp) #============================================================================== # below we list the stems of variables; in 'cp' they are followed by the year # how much did we do (input quantity) #CINrReferrals #Number of referrals 2001-2013 #CINrAssessmentInitial # Initial Assessments, 2001-13 #CINrAssessmentInitial7Days # Initial Assessments completed within 7/10 days, 2001-2008 SEE BELOW #CINrAssessmentInitial10Days # Initial Assessments completed within 7/10 days, 2009-2013 SEE ABOVE #CINrAssesmentCore # Core Assessments, 2001-12 # NO DATA Continuous Assessments #CINrDuring # Numbers of Children in Need (CIN), 2009-13 #CINrSection47During # Section 47 inquiries, 2001-13 #CINrCPConference # CP conferences, 2001-13 #CINrCPPAtMarch31St # CP Plans at March 31st, 2001-13 input_quantity<-c('CINrReferrals', 'CINrAssessmentInitial', 'CINrAssessmentInitial7Days', 'CINrAssessmentInitial10Days', 'CINrAssesmentCore', 'CINrDuring', 'CINrSection47During', 'CINrCPConference', 'CINrCPPAtMarch31St') temp<-cp_input_quantity<-data.frame(deleteme=rep(1,dim(cp)[1])) for(i in 1:length(input_quantity)) { temp<-cbind(temp,select(cp,starts_with(input_quantity[i]))) } for(i in 2001:2013) { cp_input_quantity<-cbind(cp_input_quantity,select(temp,ends_with(as.character(i)))) } #============================================================================== # how well did we do it (input quality) #WFrCINPerSocialWorker # Numbers of CIN per social worker, 2010-13 #WFrVacancyRate # Numbers of social work vacancies, 2010-13 #WFrTurnoverRate # Rate of turnover of social workers, 2010-13 #WFrAgencyWorkerRate # Percentage of agency workers, 2010-13 #CINrAssessmentCore35Days # Core assessments completed within 35 days, 2001-12 # NO DATA Continuous Assessments completed within 45 days #AssessmentCorePercOfReferrals # Assessments as a percentage of referrals, 2001-2 #CINrCPCon15DaysSection47 # CP conference held within 15 days of Section 47, 2001-13 #CINrCPPCeased2plusYears # CPP cease times (2+ years), 2001-13 #CINrCPPReviewTimeframe # CP Plans review held within 6 months, 2009-13 #CINrCeased2plusYears # CIN Cease times (2+ years), 2009-13 #OfstedRating # Ofsted ratings, 2009-13 input_quality<-c('WFrCINPerSocialWorker', 'WFrVacancyRate', 'WFrTurnoverRate', 'WFrAgencyWorkerRate', 'CINrAssessmentCore35Days', 'AssessmentCorePercOfReferrals', 'CINrCPCon15DaysSection47', 'CINrCPPCeased2plusYears', 'CINrCPPReviewTimeframe', 'CINrCeased2plusYears', 'OfstedRating') #============================================================================== # did anything change (output quantity) #CINrCPPCeased # CP Plans Ceased, 2001-13 #CINrCareProceedings # Care proceedings, 2008-13 #CINrReferralNoAction # Referrals not assessed, 2011-13 #CINrReferralNotCIN # Referrals deemed not CIN, 2011-13 #CINrCeasedDuring # Numbers of CIN Ceased, 2009-13 #CINrLookedAfterCareOrders # Care orders, 2001-13 output_quantity<-c('CINrCPPCeased', 'CINrCareProceedings', 'CINrReferralNoAction', 'CINrReferralNotCIN', 'CINrCeasedDuring', 'CINrLookedAfterCareOrders') #============================================================================== # was it change for the better (output quality) #CINrCPPWithPriorCPP # CPP where children had prior CPP , 2001-13 #CINrPriorReferral # Referrals within 12 months of a prior referral 2001-13 output_quality<-c('CINrCPPWithPriorCP', 'CINrPriorReferral') #============================================================================== ######################################################################## ########################## trend graphs ############################ ######################################################################## ######################################################################## ##################### summarise correlations ####################### ######################################################################## ############################################################################## ##################### predicting Ofsted inadequate ####################### ############################################################################## # predictor variables for elastic-net predvars2012<-c('CINrDuring2012', 'CINrReferrals2012', 'CINrAssessmentInitial2012', 'CINrAssessmentInitial10Days2012', 'CINrAssesmentCore2012', 'CINrAssessmentCore35Days2012', 'CINrSection47During2012', 'CINrCPConference2012', 'CINrCPCon15DaysSection472012', 'CINrCPPDuring2012', 'CINrCPPReviewTimeframe2012', 'CINrCareProceedings2012', 'CINrLookedAfterCareOrders2012', 'CINrLookedAfterS202012', 'CINrPriorReferral2012', 'CINrCPPWithPriorCPP2012', 'FINCINrChildrenSafety2012', 'WFrCINPerSocialWorker2012', 'WFrVacancyRate2012', 'WFrTurnoverRate2012', 'WFrAgencyWorkerRate2012') predmatrix2012<-as.matrix(cp[,predvars2012]) # drop LAs that have any missing values deleteme<-function(x) { all(!is.na(x)) } completeLA2012<-apply(predmatrix2012,1,deleteme) # fit the elastic-net regression netmod1<-glmnet(y=cp2$ofsted4[completeLA2012], x=predmatrix2012[completeLA2012,], family='binomial') # run cross-validation to find the optimal lambda cvnetmod1<-cv.glmnet(y=cp2$ofsted4[completeLA2012], x=predmatrix2012[completeLA2012,], family='binomial') plot(cvnetmod1) print(paste('lambda for minimum CV error =',cvnetmod1$lambda.min)) print(paste('lambda for most regularised model within 1 SE of minimum CV error',cvnetmod1$lambda.1se)) coef(cvnetmod1,s='lambda.min') coef(cvnetmod1,s='lambda.1se') print(paste('% deviance explained at minimum CV error =', netmod1$dev.ratio[which(netmod1$lambda==cvnetmod1$lambda.min)])) #ofsted4[is.na(cp2$initial) | is.na(cp2$prior) | is.na(cp2$agency)]<-NA # linear predictor (1se) pred1se2012<-predict(cvnetmod1,newx=predmatrix2012[completeLA2012,],s='lambda.1se') # convert to probabilities pred1se2012<-exp(pred1se2012)/(1+exp(pred1se2012)) # how many were inadequate? p_ofsted4_2012<-sum(cp2$ofsted4[completeLA2012])/length(cp2$ofsted4[completeLA2012]) # classify predictions binpred1se2012<-(pred1se2012>quantile(pred1se2012,prob=1-p_ofsted4_2012)) class12<-table(binpred1se2012,cp2$ofsted4[completeLA2012]) print(class12) print(paste('PPV =',class12[2,2]/(class12[2,1]+class12[2,2]), '; NPV =',class12[1,1]/(class12[1,1]+class12[1,2]))) # mis-classified: falsely inadequate cp2[completeLA2012,][binpred1se2012==TRUE & cp2$ofsted4[completeLA2012]==FALSE,] # mis-classified: falsely 'adequate' cp2[completeLA2012,][binpred1se2012==FALSE & cp2$ofsted4[completeLA2012]==TRUE,] ############################################################################# ##################### effect of Ofsted inadequate ####################### ############################################################################# # we look at LAs that were in the range of values predicting Ofsted inadequate during 2010-12 # we divide these into those rated inadequate in that year and those not (nor the following year) # we already have prediction for 2012 # get lists of predvars for 2010, 2011 and 2013 predvars<-str_sub(predvars2012,1,-5) predvars2010<-paste0(predvars,'2010') predvars2011<-paste0(predvars,'2011') # core assessments were dropped in 2013: cp$CINrAssesmentCore2013<-rep(NA,dim(cp)[1]) cp$CINrAssessmentCore35Days2013<-rep(NA,dim(cp)[1]) predvars2013<-paste0(predvars,'2013') # make predictions for 2010, 2011 and 2013 predmatrix2010<-as.matrix(cp[,predvars2010]) predmatrix2011<-as.matrix(cp[,predvars2011]) predmatrix2013<-as.matrix(cp[,predvars2013]) # drop LAs that have any missing values (don't need 2013 until later) completeLA2010<-apply(predmatrix2010,1,deleteme) completeLA2011<-apply(predmatrix2011,1,deleteme) # linear predictor (1se) pred1se2010<-predict(cvnetmod1,newx=predmatrix2010[completeLA2010,],s='lambda.1se') pred1se2011<-predict(cvnetmod1,newx=predmatrix2011[completeLA2011,],s='lambda.1se') # convert to probabilities pred1se2010<-exp(pred1se2010)/(1+exp(pred1se2010)) pred1se2011<-exp(pred1se2011)/(1+exp(pred1se2011)) # how many were inadequate? p_ofsted4_2010<-sum(cp2$ofsted4[completeLA2010])/length(cp2$ofsted4[completeLA2010]) p_ofsted4_2011<-sum(cp2$ofsted4[completeLA2011])/length(cp2$ofsted4[completeLA2011]) # classify predictions binpred1se2010<-(pred1se2010>quantile(pred1se2010,prob=1-p_ofsted4_2010)) binpred1se2011<-(pred1se2011>quantile(pred1se2011,prob=1-p_ofsted4_2011)) # compare with reality table(binpred1se2010,cp2$ofsted10[completeLA2010],useNA='always') print(cp$LA[completeLA2010][binpred1se2010==TRUE]) print(cp$OfstedRating2010[completeLA2010][binpred1se2010==TRUE]) table(binpred1se2011,cp2$ofsted11[completeLA2011],useNA='always') print(cp$LA[completeLA2011][binpred1se2011==TRUE]) print(cp$OfstedRating2011[completeLA2011][binpred1se2011==TRUE]) table(binpred1se2012,cp2$ofsted12[completeLA2012],useNA='always') print(cp$LA[completeLA2012][binpred1se2012==TRUE]) print(cp$OfstedRating2012[completeLA2012][binpred1se2012==TRUE]) # the only authorities to be predicted inadequate more than once in these 3 years: # Cheshire East: 2010, 2011, 2012 # Herefordshire: 2010, 2012 # Slough: 2011, 2012 # Stockton on Tees: 2010, 2012 # Worcestershire: 2010, 2012 # in these cases we'll compare the first year predicted inadequate with the year after that # construct the data frame firstyear<-rep(0,dim(cp)[1]) names(firstyear)<-cp$LA firstyear[cp$LA[completeLA2012][binpred1se2012==TRUE]]<-2012 firstyear[cp$LA[completeLA2011][binpred1se2011==TRUE]]<-2011 firstyear[cp$LA[completeLA2010][binpred1se2010==TRUE]]<-2010 predvars1st<-predvars2nd<-matrix(NA, nrow=dim(predmatrix2010)[1], ncol=dim(predmatrix2010)[2]+1) for(i in 1:length(firstyear)) { if(firstyear[i]==2010) { predvars1st[i,]<-c(cp$OfstedRating2010[i],predmatrix2010[i,]) predvars2nd[i,]<-c(cp$OfstedRating2011[i],predmatrix2011[i,]) } else if(firstyear[i]==2011) { predvars1st[i,]<-c(cp$OfstedRating2011[i],predmatrix2011[i,]) predvars2nd[i,]<-c(cp$OfstedRating2012[i],predmatrix2012[i,]) } else if(firstyear[i]==2012) { predvars1st[i,]<-c(cp$OfstedRating2012[i],predmatrix2012[i,]) predvars2nd[i,]<-c(cp$OfstedRating2013[i],predmatrix2013[i,]) } } rownames(predvars1st)<-rownames(predvars2nd)<-cp$LA colnames(predvars1st)<-colnames(predvars2nd)<-c('Ofsted',predvars) predvars1st<-cbind(firstyear,predvars1st) predvars2nd<-cbind(firstyear,predvars2nd) predvars1st<-predvars1st[firstyear!=0,] predvars2nd<-predvars2nd[firstyear!=0,] inadequate<-as.numeric(predvars1st[,'Ofsted']==4) inadequate[is.na(inadequate)]<-0 # prepare html file con<-file('cp-indicators.html',open='w') html_start(projecttitle='Child protection indicators: effect of Ofsted inadequate ratings') tableno<-figureno<-1 html_p(paste('Our starting point is LAs thaht we would predict to be Ofsted-inadequate in 2010-2012.', 'We then compare the first year in this underperforming state with the following year,', 'and contrast those that really were rated inadequate by Ofsted with those that', 'were not. In this way, we hope to account for regression to the mean (those that', 'happen to be having a bad year by chance will look better next year: is it Ofsted or', 'just natural fluctuations in caseload and workforce?')) tableno<-html_unitab(inadequate,caption='Rated inadequate by Ofsted?') tableno<-html_unitab(change[,'firstyear'], caption='First year we would statistically predict an inadequate rating') # calculate changes change<-predvars1st change[,3:dim(change)[2]]<-predvars2nd[,3:dim(change)[2]]-predvars1st[,3:dim(change)[2]] wilcoxresults<-matrix(NA,nrow=dim(change)[2]-2,ncol=7) colnames(wilcoxresults)<-c('adequate_median', 'adequate_IQR', 'adequate_%_worse', 'inadequate_median', 'inadequate_IQR', 'inadequate_%_worse', 'Wilcoxon p-value') rownames(wilcoxresults)<-predvars for(i in 3:dim(change)[2]) { html_h(colnames(change)[i],level=3) png(paste0('boxplot_change_',colnames(change)[i])) boxplot(change[,i]~inadequate,ylab='Change',main=colnames(change)[i]) dev.off() figureno<-html_img(paste0('boxplot_change_',colnames(change)[i]), figureno=figureno) wilcoxresults[i-2,1]<-median(change[inadequate==0,i],na.rm=TRUE) wilcoxresults[i-2,2]<-quantile(change[inadequate==0,i],probs=0.75,na.rm=TRUE)- quantile(change[inadequate==0,i],probs=0.25,na.rm=TRUE) wilcoxresults[i-2,3]<-100*(sum(!is.na(change[inadequate==0,i])&change[inadequate==0,i]<0)/ sum(!is.na(change[inadequate==0,i]))) wilcoxresults[i-2,4]<-median(change[inadequate==1,i],na.rm=TRUE) wilcoxresults[i-2,5]<-quantile(change[inadequate==1,i],probs=0.75,na.rm=TRUE)- quantile(change[inadequate==1,i],probs=0.25,na.rm=TRUE) wilcoxresults[i-2,6]<-100*(sum(!is.na(change[inadequate==1,i])&change[inadequate==1,i]<0)/ sum(!is.na(change[inadequate==1,i]))) # remember this test is about inference over temporal fluctuations because we have the whole population wilcoxresults[i-2,7]<-wilcox.test(change[inadequate==0,i],change[inadequate==1,i])$p.value } wilcoxresults<-round(wilcoxresults,digits=3) tableno<-html_anytab(wilcoxresults,tableno=tableno) close(con) # compare elastic-net (linear predictor) with k-nearest neighbours (non-parametric) cp2$inad10<-as.numeric(cp2$ofsted10==4) cp2$inad10[is.na(cp2$ofsted10)]<-0 cp2$inad11<-as.numeric(cp2$ofsted11==4) cp2$inad11[is.na(cp2$ofsted11)]<-0 cp2$inad12<-as.numeric(cp2$ofsted12==4) cp2$inad12[is.na(cp2$ofsted12)]<-0 cp2$inad13<-as.numeric(cp2$ofsted13==4) cp2$inad13[is.na(cp2$ofsted13)]<-0 ofstedplot<-function(x,y,z) { plot(x,y,col=(ifelse(z==1,'#ff0000','#000000'))) } ofstedplot(cp2$initial,cp2$prior,cp2$inad12) ofstedplot(cp2$initial,cp2$agency,cp2$inad12) ofstedplot(cp2$agency,cp2$prior,cp2$inad12) ofstedplot(cp$CINrAssessmentInitial10Days2013, cp$CINrPriorReferral2013, cp2$inad13) set.seed(6627) #knn2010<-knn(train=predmatrix2012[completeLA2012,], # test=predmatrix2010[completeLA2010,], # cl=as.factor(cp2$ofsted4[completeLA2012]), # k=10, # prob=TRUE) knn2010<-knn(train=cp2[completeLA2012,c('initial', 'prior', 'agency')], test=cp[completeLA2010,c('CINrAssessmentInitial10Days2010', 'CINrPriorReferral2010', 'WFrAgencyWorkerRate2010')], cl=as.factor(cp2$inad12[completeLA2012]), k=10, prob=TRUE) table(knn2010,cp2$ofsted4[completeLA2010]) # knn fails to predict any inadequates in 2010 knn2011<-knn(train=cp2[completeLA2012,c('CINrAssessmentInitial10Days2012', 'CINrPriorReferral2012', 'WFrVacancyRate2012', 'WFrAgencyWorkerRate2012')], test=cp2[completeLA2011,c('CINrAssessmentInitial10Days2011', 'CINrPriorReferral2011', 'WFrVacancyRate2011', 'WFrAgencyWorkerRate2011')], cl=as.factor(cp2$ofsted4[completeLA2012]), k=3, prob=TRUE) table(knn2011,cp2$ofsted4[completeLA2011])