#Commands to the trial project

 

# This only gives the R commands for the trial project.

# It is not a draft solution of the project.

 

# Read the data into R and attach the survival library

cirrhosis=read.table("http://www.uio.no/studier/emner/matnat/math/STK4080/h12/cirrhosis.txt", header=T)

library(survival)

 

# Define treatment, sex and acites as factors (the reference group is then coded as 0)

cirrhosis$treat=factor(cirrhosis$treat)

cirrhosis$sex=factor(cirrhosis$sex)

cirrhosis$asc=factor(cirrhosis$asc)

 

# Use years as time scale

cirrhosis$time=cirrhosis$time/365.25

 

 

# a)

#  Simple univariate analyses for one covariate at a time

 

# Treatment

 

# Nelson-Aalen plots:

fit.naa=coxph(Surv(time,status)~strata(treat),data=cirrhosis)

surv.naa=survfit(fit.naa)

plot(surv.naa,fun="cumhaz", mark.time=F ,lty=1:2, xlab="Years since randomization",ylab="Cumulative hazard", main="Treatment")

legend("topleft",legend=c("Prednisone","Placebo"),lty=1:2)

 

# Kaplan-Meier plots:

fit.km=survfit(Surv(time,status)~treat,data=cirrhosis, conf.type="plain")

plot(fit.km, mark.time=F, lty=1:2, xlab="Years since randomization", ylab="Survival",main="Treatment")

legend("topright",legend=c("Prednisone","Placebo"),lty=1:2)

 

# Estimates of five years survival probabilities with "plain" confidence intervals (alternatively we could have used the option "log-log")

summary(fit.km,time=5)

 

# Estimates of median survival time

print(fit.km)

 

# Log-rank test:

survdiff(Surv(time,status)~treat,data=cirrhosis)

 

# The commands for sex and ascites are similar to the commands for treatment

 

# For numeric covariates age and prothrombin index, we have create a categorical covariate (factor) by a "reasonable" grouping

 

# For age

cirrhosis$agegroup=cut(cirrhosis$age,breaks=c(0,49,59,69,100), labels=1:4)

 

# For prothrombin index (approximately according to quartiles)

cirrhosis$protgroup=cut(cirrhosis$prot,breaks=c(0,49,69,89,150), labels=1:4)

 

# After we have obtained categorical covariates for age and prothrombin index, the commands are similar to the ones given above for treatment

 

 

# d) Univariate Cox regressions

 

# Cox regression for treatment

cox.treat=coxph(Surv(time,status)~treat,data=cirrhosis)

summary(cox.treat)

 

# The commands for sex and ascites are similar to the commands for treatment

 

# For the numeric covariates age and prothrombin index, we need to decide how thay should be coded  

# (as given on the data file, or suitably transformed, or grouped).

 

# To see how age should be coded, we fit a model using a spline for age:

cox.psage=coxph(Surv(time,status)~pspline(age),data=cirrhosis)

print(cox.psage)

termplot(cox.psage,se=T)

 

# Both the plot (from the templot-command) and the test (from the print-command) show that it is reasonable to assume a log-linear effect of age

 

# We will therefore fit a Cox model using age as a numbeic covariate.

# It may be sensible to report the effect of age per 10 years, so we define a new covariate

# where age is given per 10 years and fit a Cox model with this covariate

cirrhosis$age10= cirrhosis$age/10

cox.age10=coxph(Surv(time,status)~age10,data=cirrhosis)

summary(cox.age10)

 

# Then we use a spline fit to see how prothrombin index should be coded

cox.psprot=coxph(Surv(time,status)~pspline(prot),data=cirrhosis)

print(cox.psprot)

termplot(cox.psprot,se=T)

 

# The plot indicates that we do not have a log-linear effect for low prothrombin values.

# As there is not easy to find a suitable transformation, we chose to work with grouped prothrombin index.

# (The non-linear part of prothrombin index is not significant, so could also be acceptable to use prothrombin index as a numeric covariate)

 

# When using grouped prothrombin index as a categorical covariate, it is natural to use group with highest values as reference:

cirrhosis$protgroup=relevel(cirrhosis$protgroup,ref=4)

cox.protgroup=coxph(Surv(time,status)~protgroup,data=cirrhosis)

summary(cox.protgroup)

 

 

# c) Multivariate Cox regression

 

# We then fit a Cox model with all the covariates

cox.all=coxph(Surv(time,status)~treat+sex+asc+age10+ protgroup, data=cirrhosis)

summary(cox.all)

 

# [In principle it may be the case that the coding of a numeric covariate that is appropriate for a univariate analysis,

# is not appropriate for a multivariate analysis and vice versa. But this does not seem to be the case here (commands not shown)]

 

# We then check for first order interactions between any pair of two covariates:

# We may e.g. check for interaction between treatment and sex by the commands:

cox.int=coxph(Surv(time,status)~treat+sex+asc+age10+ protgroup+treat:sex, data=cirrhosis)

anova(cox.all,cox.int)

 

# Similar commands may be used to check other first order interactions

# [Note, however, that for checking interaction between ascites and prothrombin group,

# we need to merge the two highest prothrombin groups (since there is only one person with severe ascites in the highest prothrombin group)]

 

# We find an interaction between treatment and ascites and between sex and age.

# To ease the interpretation of the interaction between sex and the numeric covariate age, it is useful to

# center age by subtracting 60 years (which is close to the mean age)

cirrhosis$cage10=(cirrhosis$age-60)/10

cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup +treat:asc+sex:cage10, data=cirrhosis)

summary(cox.final)

 

 

# Log-linearity of the numeric covariates has been checked along the way using splines

# [both for the unvariate and mulivariate Cox models (the latter commands not given here)]

 

# We also need to check for proportional hazards:

cox.test=cox.zph(cox.final,transform='log')

print(cox.test)

par(mfrow=c(2,6))

plot(cox.test)

 

# There are no indication of violation of the assumption of proportional hazard

# We will then check the global fit of the model

surv.final=survfit(cox.final)

 

# Make 4 groups according to the values of the prognostic index

break.pi=quantile(cox.final$linear.predictor)

cirrhosis$pred.group=cut(cox.final$linear.predictor,break.pi, include.lowest=T,label=1:4)

 

# Make Kaplan-Meier plots for the four groups:

par(mfrow=c(1,1))

plot(survfit(Surv(time,status)~pred.group,data=cirrhosis),mark.time=F,lty=1:4)

 

# Add lines for the survival function estimates based on the fitted Cox model

# for the mean value of the prognositc index within each group

for (i in 1:4)

{

mean.pi=mean(cox.final$linear.predictor[cirrhosis$pred.group==i])

lines(surv.final$time,surv.final$surv^exp(mean.pi),type="s",lty=i,col="red")

}

# For illustration we plot the estimated survival function for the following combinations of the covariates

#    1) prednisone, female, no ascites , age 60, prothrombin above 90

#    2) prednisone, female, slight ascites , age 60, prothrombin 50-69

#    3) prednisone, female, slight ascites , age 60, prothrombin below 50

#    4) prednisone, female, marked ascites , age 60, prothrombin below 50

#    5) prednisone, female, no ascites , age 60, prothrombin above 90

#    6) prednisone, female, slight ascites , age 60, prothrombin 50-69

#    7) prednisone, female, slight ascites , age 60, prothrombin below 50

#    8) prednisone, female, marked ascites , age 60, prothrombin below 50

 

cox.final=coxph(Surv(time,status)~treat+sex+asc+cage10+protgroup +treat:asc+sex:cage10, data=cirrhosis)

par(mfrow=c(1,2))

new.covariates=data.frame(treat=c("0","0","0","0"),sex=c("0","0","0","0"),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))

surv.final=survfit(cox.final,newdata=new.covariates)

plot(surv.final,mark.time=F, xlab="Years since randomization", ylab="Survival",lty=1:4, main="Prednisone")

legend("topright",c("1","2","3","4"),lty=1:4)

new.covariates=data.frame(treat=c("1","1","1","1"),sex=c("0","0","0","0"),asc=c("0","1","1","2"),cage10=c(0,0,0,0),protgroup=c("4","2","1","1"))

surv.final=survfit(cox.final,newdata=new.covariates)

plot(surv.final,mark.time=F, xlab="Years since randomization", ylab="Survival",lty=1:4, main="Placebo")

legend("topright",c("5","6","7","8"),lty=1:4)