We will take a look at the file of liverdata.RData which contains data on liver resection. The goal is to carry out a statistical analysis in order to investigate the survival functions of the individuals in the dataset.
load("F:\\TUD lecture\\SA Lecture\\R-Programming\\liverdata.RData")
liver_data<-as_tibble(liver_data)
colnames(liver_data)
[1] "overall" "diseasefree" "death" "recurrence"
[5] "age" "height" "weight" "bmi"
[9] "sex" "complications" "resection_vol" "pringle"
[13] "overall_yrs" "diseasefree_yrs"
#Total number of death and censored:
table(liver_data$death)
0 1
135 157
#Death proportions between gender :
with(liver_data,prop.table(table(death,sex),2))
sex
death male female
0 0.4717949 0.4432990
1 0.5282051 0.5567010
#Proportions death rate and censored respective to the gender:
with(liver_data,prop.table(table(death,sex),1))
sex
death male female
0 0.6814815 0.3185185
1 0.6560510 0.3439490
#Total number of death based on SEX:
table(liver_data$death,liver_data$sex) #or
male female
0 92 43
1 103 54
with(liver_data,table(death,sex))
sex
death male female
0 92 43
1 103 54
Visualization:
library(ggplot2)
ggplot(liver_data,aes(x=sex,y=overall_yrs))+
geom_boxplot()+
labs(x = "Sex", y = "Overall years") +
theme_bw()
Female are less likely to spread than males in the upper quantile
library(survminer)
library(survival)
liver_data<-mutate(liver_data,sex=as.character(sex))
km<- survfit(Surv(overall_yrs,death)~sex,data=liver_data)
km1<-survfit(Surv(diseasefree,recurrence)~1,data=liver_data)
surv_pvalue(km)
summary(km)
Call: survfit(formula = Surv(overall_yrs, death) ~ sex, data = liver_data)
sex=female
time n.risk n.event survival std.err lower 95% CI upper 95% CI
0.0383 97 1 0.990 0.0103 0.970 1.000
0.1013 95 1 0.979 0.0145 0.951 1.000
0.1834 94 1 0.969 0.0177 0.935 1.000
0.4901 93 1 0.958 0.0203 0.919 0.999
0.5640 92 1 0.948 0.0226 0.905 0.993
0.6516 91 1 0.938 0.0247 0.890 0.987
0.7337 88 1 0.927 0.0266 0.876 0.981
0.7858 87 1 0.916 0.0283 0.862 0.974
0.8323 86 1 0.906 0.0299 0.849 0.966
0.8679 85 1 0.895 0.0314 0.835 0.959
1.1499 84 1 0.884 0.0328 0.822 0.951
1.1882 83 1 0.874 0.0341 0.809 0.943
1.2320 82 1 0.863 0.0353 0.797 0.935
1.2731 80 1 0.852 0.0365 0.784 0.927
1.3388 79 1 0.841 0.0376 0.771 0.918
1.3689 78 1 0.831 0.0386 0.758 0.910
1.3744 77 1 0.820 0.0396 0.746 0.901
1.5332 75 1 0.809 0.0405 0.733 0.892
1.5962 74 1 0.798 0.0414 0.721 0.884
1.8207 72 1 0.787 0.0423 0.708 0.874
1.8344 71 1 0.776 0.0432 0.696 0.865
1.8398 70 1 0.765 0.0439 0.683 0.856
1.9384 69 1 0.754 0.0447 0.671 0.847
2.1793 68 1 0.743 0.0454 0.659 0.837
2.2697 66 1 0.731 0.0461 0.646 0.827
2.3244 64 1 0.720 0.0467 0.634 0.818
2.5927 62 1 0.708 0.0474 0.621 0.808
2.6858 61 1 0.697 0.0480 0.609 0.797
2.9322 57 1 0.684 0.0487 0.595 0.787
2.9843 56 1 0.672 0.0494 0.582 0.776
3.0116 54 1 0.660 0.0500 0.569 0.765
3.2060 53 1 0.647 0.0506 0.555 0.754
3.3320 51 1 0.635 0.0511 0.542 0.743
3.4031 49 1 0.622 0.0517 0.528 0.732
3.4114 48 1 0.609 0.0522 0.515 0.720
3.4579 47 1 0.596 0.0527 0.501 0.709
3.7098 44 1 0.582 0.0532 0.487 0.696
3.7864 41 1 0.568 0.0538 0.472 0.684
3.8138 40 1 0.554 0.0543 0.457 0.671
4.0575 36 1 0.538 0.0549 0.441 0.658
4.3094 35 1 0.523 0.0555 0.425 0.644
4.6023 34 1 0.508 0.0559 0.409 0.630
4.6242 33 1 0.492 0.0563 0.393 0.616
4.6626 30 1 0.476 0.0568 0.377 0.601
4.7228 25 1 0.457 0.0576 0.357 0.585
4.9090 23 1 0.437 0.0584 0.336 0.568
5.1882 18 1 0.413 0.0600 0.310 0.549
5.2594 17 1 0.388 0.0612 0.285 0.529
5.3607 16 1 0.364 0.0620 0.261 0.508
5.4894 14 1 0.338 0.0628 0.235 0.487
5.5277 13 1 0.312 0.0631 0.210 0.464
5.8207 12 1 0.286 0.0630 0.186 0.441
6.0123 11 1 0.260 0.0624 0.163 0.416
6.6886 6 1 0.217 0.0653 0.120 0.391
sex=male
time n.risk n.event survival std.err lower 95% CI upper 95% CI
0.0137 195 2 0.990 0.00722 0.976 1.000
0.0383 190 1 0.985 0.00886 0.967 1.000
0.0575 189 1 0.979 0.01023 0.959 1.000
0.0712 188 1 0.974 0.01143 0.952 0.997
0.0767 187 1 0.969 0.01250 0.945 0.994
0.0958 186 1 0.964 0.01347 0.938 0.990
0.1424 185 1 0.958 0.01437 0.931 0.987
0.2245 184 1 0.953 0.01521 0.924 0.984
0.3723 182 1 0.948 0.01600 0.917 0.980
0.3943 181 1 0.943 0.01675 0.911 0.976
0.4025 180 1 0.938 0.01745 0.904 0.972
0.4298 179 1 0.932 0.01813 0.897 0.969
0.5886 174 1 0.927 0.01880 0.891 0.965
0.6297 173 1 0.922 0.01944 0.884 0.961
0.6543 171 1 0.916 0.02006 0.878 0.956
0.7036 170 1 0.911 0.02065 0.871 0.952
0.7337 169 1 0.905 0.02122 0.865 0.948
0.8159 166 2 0.895 0.02232 0.852 0.939
0.8761 164 1 0.889 0.02284 0.845 0.935
0.8816 163 1 0.884 0.02334 0.839 0.931
0.9391 162 1 0.878 0.02383 0.833 0.926
0.9446 161 1 0.873 0.02430 0.826 0.922
0.9500 160 1 0.867 0.02475 0.820 0.917
0.9966 159 1 0.862 0.02519 0.814 0.913
1.0021 158 1 0.856 0.02561 0.808 0.908
1.1855 157 1 0.851 0.02602 0.801 0.903
1.2074 156 1 0.845 0.02642 0.795 0.899
1.2676 155 1 0.840 0.02681 0.789 0.894
1.2923 154 1 0.835 0.02718 0.783 0.890
1.3443 151 1 0.829 0.02756 0.777 0.885
1.5578 148 1 0.823 0.02794 0.770 0.880
1.6044 147 1 0.818 0.02830 0.764 0.875
1.6290 146 1 0.812 0.02866 0.758 0.870
1.6728 145 1 0.807 0.02900 0.752 0.865
1.7714 142 1 0.801 0.02935 0.745 0.861
1.8261 141 1 0.795 0.02969 0.739 0.856
1.8809 140 1 0.790 0.03001 0.733 0.851
1.9603 137 1 0.784 0.03034 0.727 0.846
1.9986 136 1 0.778 0.03066 0.720 0.841
2.1793 133 1 0.772 0.03098 0.714 0.835
2.2177 132 1 0.766 0.03130 0.707 0.830
2.2505 131 1 0.760 0.03160 0.701 0.825
2.2752 128 1 0.755 0.03191 0.695 0.820
2.3053 127 1 0.749 0.03220 0.688 0.814
2.3244 125 1 0.743 0.03250 0.682 0.809
2.3326 124 1 0.737 0.03278 0.675 0.804
2.3819 123 2 0.725 0.03333 0.662 0.793
2.4285 119 1 0.719 0.03360 0.656 0.788
2.4778 118 1 0.712 0.03386 0.649 0.782
2.5079 117 1 0.706 0.03411 0.643 0.777
2.6256 115 1 0.700 0.03437 0.636 0.771
2.6831 114 1 0.694 0.03461 0.629 0.765
2.7296 112 1 0.688 0.03485 0.623 0.760
2.7351 111 1 0.682 0.03508 0.616 0.754
2.8611 110 1 0.676 0.03531 0.610 0.748
2.8720 109 1 0.669 0.03552 0.603 0.743
2.9350 107 1 0.663 0.03574 0.597 0.737
2.9377 104 1 0.657 0.03596 0.590 0.731
3.0144 101 1 0.650 0.03619 0.583 0.725
3.0171 100 1 0.644 0.03640 0.576 0.719
3.0253 97 1 0.637 0.03663 0.569 0.713
3.0308 96 1 0.630 0.03684 0.562 0.707
3.2088 93 1 0.624 0.03706 0.555 0.701
3.2416 92 1 0.617 0.03728 0.548 0.694
3.4141 90 1 0.610 0.03749 0.541 0.688
3.4552 87 1 0.603 0.03771 0.533 0.682
3.5921 86 1 0.596 0.03791 0.526 0.675
3.6249 85 1 0.589 0.03811 0.519 0.669
3.6797 83 1 0.582 0.03831 0.511 0.662
3.6988 82 2 0.568 0.03866 0.497 0.649
3.8084 79 1 0.560 0.03884 0.489 0.642
3.9179 74 1 0.553 0.03904 0.481 0.635
3.9890 72 1 0.545 0.03925 0.473 0.628
4.0027 71 1 0.538 0.03944 0.466 0.621
4.0657 68 1 0.530 0.03964 0.457 0.613
4.3066 67 1 0.522 0.03983 0.449 0.606
4.3258 66 1 0.514 0.04001 0.441 0.599
4.3559 65 1 0.506 0.04016 0.433 0.591
4.3723 64 1 0.498 0.04031 0.425 0.584
4.6735 63 1 0.490 0.04044 0.417 0.576
4.7967 60 1 0.482 0.04058 0.409 0.568
4.8706 57 1 0.473 0.04074 0.400 0.560
5.1280 54 1 0.465 0.04092 0.391 0.552
5.1855 52 1 0.456 0.04109 0.382 0.544
5.3279 50 1 0.447 0.04127 0.373 0.535
5.3634 49 1 0.438 0.04142 0.363 0.527
5.7166 44 1 0.428 0.04166 0.353 0.518
5.7276 43 1 0.418 0.04186 0.343 0.508
5.7413 42 1 0.408 0.04203 0.333 0.499
5.7659 41 1 0.398 0.04216 0.323 0.490
5.7851 40 1 0.388 0.04226 0.313 0.480
5.8754 39 1 0.378 0.04233 0.303 0.471
6.0205 35 1 0.367 0.04248 0.293 0.461
6.2122 33 1 0.356 0.04262 0.282 0.450
6.5517 26 1 0.342 0.04313 0.267 0.438
6.8118 23 1 0.327 0.04374 0.252 0.425
7.2060 21 1 0.312 0.04435 0.236 0.412
8.1424 14 1 0.290 0.04644 0.211 0.396
8.3066 12 1 0.265 0.04843 0.186 0.380
Separating gender data:
male<-filter(liver_data,sex == "male")
female<-filter(liver_data,sex == "female")
male_km<-survfit(Surv(overall_yrs,death)~1,data=male)
female_km<-survfit(Surv(overall_yrs,death)~1,data=female)
Visualization:
library(GGally)
male_viz<-ggsurv(male_km, CI=TRUE,plot.cens=TRUE, surv.col= "gg.def", cens.col="gg.def", lty.est=1, size.est= 2, size.ci=1, back.white= TRUE, xlab="Time", ylab="Survival")+ggtitle("Survival of Males")
female_viz<-ggsurv(female_km,CI=TRUE)+ggtitle("Survival of Females")+theme_bw()
library(patchwork)
male_viz|female_viz
Survival rates of both gender monotonically decreasing and on comparison males have a bit high survival chance than females
na<-survfit(coxph(Surv(diseasefree,recurrence)~1,data=liver_data),type='aalen')
na_d<-ggsurv(na,CI= TRUE)+
ggtitle("Nelson-AAlen Estimation")+
theme_bw()+
ylim(c(0,1))
na_d
library(discSurv)
liver_data<-mutate(liver_data, diseasefree_months=ceiling(diseasefree/30.4),diseasefree_years=ceiling(diseasefree_yrs))
data<-as.data.frame(liver_data)
#Days:
lifetable_liverresection_days<-lifeTable(dataSet= data,timeColumn="diseasefree", censColumn="recurrence")
lifetable_liverresection_days<-lifetable_liverresection_days$Output
#Months:
lifetable_liverresection_months<-lifeTable(dataSet= data,timeColumn="diseasefree_months", censColumn="recurrence")
lifetable_liverresection_months<-lifetable_liverresection_months$Output
#years:
lifetable_liverresection_years<-lifeTable(dataSet= data,timeColumn="diseasefree_years", censColumn="recurrence")
lifetable_liverresection_years<-lifetable_liverresection_years$Output
alpha=0.5
#Days
lifetable_liverresection_days_v<-ggplot(lifetable_liverresection_days,aes(x=1:nrow(lifetable_liverresection_days),y=S))+
geom_line()+
geom_ribbon(data=lifetable_liverresection_days, aes(ymin=(S-(qnorm(1-(alpha/2))*seS)),ymax=(S+(qnorm(1-(alpha/2))*seS))),alpha=0.2)+
lims(y=c(0,1))+
xlab("Days")+
ylab("S(t)")+
ggtitle("Lifetable Estimation")+theme_bw()
#Months
lifetable_liverresection_months_v<-ggplot(lifetable_liverresection_months,aes(x=1:nrow(lifetable_liverresection_months),y=S))+
geom_line()+
geom_ribbon(data=lifetable_liverresection_months, aes(ymin=S-qnorm(1-alpha/2)*seS,ymax=S+qnorm(1-alpha/2)*seS),alpha=0.2)+
lims(y=c(0,1))+
xlab("Months")+
ylab("S(t)")+
ggtitle("Disease Free Survival- Lifetable")+theme_bw()
#Years
lifetable_liverresection_years_v<-ggplot(lifetable_liverresection_years,aes(x=1:nrow(lifetable_liverresection_years),y=S))+
geom_line()+
geom_ribbon(data=lifetable_liverresection_years, aes(ymin=S-qnorm(1-alpha/2)*seS,ymax=S+qnorm(1-alpha/2)*seS),alpha=0.2)+
lims(y=c(0,1))+
xlab("Years")+
ylab("S(t)")+
ggtitle("Disease Free Survival- Lifetable")+theme_bw()
lifetable_liverresection_days_v | lifetable_liverresection_months_v / lifetable_liverresection_years_v
Comparing all Three estimation:
lifetable_liverresection_days_v|km_d|na_d
female_old<-filter(liver_data , sex=="female" & age>70)
summary(female_old)
overall diseasefree death recurrence
Min. : 14 Min. : 14.0 Min. :0.0000 Min. :0.0000
1st Qu.: 733 1st Qu.: 278.0 1st Qu.:0.0000 1st Qu.:1.0000
Median :1246 Median : 547.0 Median :1.0000 Median :1.0000
Mean :1186 Mean : 735.4 Mean :0.5652 Mean :0.8261
3rd Qu.:1570 3rd Qu.:1197.0 3rd Qu.:1.0000 3rd Qu.:1.0000
Max. :2443 Max. :2443.0 Max. :1.0000 Max. :1.0000
age height weight bmi
Min. :71.0 Min. :1.53 Min. :48.00 Min. :18.29
1st Qu.:72.0 1st Qu.:1.60 1st Qu.:59.50 1st Qu.:22.46
Median :74.0 Median :1.62 Median :65.00 Median :25.39
Mean :74.7 Mean :1.62 Mean :65.37 Mean :24.97
3rd Qu.:77.0 3rd Qu.:1.65 3rd Qu.:71.00 3rd Qu.:27.79
Max. :82.0 Max. :1.70 Max. :83.00 Max. :32.88
sex complications resection_vol pringle
Length:23 minor:17 Min. : 0.0 no :19
Class :character major: 6 1st Qu.: 48.5 yes: 4
Mode :character Median : 250.0
Mean : 401.7
3rd Qu.: 632.5
Max. :1980.0
overall_yrs diseasefree_yrs diseasefree_months diseasefree_years
Min. :0.03833 Min. :0.03833 Min. : 1.0 Min. :1.000
1st Qu.:2.00685 1st Qu.:0.76112 1st Qu.:10.0 1st Qu.:1.000
Median :3.41136 Median :1.49760 Median :18.0 Median :2.000
Mean :3.24614 Mean :2.01351 Mean :24.7 Mean :2.565
3rd Qu.:4.29979 3rd Qu.:3.27721 3rd Qu.:40.0 3rd Qu.:4.000
Max. :6.68857 Max. :6.68857 Max. :81.0 Max. :7.000
library(broom)
km_f_log<- survfit(Surv(diseasefree,recurrence)~1,data=female_old)
km_f_logit<- survfit(Surv(diseasefree,recurrence)~1,data=female_old, conf.type="logit")
km_f_loglog<- survfit(Surv(diseasefree,recurrence)~1,data=female_old, conf.type="log-log")
km_f_plain<- survfit(Surv(diseasefree,recurrence)~1,data=female_old, conf.type="plain")
t<- function(data){
data1<- tidy(data)
return(data1)
}
km_f_loglog<-t(km_f_loglog)
km_f_logit<-t(km_f_logit)
km_f_plain<-t(km_f_plain)
#visulation:
km_f_d_ci<-ggsurv(km_f_log)+
geom_step(data=km_f_loglog,aes(x=time,y=conf.low),col="blue")+
geom_step(data=km_f_loglog,aes(x=time,y=conf.high),col="blue")+
geom_step(data=km_f_logit,aes(x=time,y=conf.low),col="green")+
geom_step(data=km_f_logit,aes(x=time,y=conf.high),col="green")+
geom_step(data=km_f_plain,aes(x=time,y=conf.low),col="red")+
geom_step(data=km_f_plain,aes(x=time,y=conf.high),col="red")+
labs("Kaplan-Meier")+
theme_bw()+
xlim(c(0,2000))
na_f_log<- survfit(Surv(diseasefree,recurrence)~1,data=female_old,stype = 2)
na_f_logit<- survfit(Surv(diseasefree,recurrence)~1,data=female_old, conf.type="logit",stype = 2)
na_f_loglog<- survfit(Surv(diseasefree,recurrence)~1,data=female_old, conf.type="log-log",stype = 2)
na_f_plain<- survfit(Surv(diseasefree,recurrence)~1,data=female_old, conf.type="plain",stype = 2)
na_f_loglog<-t(na_f_loglog)
na_f_logit<-t(na_f_logit)
na_f_plain<-t(na_f_plain)
#visulation:
na_f_d_ci<-ggsurv(na_f_log)+
geom_step(data=na_f_loglog,aes(x=time,y=conf.low),col="blue")+
geom_step(data=na_f_loglog,aes(x=time,y=conf.high),col="blue")+
geom_step(data=na_f_logit,aes(x=time,y=conf.low),col="green")+
geom_step(data=na_f_logit,aes(x=time,y=conf.high),col="green")+
geom_step(data=na_f_plain,aes(x=time,y=conf.low),col="red")+
geom_step(data=na_f_plain,aes(x=time,y=conf.high),col="red")+
labs("Nelson-Aalen")+
theme_bw()+
xlim(c(0,2000))
ci<- km_f_d_ci|na_f_d_ci
ci+ plot_annotation(caption = "CI legend: black: default, green: logit scale, red: plain (Wald),blue: loglog scale")
#Log-rank test:
survdiff(Surv(overall,death) ~ sex,data = liver_data)
Call:
survdiff(formula = Surv(overall, death) ~ sex, data = liver_data)
N Observed Expected (O-E)^2/E (O-E)^2/V
sex=female 97 54 50.6 0.222 0.33
sex=male 195 103 106.4 0.106 0.33
Chisq= 0.3 on 1 degrees of freedom, p= 0.6
#Wilcoxon test:
survdiff(Surv(overall,death) ~ sex,data = liver_data, rho=1)
Call:
survdiff(formula = Surv(overall, death) ~ sex, data = liver_data,
rho = 1)
N Observed Expected (O-E)^2/E (O-E)^2/V
sex=female 97 37.3 36.1 0.0416 0.0825
sex=male 195 71.8 73.0 0.0206 0.0825
Chisq= 0.1 on 1 degrees of freedom, p= 0.8
km_hz<- survfit(Surv(diseasefree,recurrence)~1,data=liver_data)
str(km_hz)
List of 16
$ n : int 292
$ time : num [1:272] 1 5 10 11 12 14 16 21 26 27 ...
$ n.risk : num [1:272] 292 291 289 288 287 286 284 283 282 281 ...
$ n.event : num [1:272] 1 2 0 0 0 2 0 1 1 1 ...
$ n.censor : num [1:272] 0 0 1 1 1 0 1 0 0 0 ...
$ surv : num [1:272] 0.997 0.99 0.99 0.99 0.99 ...
$ std.err : num [1:272] 0.00343 0.00596 0.00596 0.00596 0.00596 ...
$ cumhaz : num [1:272] 0.00342 0.0103 0.0103 0.0103 0.0103 ...
$ std.chaz : num [1:272] 0.00342 0.00595 0.00595 0.00595 0.00595 ...
$ type : chr "right"
$ logse : logi TRUE
$ conf.int : num 0.95
$ conf.type: chr "log"
$ lower : num [1:272] 0.99 0.978 0.978 0.978 0.978 ...
$ upper : num [1:272] 1 1 1 1 1 ...
$ call : language survfit(formula = Surv(diseasefree, recurrence) ~ 1, data = liver_data)
- attr(*, "class")= chr "survfit"
hz<- with(km_hz,data.frame(
time=time[n.event!=0],
hazard=diff(c(0, cumhaz[n.event !=0]))/
diff(c(0, time[n.event !=0]))
))
hz_piecewise<- with(liver_data,
muhaz::pehaz(diseasefree,recurrence,max.time = 2500))
max.time= 2500
width= 108.0886
nbins= 24
hz_piecewise_bin_specific<-with(liver_data,
muhaz::pehaz(diseasefree,recurrence,max.time = 2500, width=20))
max.time= 2500
width= 20
nbins= 125
hz_piecewise_smooth<-with(liver_data,
muhaz::muhaz(diseasefree,recurrence,max.time = 2500))
hz_piecewise<- data.frame(time=hz_piecewise$Cuts[-1],
hazard=hz_piecewise$Hazard,
CHaz=cumsum(c(0,hz_piecewise$Hazard*hz_piecewise$Width))[-1]
)
hz_piecewise_smooth<- data.frame(time=hz_piecewise_smooth$est.grid,
hazard=hz_piecewise_smooth$haz.est,
CHaz=cumsum(hz_piecewise_smooth$haz.est*diff(c(0,hz_piecewise_smooth$est.grid))))
hz_piecewise_bin_specific<- data.frame(time=hz_piecewise_bin_specific$Cuts[-1],
hazard=hz_piecewise_bin_specific$Hazard,
CHaz=cumsum(c(0,hz_piecewise_bin_specific$Hazard*hz_piecewise_bin_specific$Width))[-1])
hz$CHaz<-cumsum(c(0,(diff(hz$time)))*hz$hazard)
hz_piecewise<-mutate(hz_piecewise,method=rep("Piecewise"))
hz_piecewise_smooth<-mutate(hz_piecewise_smooth,method=rep("Smooth"))
hz_piecewise_bin_specific<-mutate(hz_piecewise_bin_specific,method=rep("Bin=10"))
hz<-mutate(hz,method=rep("Definition"))
hz_c<-rbind(select(hz,time,hazard,CHaz,method),
hz_piecewise,
hz_piecewise_bin_specific,
hz_piecewise_smooth)
##hazard##
ggplot(hz_c,aes(time,hazard,col=method))+
geom_step()+
labs(y=expression(hat(lambda)(t)),x="time")+
scale_color_viridis_d()+
theme_bw()+
theme(legend.position = "top")
##Cumulative hazard##
ggplot(hz_c,aes(time,CHaz,col=method))+
geom_line()+
labs(y=expression(hat(Lambda)(t)),x="time")+
scale_color_viridis_d()+
theme_bw()+
theme(legend.position = "top")