library(survival)
survival <-read.csv(file ="Graveyard_Survival.csv")
attach(survival)
survival$Death_Age <- as.numeric(Death_Year-Birth_Year)
survival$Event<-as.logical(rep("TRUE",length(survival$Death_Age)))
survival$Century<-as.factor(ifelse(survival$Birth_Year<1900,"19th","20th"))
# Plotting survival curves by sex
sfit2<-survfit(Surv(Death_Age, Event)~Sex, data=survival)
summary(sfit2, times=seq(0,100,10))
## Call: survfit(formula = Surv(Death_Age, Event) ~ Sex, data = survival)
##
## Sex=F
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 262 0 1.0000 0.00000 1.00000 1.0000
## 10 233 34 0.8702 0.02076 0.83047 0.9119
## 20 208 23 0.7824 0.02549 0.73405 0.8340
## 30 177 31 0.6641 0.02918 0.60933 0.7238
## 40 155 20 0.5878 0.03041 0.53111 0.6505
## 50 134 25 0.4924 0.03089 0.43540 0.5568
## 60 105 26 0.3931 0.03018 0.33822 0.4570
## 70 78 26 0.2939 0.02814 0.24360 0.3546
## 80 45 32 0.1718 0.02330 0.13165 0.2241
## 90 7 40 0.0191 0.00845 0.00801 0.0455
##
## Sex=M
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 288 0 1.00000 0.00000 1.00000 1.0000
## 10 257 33 0.88542 0.01877 0.84938 0.9230
## 20 230 28 0.78819 0.02408 0.74239 0.8368
## 30 200 31 0.68056 0.02747 0.62878 0.7366
## 40 157 44 0.52778 0.02942 0.47316 0.5887
## 50 128 26 0.43750 0.02923 0.38380 0.4987
## 60 101 29 0.33681 0.02785 0.28642 0.3961
## 70 53 46 0.17708 0.02249 0.13806 0.2271
## 80 21 32 0.06597 0.01463 0.04272 0.1019
## 90 3 17 0.00694 0.00489 0.00175 0.0276
plot(sfit2) # pretty bad

#devtools::install_github("kassambara/survminer", build_vignettes = FALSE)
#devtools::install_github("kassambara/ggpubr")
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
library(survminer)
# ggplot survival curves: not as curvy as how we calculated
# ggsurvplot(sfit2)
ggsurvplot(sfit2, conf.int=TRUE, pval=TRUE, risk.table=FALSE, legend.title="Sex",
legend.labs=c("Female", "Male"),
palette=c("orchid2","dodgerblue2"),
main="Kaplan-Meier Curve for Survival between sexes")

# Plotting survival curves by century
sfit3<-survfit(Surv(Death_Age, Event)~Century, data=survival)
summary(sfit3, times=seq(0,100,10))
## Call: survfit(formula = Surv(Death_Age, Event) ~ Century, data = survival)
##
## Century=19th
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 293 0 1.00000 0.00000 1.00000 1.0000
## 10 258 41 0.86007 0.02027 0.82125 0.9007
## 20 224 32 0.75085 0.02527 0.70293 0.8020
## 30 177 49 0.58362 0.02880 0.52982 0.6429
## 40 135 38 0.45392 0.02909 0.40035 0.5147
## 50 113 23 0.37543 0.02829 0.32388 0.4352
## 60 92 22 0.30034 0.02678 0.25218 0.3577
## 70 59 31 0.19454 0.02313 0.15411 0.2456
## 80 28 29 0.09556 0.01718 0.06719 0.1359
## 90 4 26 0.00683 0.00481 0.00172 0.0272
##
## Century=20th
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 257 0 1.0000 0.00000 1.00000 1.0000
## 10 232 26 0.8988 0.01881 0.86271 0.9365
## 20 214 19 0.8249 0.02371 0.77972 0.8727
## 30 200 13 0.7743 0.02608 0.72486 0.8272
## 40 177 26 0.6732 0.02926 0.61818 0.7330
## 50 149 28 0.5642 0.03093 0.50672 0.6282
## 60 114 33 0.4358 0.03093 0.37920 0.5008
## 70 72 41 0.2763 0.02789 0.22667 0.3367
## 80 38 35 0.1401 0.02165 0.10347 0.1896
## 90 6 31 0.0195 0.00862 0.00817 0.0463
plot(sfit3) # pretty crappy

ggsurvplot(sfit3, conf.int=TRUE, pval=TRUE, risk.table=FALSE, legend.title="Century",
legend.labs=c("19th Century", "20th Century"),
palette=c("darkslategrey","blue"),
main="Kaplan-Meier Curve for Survival between centuries")

sfit4<-survfit(Surv(Death_Age, Event)~Century+Sex, data=survival)
ggsurvplot(sfit4)

## Cox PH test: Cox regression (or proportional hazards regression) allows analyzing the effect of several risk factors on survival
# Hazard Ratio (HR) is exp(coef)
#HR=1: No effect
#HR>1: Increase in hazard
#HR<1: Reduction in hazard (protective)
diff_Sex <- coxph(Surv(Death_Age, Event)~Sex, data=survival)
diff_Sex
## Call:
## coxph(formula = Surv(Death_Age, Event) ~ Sex, data = survival)
##
## coef exp(coef) se(coef) z p
## SexM 0.2562 1.2921 0.0867 2.96 0.0031
##
## Likelihood ratio test=8.76 on 1 df, p=0.00308
## n= 550, number of events= 550
## since exp(coef)>1, males have increase in hazard
diff_Century <- coxph(Surv(Death_Age, Event)~Century, data=survival)
diff_Century
## Call:
## coxph(formula = Surv(Death_Age, Event) ~ Century, data = survival)
##
## coef exp(coef) se(coef) z p
## Century20th -0.323 0.724 0.086 -3.75 0.00017
##
## Likelihood ratio test=14.1 on 1 df, p=0.00017
## n= 550, number of events= 550
## since exp(coef)<1, 20th century have reduction in hazard (hazard = probablity of death)
diff_Sex_Century <- coxph(Surv(Death_Age, Event)~Sex+Century, data=survival)
diff_Sex_Century
## Call:
## coxph(formula = Surv(Death_Age, Event) ~ Sex + Century, data = survival)
##
## coef exp(coef) se(coef) z p
## SexM 0.2364 1.2667 0.0867 2.73 0.00640
## Century20th -0.3081 0.7348 0.0862 -3.58 0.00035
##
## Likelihood ratio test=21.6 on 2 df, p=2.05e-05
## n= 550, number of events= 550
## both are significant
## logrank test: log-rank test is useful for comparing survival curves in two or more groups
#using survdiff in survival package
survdiff(Surv(Death_Age, Event)~Sex, data=survival)
## Call:
## survdiff(formula = Surv(Death_Age, Event) ~ Sex, data = survival)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Sex=F 262 262 296 3.83 8.87
## Sex=M 288 288 254 4.45 8.87
##
## Chisq= 8.9 on 1 degrees of freedom, p= 0.0029
survdiff(Surv(Death_Age, Event)~Century, data=survival)
## Call:
## survdiff(formula = Surv(Death_Age, Event) ~ Century, data = survival)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Century=19th 293 293 250 7.50 14.5
## Century=20th 257 257 300 6.24 14.5
##
## Chisq= 14.5 on 1 degrees of freedom, p= 0.000138
survdiff(Surv(Death_Age, Event)~Century+Sex, data=survival)
## Call:
## survdiff(formula = Surv(Death_Age, Event) ~ Century + Sex, data = survival)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Century=19th, Sex=F 140 140 125 1.883 2.556
## Century=19th, Sex=M 153 153 125 6.255 8.510
## Century=20th, Sex=F 122 122 171 14.017 22.142
## Century=20th, Sex=M 135 135 129 0.248 0.343
##
## Chisq= 24.3 on 3 degrees of freedom, p= 2.19e-05