0
点赞
收藏
分享

微信扫一扫

数据结构排序之插入排序,希尔排序详解

Brose 2024-11-07 阅读 28
python

加载包

rm(list = ls())
options(stringsAsFactors = F)
gc()
library(TCGAbiolinks)
library(scRNAseq)
library(data.table)
library(limma)
library(dplyr)
library(DT)
library(survival)
library(survminer)

具体如何获取矩阵和生存数据,在我去年TCGA更新之后写的很清楚

TCGAbiolinks整理表达数据和临床数据这篇推文地址在下面

https://blog.csdn.net/sayhello1025/article/details/125218337

下载TCGA 胃癌的数据

###表达数据下载
query <- GDCquery(
  project = "TCGA-STAD",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification", 
  workflow.type = "STAR - Counts"
)
GDCdownload(query = query)
expData<- GDCprepare(query = query,
                     save = TRUE,
                     save.filename = 'STAD_exp.rda'
)
##利用scRNAseq这个包直接获取
tpm_data <- assay(expData,i = "tpm_unstrand")
row_file <-  data.table::fread('./GDCdata/TCGA-STAD/Transcriptome_Profiling/Gene_Expression_Quantification/002cf74b-098b-4de6-9a76-0d889c894ab6/49a0c23d-e0e7-4b67-b9f0-527c14105345.rna_seq.augmented_star_gene_counts.tsv',data.table = F)
row_file <- row_file[-c(1:4),]
rownames(row_file) <- row_file[,1]
###
same <- intersect(row.names(tpm_data),row.names(row_file))
STAD_tpmExp <- cbind(row_file[same,],tpm_data[same,])
STAD_tpmExp <- STAD_tpmExp[,-c(1,3:9)]
rt=as.matrix(STAD_tpmExp)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
STAD_exp=data[rowMeans(data)>0,]

Out=rbind(id=colnames(STAD_exp), STAD_exp)
write.table(Out, file="./STAD_exp.txt", sep="\t", quote=F, col.names=F)

下在临床数据

query <- GDCquery(
  project = "TCGA-STAD", 
  data.category = "Clinical",
  data.type = "Clinical Supplement", 
  data.format = "BCR XML"
)
GDCdownload(query)
cli <- GDCprepare_clinic(query,'follow_up')

cli <- cli  %>%
  select(bcr_followup_barcode,vital_status,                            
         days_to_death,days_to_last_followup) %>%
  distinct(bcr_followup_barcode, .keep_all = TRUE)
table(cli$vital_status)

dead_patient <-  cli %>%
  dplyr::filter(vital_status == 'Dead') %>%
  dplyr::select(-days_to_last_followup) %>%
  rename(c(bcr_followup_barcode = 'Barcode',
           vital_status = 'fustat',
           days_to_death='futime'
  )) %>%
  mutate(fustat=ifelse(fustat=='Dead',1,0))%>%
  mutate(futime=futime/365) 
alive_patient <-  cli %>%
  dplyr::filter(vital_status == 'Alive') %>%
  dplyr::select(-days_to_death) %>%
  rename(c(bcr_followup_barcode = 'Barcode',
           vital_status = 'fustat',
           days_to_last_followup='futime'
  )) %>%
  mutate(fustat=ifelse(fustat=='Dead',1,0))%>%
  mutate(futime=futime/365) 
survival_data <- rbind(dead_patient,alive_patient)
write.csv(survival_data,file="./STAD_surviv.csv")

整合后跑生存分析

rt =read.table('./STAD_exp.txt',sep = '\t',header = T,check.names = F,row.names = 1)
gene = c('TNF','IL6','MMP9','MYC','HIF1A','PTGS2','MAPK3','IFNG','IL10','PPARG',
         'CCND1','EGF','ICAM1','FOS','IL1A','IL2','CRP','MPO','CDKN1A','SERPINE1')
rt1 = rt[gene,]
exp <- data.frame(t(rt1))
rownames(exp) <- gsub('\\.','\\-',rownames(exp))
cli <- read.csv('./STAD_surviv.csv',header = T,row.names = 1)
exp$ID1 <- substr(rownames(exp), 1, 12)
cli$ID2 <- substr(rownames(cli), 1, 12)
map <- match(exp$ID,cli$ID)

data3 <- cli[map,]##注意看这个
data4 <- cbind(exp,data3)
data5 <- na.omit(data4)
desired_columns <- c("fustat","futime",'ID1','ID2')
all_columns <- c(desired_columns, setdiff(names(data5), desired_columns))
data6 <- data5[all_columns]

生存分析

for (i in (colnames(rt)[5:24])) {
  var=i
  data=rt[,c("futime","fustat",var)]
  group=ifelse(data[,3]>median(data[,3]),"High","Low")
  diff=survdiff(Surv(futime, fustat) ~group,data = data)
  pValue=1-pchisq(diff$chisq,df=1)
  if(pValue<0.001){
    pValue="p<0.001"
  }else{
    pValue=paste0("p=",sprintf("%.03f",pValue))
  }
  fit <- survfit(Surv(futime, fustat) ~ group, data = data)
  surPlot=ggsurvplot(fit, 
                     data=data,
                     conf.int=FALSE,
                     pval=pValue,
                     pval.size=5,
                     legend.labs=c("High", "Low"),
                     legend.title=var,
                     xlab="Time(years)",
                     break.time.by = 1,
                     risk.table.title="",
                     palette=c("#ED545C","#0075C5"),
                     risk.table=T,ggtheme = theme_bw(),
                     risk.table.height=.25)
  filename= paste0('./01.survival/',var,'.pdf')
  pdf(file=filename,onefile = FALSE,width = 6,height =5)
  print(surPlot)
  dev.off()
}

效果如下
在这里插入图片描述

举报

相关推荐

0 条评论