ROC

  1 # -*- coding: utf-8 -*-
  2 # __author__ = "JieYao"
  3
  4 from biocluster.agent import Agent
  5 from biocluster.tool import Tool
  6 import os
  7 import string
  8 import types
  9 import subprocess
 10 from biocluster.core.exceptions import OptionError
 11
 12 class RocAgent(Agent):
 13     """
 14     需要calc_roc.pl
 15     version v1.1
 16     author: JieYao
 17     last_modifued:2016.08.22
 18     """
 19
 20     def __init__(self, parent):
 21         super(RocAgent, self).__init__(parent)
 22         options = [
 23             {"name": "mode", "type": "int", "default":1},
 24             {"name": "genus_table", "type": "string"},
 25             {"name": "group_table", "type": "string"},
 26             {"name": "method", "type": "string", "default":""},
 27             {"name": "name_table", "type": "string", "default":""},
 28             {"name": "top_n", "type": "int", "default": 20}
 29             ]
 30         self.add_option(options)
 31         self.step.add_steps(‘RocAnalysis‘)
 32         self.on(‘start‘, self.step_start)
 33         self.on(‘end‘, self.step_end)
 34
 35     def step_start(self):
 36         self.step.RocAnalysis.start()
 37         self.step.update()
 38
 39     def step_end(self):
 40         self.step.RocAnalysis.finish()
 41         self.step.update()
 42
 43     def check_options(self):
 44         if not os.path.exists(self.option(‘genus_table‘)):
 45             raise OptionError("必须提供Genus Table")
 46         if not os.path.exists(self.option(‘group_table‘)):
 47             raise OptionError("必须提供分组表格")
 48         if self.option(‘method‘):
 49             if self.option(‘method‘) not in [‘sum‘, ‘average‘, ‘median‘]:
 50                 raise OptionError("丰度计算方法只能选择sum,average,median之一")
 51         if self.option(‘mode‘) == 2:
 52             if not os.path.exists(self.option(‘name_table‘)):
 53                 raise OptionError("Mode 2 模式下必须提供物种名列表文件")
 54         os.system(‘cat %s | awk -F "\t" \‘{ print $1 }\‘ > tmp.txt‘ %(self.option(‘genus_table‘)))
 55         genus_data = open("tmp.txt", "r").readlines()[1:]
 56         os.remove(‘tmp.txt‘)
 57         genus_data = map(string.rstrip, genus_data)
 58         sample_data = open(self.option(‘genus_table‘), "r").readline().strip().split()[1:]
 59
 60         group_data = open(self.option(‘group_table‘), "r").readlines()[1:]
 61         group_data = map(string.strip, group_data)
 62         for s in group_data:
 63             if s.split()[0] not in sample_data:
 64                 raise OptionError("物种%s不在Genus Table中" % s.split()[0])
 65             if s.split()[1] not in [‘0‘,‘1‘]:
 66                 raise OptionError("物种分组只能有0和1！")
 67
 68         if self.option(‘mode‘)==2:
 69             name_data = open(self.option(‘name_table‘), "r").readlines()[1:]
 70             name_data = map(string.strip, name_data)
 71             for s in name_data:
 72                 if s not in genus_data:
 73                     raise OptionError("物种%s不在Genus Table中" % s)
 74
 75         if self.option(‘mode‘)==1:
 76             if self.option(‘top_n‘)>len(genus_data):
 77                 raise OptionError("选择丰度前N高物种时，设定的N多于物种总数：%d>%d" %(self.option(‘top_n‘), len(genus_data)))
 78
 79         return True
 80
 81
 82     def set_resource(self):
 83         """
 84         """
 85         self._cpu = 2
 86         self._memory = ‘‘
 87
 88     def end(self):
 89         result_dir = self.add_upload_dir(self.output_dir)
 90         result_dir.add_relpath_rules([
 91                 [".", "", "ROC分析结果目录"],
 92                 ["./roc_curve.pdf", "pdf", "ROC受试者工作特征曲线图"],
 93                 ["./roc_auc.xls", "xls", "ROC受试者工作特征曲线-AUC VALUE"]
 94                 ])
 95         print self.get_upload_files()
 96         super(RocAgent, self).end()
 97
 98 class RocTool(Tool):
 99     def __init__(self, config):
100         super(RocTool, self).__init__(config)
101         self._version = ‘1.0.1‘
102
103     def run(self):
104         """
105         运行
106         """
107         super(RocTool, self).run()
108         self.run_roc_perl()
109
110     def run_roc_perl(self):
111         """
112         运行calc_roc.perl
113         """
114         os.system(‘export PATH=/mnt/ilustre/users/sanger-dev/app/gcc/5.1.0/bin:$PATH‘)
115         os.system(‘export LD_LIBRARY_PATH=/mnt/ilustre/users/sanger-dev/app/gcc/5.1.0/lib64:$LD_LIBRARY_PATH‘)
116         cmd = self.config.SOFTWARE_DIR + ‘/program/perl/perls/perl-5.24.0/bin/perl ‘ + self.config.SOFTWARE_DIR + ‘/bioinfo/meta/scripts/plot_roc.pl ‘
117         cmd += ‘-o %s ‘ %(self.work_dir + ‘/ROC/‘)
118         cmd += ‘-i %s ‘ %(self.option(‘genus_table‘))
119         cmd += ‘-mode %d ‘ %(self.option(‘mode‘))
120         cmd += ‘-group %s ‘ %(self.option(‘group_table‘))
121         if self.option(‘mode‘)==2:
122             cmd += ‘-name %s ‘ %(self.option(‘name_table‘))
123         if self.option(‘method‘):
124             cmd += ‘-method %s ‘ %(self.option(‘method‘))
125         if self.option(‘mode‘)==1:
126             cmd += ‘-n %d ‘ %(self.option(‘top_n‘))
127         cmd += ‘-labels F ‘
128         self.logger.info(‘开始运行calc_roc.pl计算ROC相关数据‘)
129
130         try:
131             subprocess.check_output(cmd, shell=True)
132             self.logger.info(‘生成 roc.cmd.r 成功‘)
133         except subprocess.CalledProcessError:
134             self.logger.info(‘生成 roc.cmd.r 失败‘)
135             self.set_error(‘无法生成 roc.cmd.r 文件‘)
136         try:
137             subprocess.check_output(self.config.SOFTWARE_DIR +
138                                     ‘/program/R-3.3.1/bin/R --restore --no-save < %s/roc.cmd.r‘ % (self.work_dir + ‘/ROC‘), shell=True)
139             self.logger.info(‘ROC计算成功‘)
140         except subprocess.CalledProcessError:
141             self.logger.info(‘ROC计算失败‘)
142             self.set_error(‘R运行计算ROC失败‘)
143             raise "运行R脚本计算ROC相关数据失败"
144         self.logger.info(‘运行calc_roc.pl程序进行ROC计算完成‘)
145         allfiles = self.get_roc_filesname()
146         self.linkfile(self.work_dir + ‘/ROC/‘ + allfiles[0], ‘roc_curve.pdf‘)
147         self.linkfile(self.work_dir + ‘/ROC/‘ + allfiles[1], ‘roc_auc.xls‘)
148         self.end()
149
150     def linkfile(self, oldfile, newname):
151         newpath = os.path.join(self.output_dir, newname)
152         if os.path.exists(newpath):
153             os.remove(newpath)
154         os.link(oldfile, newpath)
155
156     def get_roc_filesname(self):
157         filelist = os.listdir(self.work_dir + ‘/ROC‘)
158         roc_curve = None
159         roc_auc = None
160         for name in filelist:
161             if ‘roc_curve.pdf‘ in name:
162                 roc_curve = name
163             elif ‘roc_aucvalue.xls‘ in name:
164                 roc_auc = name
165         if (roc_curve and roc_auc):
166             return [roc_curve, roc_auc]
167         else:
168             self.set_error("未知原因，ROC计算结果丢失")
169

  1  #!/usr/bin/perl
  2
  3 use strict;
  4 use warnings;
  5 use Getopt::Long;
  6 my %opts;
  7 my $VERSION = "v1.20160817";
  8
  9 GetOptions( \%opts,"mode=s","i=s","group=s","o=s","n=s","method=s","name=s","ncuts=i","labels=s","labelsize=s","rocci=s","siglevel=f","w=f","h=f","facet_wrap=s","theme=s");
 10 my $usage = <<"USAGE";
 11        Program : $0
 12        Discription:
 13        Version : $VERSION
 14        Usage :perl $0 [options]
 15                         -mode    *1 or 2 or 3; The Receiver:1)The relative abundance of the top n  Bacteria(could analysis with more than one group)
 16                                                            2)The relative abundance of special bacteria(one or more)
 17                                                            3)The receiver is any other factors
 18             -i    *Input genus table file(or other Taxonomic level) or any other factors(mode_3).
 19             -group  *Group name in mapping file .
 20                         -o    *Output dir
 21                         -n    (*mode_1)Top n genus or other taxonomic level(relative abundance)
 22                         -method (*mode_1)If you choose the mode_2 to analyze, you can also choose the analysis "methed". If you don‘t have a choice, you will make a separate analysis of them.   Follow method are available:sum, average, median
 23                         -name    (*mode_2)the name of Bacteria
 24             -ncuts    Number of cutpoints to display along each curve.Default:20
 25             -labels    Logical, display cutoff text labels:Default:F
 26             -labelsize    Size of cutoff text labels.Default:3
 27             -rocci    Confidence regions for the ROC curve.Default:F
 28             -siglevel    Significance level for the confidence regions.Default:0.05
 29             -w    default:6
 30             -h    default:5
 31             -facet_wrap      Logical,display group in different panel.Default:F
 32             -theme  themes for display roc.Follow themes are available:theme_bw,theme_classic,theme_dark,theme_gray,theme_light.Default:theme_bw
 33        Example:$0 -mode 1 -i genus.xls -group group.txt -o output_dir -n 30  -labels F -method sum
 34                    $0 -mode 2 -i genus.xls -group group_1.txt -o output_dir -name name.txt  -labels F -w 7.8
 35                    $0 -mode 2 -i genus.xls -group group_1.txt -o output_dir -name name.txt  -labels F -method sum
 36                    $0 -mode 3 -i factor.txt -group group.txt -o output_dir -labels F -w 7.8
 37
 38 USAGE
 39
 40 die $usage if(!($opts{mode}&&$opts{i}&&$opts{group}&&$opts{o}));
 41 #die $usage if(!(($opts{mode}==F)&&$opts{i}&&$opts{group}&&$opts{o}&&$opts{name}));
 42
 43
 44 $opts{n}=defined $opts{n}?$opts{n}:"20";
 45 $opts{method}=defined $opts{method}?$opts{method}:"chengchen.ye";
 46 $opts{name}=defined $opts{name}?$opts{name}:"NULL";
 47 $opts{ncuts}=defined $opts{ncuts}?$opts{ncuts}:20;
 48 $opts{labels}=defined $opts{labels}?$opts{labels}:"F";
 49 $opts{labelsize}=defined $opts{labelsize}?$opts{labelsize}:"3";
 50 $opts{w}=defined $opts{w}?$opts{w}:6;
 51 $opts{h}=defined $opts{h}?$opts{h}:5;
 52 $opts{facet_wrap}=defined $opts{facet_wrap}?$opts{facet_wrap}:"F";
 53 $opts{theme}=defined $opts{theme}?$opts{theme}:"";
 54 $opts{rocci}=defined $opts{rocci}?$opts{rocci}:"F";
 55 $opts{siglevel}=defined $opts{siglevel}?$opts{siglevel}:"0.05";
 56
 57 if(! -e $opts{o}){
 58         `mkdir $opts{o}`;
 59 }
 60
 61
 62 open CMD,">$opts{o}/roc.cmd.r";
 63
 64 print CMD "
 65     library(plotROC)
 66
 67
 68         group <- read.table(\"$opts{group}\",header=T,check.names=F,comment.char=\"\")
 69         otu_table <-read.table(\"$opts{i}\",sep=\"\t\",head=T,check.names = F,row.names=1)
 70         y<-length(otu_table[1,])
 71
 72
 73
 74
 75 ###The Receiver:A)The relative abundance of the Bacteria
 76 if($opts{mode}==1){
 77         group<-as.data.frame(group)
 78         x<-length(group[1,])
 79         ###D
 80         D<-group[,2]
 81
 82         ####M
 83         otu_table<-cbind(otu_table,rowsum=rowSums(otu_table))
 84         otu_table<- otu_table[order(otu_table[,y+1],decreasing=T),]
 85         otu_table_top20<- otu_table[1:$opts{n},]
 86              ####sum
 87 if(\"$opts{method}\"==\"sum\" ){
 88              v<-y+1
 89              m<-data.frame(colSums(otu_table_top20))[-v,]
 90 }
 91
 92
 93              ####average
 94 if(\"$opts{method}\"==\"average\"){
 95              v<-y+1
 96              m<-data.frame(colSums(otu_table_top20)/$opts{n})[-v,]
 97 }
 98
 99              ####median
100 if(\"$opts{method}\"==\"median\"){
101              top20<-otu_table_top20[,1:y]
102              m<-as.matrix(as.matrix(top20[ceiling($opts{n}/2),])[1,])
103 }
104
105          ###Z
106          Z<-c(rep(\"group1\",y))
107          i<-2
108
109          while (i <= x-1) {
110               ###D
111               D<-c(D,group[,i+1])
112               ###M
113               m<-c(m,m)
114               ###Z
115               w<-paste(\"group\",i,sep=\"\")
116               Z<-c(Z,rep(w,y))
117               i <- i +1
118          }
119          #
120          M <- scale(m, center=T,scale=T)  #标准化
121          if(x!=2){
122              rocdata<- data.frame(diagnosis = D, measure = M, group = Z)
123              p <- ggplot(rocdata, aes(m = measure , d = diagnosis ,color=group)) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
124
125              if($opts{rocci}==T){
126                  p <- p + geom_rocci(sig.level=$opts{siglevel})
127              }
128
129              if($opts{facet_wrap}==T){
130                  p <- p + facet_wrap(~ group) + theme(axis.text = element_text(size = 4))
131              }
132
133          }
134
135          if(x==2){
136
137              rocdata<- data.frame(diagnosis = D, measure = M)
138              p <- ggplot(rocdata, aes(m = measure , d = diagnosis )) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
139
140          if($opts{rocci}==T){
141              p <- p + geom_rocci(sig.level=$opts{siglevel})
142          }
143
144
145         }
146 x<-x-1
147 }
148
149 ###The Receiver:2)The relative abundance of special bacteria.
150 if($opts{mode}==2){
151
152 if(\"$opts{method}\"==\"chengchen.ye\"){
153        name <- read.table(\"$opts{name}\",header=T,check.names=F,comment.char=\"\")
154        name<-as.data.frame(name)
155        x<-length(name[,1])
156        a<-as.character(name[1,1])
157        D<-group[,2]
158        m<-as.matrix(as.matrix(otu_table)[a,])
159
160        Z<-c(rep(a,y))
161        i<-2
162        while (i <= x) {
163            D<-c(D,group[,2])
164            a<-as.character(name[i,1])
165            m<-c(m,as.matrix(as.matrix(otu_table)[a,]))
166            a<-as.character(name[i,1])
167            Z<-c(Z,rep(a,y))
168            i <- i +1
169        }
170
171
172         M <- scale(m, center=T,scale=T)  #标准化
173 if(x!=1){
174         rocdata<- data.frame(diagnosis = D, measure = M, Bacteria = Z)
175     p <- ggplot(rocdata, aes(m = measure , d = diagnosis ,color=Bacteria)) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
176
177 if($opts{rocci}==T){
178     p <- p + geom_rocci(sig.level=$opts{siglevel})
179 }
180
181 if($opts{facet_wrap}==T){
182     p <- p + facet_wrap(~ group) + theme(axis.text = element_text(size = 4))
183     }
184
185 }
186
187 if(x==1){
188
189         rocdata<- data.frame(diagnosis = D, measure = M)
190         p <- ggplot(rocdata, aes(m = measure , d = diagnosis )) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
191
192 if($opts{rocci}==T){
193         p <- p + geom_rocci(sig.level=$opts{siglevel})
194         }
195 }
196 }
197 ######choose method
198 if(\"$opts{method}\"!=\"chengchen.ye\"){
199 name <- read.table(\"name.txt\",header=T,check.names=F,comment.char=\"\")
200 otu_table<-as.data.frame(otu_table)
201 nu<-length(name[,1])
202
203 genus<-otu_table[as.character(name[1,1]),]
204 i<-2
205 while (i <= nu) {
206 genus<-rbind(genus,otu_table[as.character(name[i,1]),])
207 i<-i+1
208 }
209 genus
210 group<-as.data.frame(group)
211 x<-length(group[1,])
212     ###D
213         D<-group[,2]
214
215     ####M
216         genus<-cbind(genus,rowsum=rowSums(genus))
217         genus<- genus[order(genus[,y+1],decreasing=T),]
218
219
220
221              ####sum
222 if(\"$opts{method}\"==\"sum\" ){
223              hh<-y+1
224              m<-data.frame(colSums(genus))[-hh,]
225 }
226
227
228              ####average
229 if(\"$opts{method}\"==\"average\"){
230              hh<-y+1
231              m<-data.frame(colSums(genus)/nu)[-hh,]
232 }
233
234              ####median
235 if(\"$opts{method}\"==\"median\"){
236              genus_order<-genus[,1:y]
237              m<-as.matrix(as.matrix(genus_order[ceiling(nu/2),])[1,])
238 }
239      ###Z
240          Z<-c(rep(\"group1\",y))
241          i<-2
242
243          while (i <= x-1) {
244               ###D
245               D<-c(D,group[,i+1])
246               ###M
247               m<-c(m,m)
248               ###Z
249               w<-paste(\"group\",i,sep=\"\")
250               Z<-c(Z,rep(w,y))
251               i <- i +1
252          }
253          #
254          M <- scale(m, center=T,scale=T)  #标准化
255          if(x!=2){
256              rocdata<- data.frame(diagnosis = D, measure = M, group = Z)
257              p <- ggplot(rocdata, aes(m = measure , d = diagnosis ,color=group)) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
258
259              if($opts{rocci}==T){
260                  p <- p + geom_rocci(sig.level=$opts{siglevel})
261              }
262
263              if($opts{facet_wrap}==T){
264                  p <- p + facet_wrap(~ group) + theme(axis.text = element_text(size = 4))
265              }
266
267          }
268
269          if(x==2){
270
271              rocdata<- data.frame(diagnosis = D, measure = M)
272              p <- ggplot(rocdata, aes(m = measure , d = diagnosis )) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
273
274          if($opts{rocci}==T){
275              p <- p + geom_rocci(sig.level=$opts{siglevel})
276          }
277
278
279         }
280
281
282
283
284 x<-x-1
285
286
287
288 }
289 }
290
291
292
293
294 ###The Receiver:3)The receiver is any other factors
295 if($opts{mode}==3){
296
297 factor <-read.table(\"$opts{i}\",sep=\"\t\",head=T,check.names = F,row.names=1)
298 x<-length(factor[1,])
299
300 D<-group[,2]
301
302 m<-factor[,1]
303 ####Z
304 factor_name <-read.table(\"$opts{i}\",sep=\"\t\",check.names = F,row.names=1)
305 name<-as.character(factor_name[1,1])
306 len<-length(factor[,1])
307 Z<-c(rep(name,len))
308 i<-2
309 while (i <= x) {
310     D<-c(D,D)
311         m<-c(m,factor[,i])
312             name<-as.character(factor_name[1,i])
313                 Z<-c(Z,rep(name,len))
314                     i <- i +1
315                     }
316 M <- scale(m, center=T,scale=T)
317
318 if(x!=1){
319         rocdata<- data.frame(diagnosis = D, measure = M, factor = Z)
320         p <- ggplot(rocdata, aes(m = measure , d = diagnosis ,color=factor)) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
321
322 if($opts{rocci}==T){
323         p <- p + geom_rocci(sig.level=$opts{siglevel})
324 }
325
326 if($opts{facet_wrap}==T){
327         p <- p + facet_wrap(~ group) + theme(axis.text = element_text(size = 4))
328         }
329
330 }
331
332 if(x==1){
333
334         rocdata<- data.frame(diagnosis = D, measure = M)
335         p <- ggplot(rocdata, aes(m = measure , d = diagnosis )) + geom_roc(labels=$opts{labels},labelsize=$opts{labelsize},n.cuts=$opts{ncuts}) + style_roc(major.breaks = c(0, 0.25, 0.5, 0.75, 1), theme=$opts{theme},xlab=\"1-Specificity\",ylab=\"Sensitivity\")
336
337 if($opts{rocci}==T){
338         p <- p + geom_rocci(sig.level=$opts{siglevel})
339         }
340
341 }
342
343
344 }
345
346
347
348
349
350
351
352
353 ### Caculate the Area under the ROC curve
354 p.auc <- calc_auc(p)
355 write.table(p.auc,\"$opts{o}/roc_aucvalue.xls\",col.names=T,row.names=F,sep=\"\t\",quote=F)
356
357
358 if($opts{mode}==1){
359
360
361 ###paste AUC to graph
362 if(x==1){
363
364 test_auc<-paste(\"AUC=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
365
366 p<-p + geom_text(x=0.8,y=0.1,label=test_auc,size=4)
367
368
369 }
370
371
372
373 if(x==2){
374 test_auc1<-paste(\"AUC_group1=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
375 test_auc2<-paste(\"AUC_group2=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
376 p<-p + geom_text(x=0.8,y=0.21,label=test_auc1,size=4,colour=\"black\")
377 p<-p + geom_text(x=0.8,y=0.15,label=test_auc2,size=4,colour=\"black\")
378
379 }
380
381
382 if(x==3){
383 test_auc1<-paste(\"AUC_group1=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
384 test_auc2<-paste(\"AUC_group2=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
385 test_auc3<-paste(\"AUC_group3=\",round(p.auc[3,3] * 100, 2),\"%\",sep = \"\")
386 p<-p + geom_text(x=0.8,y=0.18,label=test_auc1,size=4,colour=\"black\")
387 p<-p + geom_text(x=0.8,y=0.12,label=test_auc2,size=4,colour=\"black\")
388 p<-p + geom_text(x=0.8,y=0.06,label=test_auc3,size=4,colour=\"black\")
389 }
390
391 }
392
393 if($opts{mode}==2){
394
395 if(\"$opts{method}\"==\"chengchen.ye\"){
396
397 ###auc_name
398 auc_name<-as.data.frame(sort(name[,1],decreasing=F))
399
400 ###paste AUC to graph
401 if(x==1){
402
403 test_auc<-paste(\"AUC=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
404
405 p<-p + geom_text(x=0.8,y=0.1,label=test_auc,size=4)
406
407
408 }
409
410
411
412 if(x==2){
413 test_auc1<-paste(\"AUC\(\",as.character(auc_name[1,1]),\"\)=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
414 test_auc2<-paste(\"AUC\(\",as.character(auc_name[2,1]),\"\)=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
415 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.2,vjust=-4,label=test_auc1,size=4,colour=\"black\")
416 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.2,vjust=-2,label=test_auc2,size=4,colour=\"black\")
417
418 }
419
420
421 if(x==3){
422 test_auc1<-paste(\"AUC\(\",as.character(auc_name[1,1]),\"\)=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
423 test_auc2<-paste(\"AUC\(\",as.character(auc_name[2,1]),\"\)=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
424 test_auc3<-paste(\"AUC\(\",as.character(auc_name[3,1]),\"\)=\",round(p.auc[3,3] * 100, 2),\"%\",sep = \"\")
425 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.1,vjust=-6,label=test_auc1,size=4,colour=\"black\")
426 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.1,vjust=-4,label=test_auc2,size=4,colour=\"black\")
427 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.1,vjust=-2,label=test_auc3,size=4,colour=\"black\")
428 }
429
430
431 }
432
433 if(\"$opts{method}\"!=\"chengchen.ye\"){
434
435 ###paste AUC to graph
436 if(x==1){
437
438 test_auc<-paste(\"AUC=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
439
440 p<-p + geom_text(x=0.8,y=0.1,label=test_auc,size=4)
441
442
443 }
444
445
446
447 if(x==2){
448 test_auc1<-paste(\"AUC_group1=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
449 test_auc2<-paste(\"AUC_group2=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
450 p<-p + geom_text(x=0.8,y=0.3,label=test_auc1,size=4,colour=\"black\")
451 p<-p + geom_text(x=0.8,y=0.25,label=test_auc2,size=4,colour=\"black\")
452
453 }
454
455
456 if(x==3){
457 test_auc1<-paste(\"AUC_group1=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
458 test_auc2<-paste(\"AUC_group2=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
459 test_auc3<-paste(\"AUC_group3=\",round(p.auc[3,3] * 100, 2),\"%\",sep = \"\")
460 p<-p + geom_text(x=0.8,y=0.22,label=test_auc1,size=4,colour=\"black\")
461 p<-p + geom_text(x=0.8,y=0.16,label=test_auc2,size=4,colour=\"black\")
462 p<-p + geom_text(x=0.8,y=0.1,label=test_auc3,size=4,colour=\"black\")
463 }
464 }
465 }
466
467
468
469 if($opts{mode}==3){
470
471
472 ###auc_name
473
474 auc_name<-as.data.frame(sort(factor_name[1,],decreasing=F))
475
476
477 ###paste AUC to graph
478 if(x==1){
479
480 test_auc<-paste(\"AUC=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
481
482 p<-p + geom_text(x=0.8,y=0.1,label=test_auc,size=4)
483
484
485 }
486
487
488
489 if(x==2){
490 test_auc1<-paste(\"AUC_\",as.character(auc_name[1,1]),\"=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
491 test_auc2<-paste(\"AUC_\",as.character(auc_name[1,2]),\"=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
492 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.2,vjust=-4,label=test_auc1,size=4,colour=\"black\")
493 p<-p + geom_text(x=Inf,y=-Inf,hjust=1.2,vjust=-2,label=test_auc2,size=4,colour=\"black\")
494
495 }
496
497
498 if(x==3){
499 test_auc1<-paste(\"AUC_\",as.character(auc_name[1,1]),\"=\",round(p.auc[1,3] * 100, 2),\"%\",sep = \"\")
500 test_auc2<-paste(\"AUC_\",as.character(auc_name[1,2]),\"=\",round(p.auc[2,3] * 100, 2),\"%\",sep = \"\")
501 test_auc3<-paste(\"AUC_\",as.character(auc_name[1,3]),\"=\",round(p.auc[3,3] * 100, 2),\"%\",sep = \"\")
502 p<-p + geom_text(x=0.8,y=0.18,label=test_auc1,size=4,colour=\"black\")
503 p<-p + geom_text(x=0.8,y=0.12,label=test_auc2,size=4,colour=\"black\")
504 p<-p + geom_text(x=0.8,y=0.06,label=test_auc3,size=4,colour=\"black\")
505 }
506
507
508
509
510
511
512
513
514 }
515
516
517 pdf(\"$opts{o}/roc_curve.pdf\",width=$opts{w},height=$opts{h})
518 p
519 dev.off()
520
521
522 ";
523
524
525
526
527
528
529 `R --restore --no-save < $opts{o}/roc.cmd.r`;

时间： 2024-10-05 05:05:05

ROC的相关文章

ROC，AUC，Precision，Recall，F1的介绍与计算（转）

1. 基本概念 1.1 ROC与AUC ROC曲线和AUC常被用来评价一个二值分类器(binary classifier)的优劣,ROC曲线称为受试者工作特征曲线 (receiver operating characteristic curve,简称ROC曲线),又称为感受性曲线(sensitivity curve),AUC(Area Under Curve)是ROC曲线下的面积.在计算ROC曲线之前,首先要了解一些基本概念.在二元分类模型的预测结果有四种,以判断人是否有病为例: 真阳性(TP)

ROC和AUC介绍以及如何计算AUC

ROC(Receiver Operating Characteristic)曲线和AUC常被用来评价一个二值分类器(binary classifier)的优劣,对两者的简单介绍见这里.这篇博文简单介绍ROC和AUC的特点,以及更为深入地,讨论如何作出ROC曲线图以及计算AUC. ROC曲线需要提前说明的是,我们这里只讨论二值分类器.对于分类器,或者说分类算法,评价指标主要有precision,recall,F-score1,以及我们今天要讨论的ROC和AUC.下图是一个ROC曲线的示例2. 正

精确率与召回率，RoC曲线与PR曲线

在机器学习的算法评估中,尤其是分类算法评估中,我们经常听到精确率(precision)与召回率(recall),RoC曲线与PR曲线这些概念,那这些概念到底有什么用处呢? 首先,我们需要搞清楚几个拗口的概念: 1. TP, FP, TN, FN True Positives,TP:预测为正样本,实际也为正样本的特征数 False Positives,FP:预测为正样本,实际为负样本的特征数 True Negatives,TN:预测为负样本,实际也为负样本的特征数 False Negatives,

ROC曲线、AUC、Precision、Recall、F-measure理解及Python实现

本文首先从整体上介绍ROC曲线.AUC.Precision.Recall以及F-measure,然后介绍上述这些评价指标的有趣特性,最后给出ROC曲线的一个Python实现示例. 一.ROC曲线.AUC.Precision.Recall以及F-measure 二分类问题的预测结果可能正确,也可能不正确.结果正确存在两种可能:原本对的预测为对,原本错的预测为错:结果错误也存在两种可能:原本对的预测为错,原本错的预测为对,如Fig 1左侧所示.其中Positives代表预测是对的,Negatives

【转】ROC和AUC介绍以及如何计算AUC

转自:https://www.douban.com/note/284051363/ ROC(Receiver Operating Characteristic)曲线和AUC常被用来评价一个二值分类器(binary classifier)的优劣,对两者的简单介绍见[这里](http://bubblexc.com/y2011/148/).这篇博文简单介绍ROC和AUC的特点,以及更为深入地,讨论如何作出ROC曲线图以及计算AUC. # ROC曲线需要提前说明的是,我们这里只讨论二值分类器.对于分类器

机器学习：ACC、ROC和AUC

? ? 引言 ? ? 很多时候我们都用到ROC和AUC来评判一个二值分类器的优劣,其实AUC跟ROC息息相关,AUC就是ROC曲线下部分的面积,所以需要首先知道什么是ROC,ROC怎么得来的.然后我们要知道一般分类器会有个准确率ACC,那么既然有了ACC,为什么还要有ROC呢,ACC和ROC的区别又在哪儿,这是我喜欢的一种既生瑜何生亮问题. ? ? 最后又简单说明了一下有了ROC之后,为什么还要有AUC呢 ? ? ROC简介 ? ? ROC曲线的横坐标为false positive rate(F

【转】AUC(Area Under roc Curve )计算及其与ROC的关系

让我们从头说起,首先AUC是一种用来度量分类模型好坏的一个标准.这样的标准其实有很多,例如:大约10年前在machine learning文献中一统天下的标准:分类精度:在信息检索(IR)领域中常用的recall和precision,等等.其实,度量反应了人们对” 好”的分类结果的追求,同一时期的不同的度量反映了人们对什么是”好”这个最根本问题的不同认识,而不同时期流行的度量则反映了人们认识事物的深度的变化.近年来,随着machine learning的相关技术从实验室走向实际应用,一些实际的

ROC曲线和PR曲线绘制【转】

TPR=TP/P :真正率:判断对的正样本占所有正样本的比例. Precision=TP/(TP+FP) :判断对的正样本占判断出来的所有正样本的比例 FPR=FP/N :负正率:判断错的负样本占所有负样本的比例. Recall = TP/(TP+FN) = TP/P,就是TPR. ROC曲线:横轴是FPR,纵轴是TPR. 绘制出的曲线应该在y=x直线之上,曲线积分的结果就是AUC的值.AUC越大则系统分类性能越好. PR曲线:横轴是Precision,纵轴是recall. precision

ROC曲线

ROC曲线指受试者工作特征曲线 / 接收器操作特性曲线(receiver operating characteristic curve), 是反映敏感性和特异性连续变量的综合指标,是用构图法揭示敏感性和特异性的相互关系,它通过将连续变量设定出多个不同的临界值,从而计算出一系列敏感性和特异性,再以敏感性为纵坐标.(1-特异性)为横坐标绘制成曲线,曲线下面积越大,诊断准确性越高.在ROC曲线上,最靠近坐标图左上方的点为敏感性和特异性均较高的临界值. ROC曲线的例子考虑一个二分问题,即将实例分成正

从TP、FP、TN、FN到ROC曲线、miss rate、行人检测评估

想要在行人检测的evaluation阶段要计算miss rate,就要从True Positive Rate讲起:miss rate = 1 - true positive rate true positive rate毕竟是一个rate,是一个比值.是谁和谁比呢?P 要从TP.FP.TN.FN讲起. 考虑一个二分类问题:一个item,它实际值有0.1两种取值,即负例.正例:而二分类算法预测出来的结果,也只有0.1两种取值,即负例.正例.我们不考虑二分类算法细节,当作黑箱子就好:我们关心的是,预