Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 25 Mar 2022 13:49:39 GMT
From:      Po-Chuan Hsieh <sunpoet@FreeBSD.org>
To:        ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org
Subject:   git: 0ffe3e793684 - main - biology/py-crossmap: Fix build with setuptools 58.0.0+
Message-ID:  <202203251349.22PDndm8027606@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=0ffe3e7936844995675cbddb450a11aa59aa8393

commit 0ffe3e7936844995675cbddb450a11aa59aa8393
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2022-03-25 13:32:02 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2022-03-25 13:38:04 +0000

    biology/py-crossmap: Fix build with setuptools 58.0.0+
    
    With hat:       python
---
 biology/py-crossmap/files/patch-2to3 | 2955 ++++++++++++++++++++++++++++++++++
 1 file changed, 2955 insertions(+)

diff --git a/biology/py-crossmap/files/patch-2to3 b/biology/py-crossmap/files/patch-2to3
new file mode 100644
index 000000000000..c3b680224b43
--- /dev/null
+++ b/biology/py-crossmap/files/patch-2to3
@@ -0,0 +1,2955 @@
+--- lib/cmmodule/SAM.py.orig	2018-12-17 16:05:26 UTC
++++ lib/cmmodule/SAM.py
+@@ -150,52 +150,52 @@ class ParseSAM:
+ 					forward_SE +=1
+ 				
+ 		if paired:				
+-			print >>sys.stderr,"\n#=================================================="
+-			print >>sys.stderr,"#================Report (pair-end)================="
+-			print >>sys.stderr, "%-25s%d" % ("Total Reads:",total_read)
+-			print >>sys.stderr, "%-25s%d" % ("Total Mapped Reads:", (mapped_read1 + mapped_read2))
+-			print >>sys.stderr, "%-25s%d" % ("Total Unmapped Reads:",(unmapped_read1 + unmapped_read2))
+-			print >>sys.stderr, "%-25s%d" % ("PCR duplicate:",pcr_duplicate)
+-			print >>sys.stderr, "%-25s%d" % ("QC-failed:",low_qual)
+-			print >>sys.stderr, "%-25s%d" % ("Not primary mapping:",secondary_hit)
+-			print >>sys.stderr, "\n",
+-			print >>sys.stderr, "%-25s%d" % ("Unmapped Read-1:",unmapped_read1)
+-			print >>sys.stderr, "%-25s%d" % ("Mapped Read-1:",mapped_read1)
+-			print >>sys.stderr, "%-25s%d" % ("  Forward (+):",forward_read1)
+-			print >>sys.stderr, "%-25s%d" % ("  Reverse (-):",reverse_read1)
++			print("\n#==================================================", file=sys.stderr)
++			print("#================Report (pair-end)=================", file=sys.stderr)
++			print("%-25s%d" % ("Total Reads:",total_read), file=sys.stderr)
++			print("%-25s%d" % ("Total Mapped Reads:", (mapped_read1 + mapped_read2)), file=sys.stderr)
++			print("%-25s%d" % ("Total Unmapped Reads:",(unmapped_read1 + unmapped_read2)), file=sys.stderr)
++			print("%-25s%d" % ("PCR duplicate:",pcr_duplicate), file=sys.stderr)
++			print("%-25s%d" % ("QC-failed:",low_qual), file=sys.stderr)
++			print("%-25s%d" % ("Not primary mapping:",secondary_hit), file=sys.stderr)
++			print("\n", end=' ', file=sys.stderr)
++			print("%-25s%d" % ("Unmapped Read-1:",unmapped_read1), file=sys.stderr)
++			print("%-25s%d" % ("Mapped Read-1:",mapped_read1), file=sys.stderr)
++			print("%-25s%d" % ("  Forward (+):",forward_read1), file=sys.stderr)
++			print("%-25s%d" % ("  Reverse (-):",reverse_read1), file=sys.stderr)
+ 			
+-			print >>sys.stderr, "\n",
+-			print >>sys.stderr, "%-25s%d" % ("Unmapped Read-2:",unmapped_read2)
+-			print >>sys.stderr, "%-25s%d" % ("Mapped Read-2:",mapped_read2)
+-			print >>sys.stderr, "%-25s%d" % ("  Forward (+):",forward_read2)
+-			print >>sys.stderr, "%-25s%d" % ("  Reverse (-):",reverse_read2)
++			print("\n", end=' ', file=sys.stderr)
++			print("%-25s%d" % ("Unmapped Read-2:",unmapped_read2), file=sys.stderr)
++			print("%-25s%d" % ("Mapped Read-2:",mapped_read2), file=sys.stderr)
++			print("%-25s%d" % ("  Forward (+):",forward_read2), file=sys.stderr)
++			print("%-25s%d" % ("  Reverse (-):",reverse_read2), file=sys.stderr)
+ 			
+-			print >>sys.stderr, "\n",
+-			print >>sys.stderr, "%-25s%d" % ("Mapped to (+/-):",plus_minus)
+-			print >>sys.stderr, "%-25s%d" % ("Mapped to (-/+):",minus_plus)
+-			print >>sys.stderr, "%-25s%d" % ("Mapped to (+/+):",plus_plus)
+-			print >>sys.stderr, "%-25s%d" % ("Mapped to (-/-):",minus_minus)
+-			print >>sys.stderr, "\n",
+-			print >>sys.stderr, "%-25s%d" % ("Spliced Hits:",_numSplitHit)
+-			print >>sys.stderr, "%-25s%d" % ("Non-spliced Hits:",_numMonoHit)
+-			print >>sys.stderr, "%-25s%d" % ("Reads have insertion:",_numInsertion)
+-			print >>sys.stderr, "%-25s%d" % ("Reads have deletion:",_numDeletion)
++			print("\n", end=' ', file=sys.stderr)
++			print("%-25s%d" % ("Mapped to (+/-):",plus_minus), file=sys.stderr)
++			print("%-25s%d" % ("Mapped to (-/+):",minus_plus), file=sys.stderr)
++			print("%-25s%d" % ("Mapped to (+/+):",plus_plus), file=sys.stderr)
++			print("%-25s%d" % ("Mapped to (-/-):",minus_minus), file=sys.stderr)
++			print("\n", end=' ', file=sys.stderr)
++			print("%-25s%d" % ("Spliced Hits:",_numSplitHit), file=sys.stderr)
++			print("%-25s%d" % ("Non-spliced Hits:",_numMonoHit), file=sys.stderr)
++			print("%-25s%d" % ("Reads have insertion:",_numInsertion), file=sys.stderr)
++			print("%-25s%d" % ("Reads have deletion:",_numDeletion), file=sys.stderr)
+ 		else:
+-			print >>sys.stderr,"\n#===================================================="
+-			print >>sys.stderr,"#================Report (single-end)================="
+-			print >>sys.stderr, "%-25s%d" % ("Total Reads:",total_read)
+-			print >>sys.stderr, "%-25s%d" % ("Total Mapped Reads:", map_SE)
+-			print >>sys.stderr, "%-25s%d" % ("Total Unmapped Reads:",unmap_SE)
+-			print >>sys.stderr, "%-25s%d" % ("PCR duplicate:",pcr_duplicate)
+-			print >>sys.stderr, "%-25s%d" % ("QC-failed:",low_qual)
+-			print >>sys.stderr, "%-25s%d" % ("Not primary mapping:",secondary_hit)
+-			print >>sys.stderr, "%-25s%d" % ("froward (+):",forward_SE)
+-			print >>sys.stderr, "%-25s%d" % ("reverse (-):",reverse_SE)
+-			print >>sys.stderr, "\n",
+-			print >>sys.stderr, "%-25s%d" % ("Spliced Hits:",_numSplitHit)
+-			print >>sys.stderr, "%-25s%d" % ("Non-spliced Hits:",_numMonoHit)
+-			print >>sys.stderr, "%-25s%d" % ("Reads have insertion:",_numInsertion)
+-			print >>sys.stderr, "%-25s%d" % ("Reads have deletion:",_numDeletion)			
++			print("\n#====================================================", file=sys.stderr)
++			print("#================Report (single-end)=================", file=sys.stderr)
++			print("%-25s%d" % ("Total Reads:",total_read), file=sys.stderr)
++			print("%-25s%d" % ("Total Mapped Reads:", map_SE), file=sys.stderr)
++			print("%-25s%d" % ("Total Unmapped Reads:",unmap_SE), file=sys.stderr)
++			print("%-25s%d" % ("PCR duplicate:",pcr_duplicate), file=sys.stderr)
++			print("%-25s%d" % ("QC-failed:",low_qual), file=sys.stderr)
++			print("%-25s%d" % ("Not primary mapping:",secondary_hit), file=sys.stderr)
++			print("%-25s%d" % ("froward (+):",forward_SE), file=sys.stderr)
++			print("%-25s%d" % ("reverse (-):",reverse_SE), file=sys.stderr)
++			print("\n", end=' ', file=sys.stderr)
++			print("%-25s%d" % ("Spliced Hits:",_numSplitHit), file=sys.stderr)
++			print("%-25s%d" % ("Non-spliced Hits:",_numMonoHit), file=sys.stderr)
++			print("%-25s%d" % ("Reads have insertion:",_numInsertion), file=sys.stderr)
++			print("%-25s%d" % ("Reads have deletion:",_numDeletion), file=sys.stderr)			
+ 			
+ 	def samTobed(self,outfile=None,mergePE=False):
+ 		"""Convert SAM file to BED file. BED file will be saved as xxx.sam.bed unless otherwise specified.
+@@ -204,7 +204,7 @@ class ParseSAM:
+ 		if outfile is None:
+ 			outfile=self.fileName + ".bed"
+ 
+-		print >>sys.stderr,"\tWriting bed entries to\"",outfile,"\"...",
++		print("\tWriting bed entries to\"",outfile,"\"...", end=' ', file=sys.stderr)
+ 		FO=open(outfile,'w')
+ 		for line in self.f:
+ 			if line.startswith(('@','track')):continue		#skip head lines
+@@ -240,14 +240,14 @@ class ParseSAM:
+ 			for i in range(0,len(comb),2):
+ 				blockStart.append(str(sum(comb[:i])))
+ 			blockStarts = ','.join(blockStart)
+-			print >>FO, string.join((str(i) for i in [chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts]),sep="\t")			
+-		print >>sys.stderr, "Done"
++			print(string.join((str(i) for i in [chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts]),sep="\t"), file=FO)			
++		print("Done", file=sys.stderr)
+ 		FO.close()
+ 		self.f.seek(0)
+ 
+ 		if mergePE:
+ 		#creat another bed file. pair-end reads will be merged into single bed entry
+-			print >>sys.stderr, "Writing consoidated bed file ...",
++			print("Writing consoidated bed file ...", end=' ', file=sys.stderr)
+ 			bedfile = open(outfile,'r')
+ 			outfile_2 = outfile + ".consolidate.bed"
+ 			outfile_3 = outfile + '.filter'
+@@ -292,11 +292,11 @@ class ParseSAM:
+ 					if(blocks[key] ==1):	#single end, single hit
+ 						st = [i - txSt[key] for i in starts[key]]
+ 						st = string.join([str(i) for i in st],',')
+-						print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","11\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st
++						print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","11\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st, file=FO)
+ 					else:
+ 						st = [i - txSt[key] for i in starts[key]]	#single end, spliced hit
+ 						st = string.join([str(i) for i in st],',')
+-						print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","12\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st						
++						print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","12\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st, file=FO)						
+ 								
+ 				elif(count[key]==2):	#pair-end read
+ 					direction = string.join(strand[key],'/')	
+@@ -306,17 +306,17 @@ class ParseSAM:
+ 					#st=[string.atoi(i) for i in st]
+ 					if(len(chr[key])==1):	#pair-end reads mapped to same chromosome
+ 						if blocks[key] ==2:	#pair end, single hits
+-							print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","21\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],',')
++							print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","21\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],','), file=FO)
+ 						elif blocks[key] >2:	#
+-							print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","22\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],',')
++							print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","22\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],','), file=FO)
+ 					else:
+-						print >>FOF,key,"\t","pair-end mapped, but two ends mapped to different chromosome"
++						print(key,"\t","pair-end mapped, but two ends mapped to different chromosome", file=FOF)
+ 				elif(count[key] >2):	#reads occur more than 2 times
+-					print >>FOF,key,"\t","occurs more than 2 times in sam file"
++					print(key,"\t","occurs more than 2 times in sam file", file=FOF)
+ 					continue
+ 			FO.close()
+ 			FOF.close()
+-			print >>sys.stderr, "Done"
++			print("Done", file=sys.stderr)
+ 
+ 
+ 	def samTowig(self,outfile=None,log2scale=False,header=False,strandSpecific=False):
+@@ -326,7 +326,7 @@ class ParseSAM:
+ 		if outfile is None:
+ 			outfile = self.fileName + ".wig"
+ 		FO=open(outfile,'w')
+-		print >>sys.stderr, "Writing wig file to\"",outfile,"\"..."
++		print("Writing wig file to\"",outfile,"\"...", file=sys.stderr)
+ 		
+ 		headline="track type=wiggle_0 name=" + outfile + " track_label description='' visibility=full color=255,0,0"
+ 		wig=collections.defaultdict(dict)
+@@ -359,24 +359,24 @@ class ParseSAM:
+ 			
+ 			blocks = cigar.fetch_exon(chrom,txStart,field[5])	
+ 			for block in blocks:
+-				hits.extend(range(block[1]+1,block[2]+1))
++				hits.extend(list(range(block[1]+1,block[2]+1)))
+ 				
+ 			if strandSpecific is not True:
+ 				for i in hits:
+-					if wig[chrom].has_key(i):
++					if i in wig[chrom]:
+ 						wig[chrom][i] +=1
+ 					else:
+ 						wig[chrom][i]=1
+ 			else:
+ 				if strand_rule[read_type + strand] == '-':
+ 					for i in hits:
+-						if Nwig[chrom].has_key(i):
++						if i in Nwig[chrom]:
+ 							Nwig[chrom][i] += 1
+ 						else:
+ 							Nwig[chrom][i] = 1
+ 				if strand_rule[read_type + strand] == '+':
+ 					for i in hits:
+-						if Pwig[chrom].has_key(i):
++						if i in Pwig[chrom]:
+ 							Pwig[chrom][i] +=1
+ 						else:
+ 							Pwig[chrom][i]=1					
+@@ -385,17 +385,17 @@ class ParseSAM:
+ 		
+ 		if strandSpecific is not True:
+ 			for chr in sorted(wig.keys()):
+-				print >>sys.stderr, "Writing ",chr, " ..."
++				print("Writing ",chr, " ...", file=sys.stderr)
+ 				FO.write('variableStep chrom='+chr+'\n')
+ 				for coord in sorted(wig[chr]):
+ 					if log2scale:FO.write("%d\t%5.3f\n" % (coord,math.log(wig[chr][coord],2)))
+ 					else:FO.write("%d\t%d\n" % (coord,wig[chr][coord]))
+ 		else:
+-			chroms=set(Pwig.keys() + Nwig.keys())
++			chroms=set(list(Pwig.keys()) + list(Nwig.keys()))
+ 			for chr in sorted(chroms):
+-				print >>sys.stderr, "Writing ",chr, " ..."
++				print("Writing ",chr, " ...", file=sys.stderr)
+ 				FO.write('variableStep chrom='+chr+'\n')
+-				coords=sorted(set(Pwig[chr].keys() + Nwig[chr].keys()))
++				coords=sorted(set(list(Pwig[chr].keys()) + list(Nwig[chr].keys())))
+ 				for coord in coords:
+ 					if ((coord in Pwig[chr]) and (coord not in Nwig[chr])):
+ 						FO.write("%d\t%d\n" % (coord,Pwig[chr][coord]))
+@@ -418,7 +418,7 @@ class ParseSAM:
+ 			else: outfile = self.fileName + ".unmap.fa"
+ 		FO=open(outfile,'w')
+ 		unmapCount=0
+-		print >>sys.stderr, "Writing unmapped reads to\"",outfile,"\"... ",
++		print("Writing unmapped reads to\"",outfile,"\"... ", end=' ', file=sys.stderr)
+ 		
+ 		for line in self.f:
+ 			hits=[]
+@@ -438,7 +438,7 @@ class ParseSAM:
+ 				if fastq: FO.write('@' + seqID + '\n' + seq +'\n' + '+' +'\n' + qual+'\n')
+ 				else: FO.write('>' + seqID + '\n' + seq +'\n')
+ 
+-		print >>sys.stderr, str(unmapCount) + " reads saved!\n"
++		print(str(unmapCount) + " reads saved!\n", file=sys.stderr)
+ 		FO.close()
+ 		self.f.seek(0)
+ 	
+@@ -449,7 +449,7 @@ class ParseSAM:
+ 			outfile = self.fileName + ".PP.sam"
+ 		FO=open(outfile,'w')
+ 		PPcount=0
+-		print >>sys.stderr, "Writing proper paired reads to\"",outfile,"\"... ",
++		print("Writing proper paired reads to\"",outfile,"\"... ", end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			hits=[]
+ 			if line[0] == '@':continue						#skip head lines	
+@@ -460,7 +460,7 @@ class ParseSAM:
+ 				PPcount +=1
+ 				FO.write(line)
+ 		FO.close()
+-		print >>sys.stderr, str(PPcount) + " reads were saved!\n",
++		print(str(PPcount) + " reads were saved!\n", end=' ', file=sys.stderr)
+ 		self.f.seek(0)
+ 
+ 	def samNVC(self,outfile=None):
+@@ -481,7 +481,7 @@ class ParseSAM:
+ 		c_count=[]
+ 		g_count=[]
+ 		t_count=[]
+-		print >>sys.stderr, "reading sam file ... "
++		print("reading sam file ... ", file=sys.stderr)
+ 		for line in self.f:
+ 			if line.startswith('@'):continue									#skip head lines	
+ 			if ParseSAM._reExpr2.match(line):continue					#skip blank lines
+@@ -492,44 +492,44 @@ class ParseSAM:
+ 				RNA_read = field[9].upper()
+ 			else:
+ 				RNA_read = field[9].upper().translate(transtab)[::-1]
+-			for i in xrange(len(RNA_read)):
++			for i in range(len(RNA_read)):
+ 				key = str(i) + RNA_read[i]
+ 				base_freq[key] += 1
+ 		
+-		print >>sys.stderr, "generating data matrix ..."
+-		print >>FO, "Position\tA\tC\tG\tT\tN\tX"
+-		for i in xrange(len(RNA_read)):
+-			print  >>FO, str(i) + '\t',
+-			print  >>FO, str(base_freq[str(i) + "A"]) + '\t',
++		print("generating data matrix ...", file=sys.stderr)
++		print("Position\tA\tC\tG\tT\tN\tX", file=FO)
++		for i in range(len(RNA_read)):
++			print(str(i) + '\t', end=' ', file=FO)
++			print(str(base_freq[str(i) + "A"]) + '\t', end=' ', file=FO)
+ 			a_count.append(str(base_freq[str(i) + "A"]))
+-			print  >>FO, str(base_freq[str(i) + "C"]) + '\t',
++			print(str(base_freq[str(i) + "C"]) + '\t', end=' ', file=FO)
+ 			c_count.append(str(base_freq[str(i) + "C"]))
+-			print  >>FO, str(base_freq[str(i) + "G"]) + '\t',
++			print(str(base_freq[str(i) + "G"]) + '\t', end=' ', file=FO)
+ 			g_count.append(str(base_freq[str(i) + "G"]))
+-			print  >>FO, str(base_freq[str(i) + "T"]) + '\t',
++			print(str(base_freq[str(i) + "T"]) + '\t', end=' ', file=FO)
+ 			t_count.append(str(base_freq[str(i) + "T"]))
+-			print  >>FO, str(base_freq[str(i) + "N"]) + '\t',
+-			print  >>FO, str(base_freq[str(i) + "X"]) + '\t'
++			print(str(base_freq[str(i) + "N"]) + '\t', end=' ', file=FO)
++			print(str(base_freq[str(i) + "X"]) + '\t', file=FO)
+ 		FO.close()
+ 		
+ 		#generating R scripts
+-		print >>sys.stderr, "generating R script  ..."
+-		print >>RS, "position=c(" + ','.join([str(i) for i in xrange(len(RNA_read))]) + ')'
+-		print >>RS, "A_count=c(" + ','.join(a_count) + ')'
+-		print >>RS, "C_count=c(" + ','.join(c_count) + ')'
+-		print >>RS, "G_count=c(" + ','.join(g_count) + ')'
+-		print >>RS, "T_count=c(" + ','.join(t_count) + ')'
+-		print >>RS, "total= A_count + C_count + G_count + T_count"
+-		print >>RS, "ym=max(A_count/total,C_count/total,G_count/total,T_count/total) + 0.05"
+-		print >>RS, "yn=min(A_count/total,C_count/total,G_count/total,T_count/total)"
++		print("generating R script  ...", file=sys.stderr)
++		print("position=c(" + ','.join([str(i) for i in range(len(RNA_read))]) + ')', file=RS)
++		print("A_count=c(" + ','.join(a_count) + ')', file=RS)
++		print("C_count=c(" + ','.join(c_count) + ')', file=RS)
++		print("G_count=c(" + ','.join(g_count) + ')', file=RS)
++		print("T_count=c(" + ','.join(t_count) + ')', file=RS)
++		print("total= A_count + C_count + G_count + T_count", file=RS)
++		print("ym=max(A_count/total,C_count/total,G_count/total,T_count/total) + 0.05", file=RS)
++		print("yn=min(A_count/total,C_count/total,G_count/total,T_count/total)", file=RS)
+ 		
+-		print >>RS, 'pdf("NVC_plot.pdf")'
+-		print >>RS, 'plot(position,A_count/total,type="o",pch=20,ylim=c(yn,ym),col="dark green",xlab="Position of Read",ylab="Nucleotide Frequency")'
+-		print >>RS, 'lines(position,T_count/total,type="o",pch=20,col="red")'
+-		print >>RS, 'lines(position,G_count/total,type="o",pch=20,col="blue")'
+-		print >>RS, 'lines(position,C_count/total,type="o",pch=20,col="cyan")'
+-		print >>RS, 'legend('+ str(len(RNA_read)-10) + ',ym,legend=c("A","T","G","C"),col=c("dark green","red","blue","cyan"),lwd=2,pch=20,text.col=c("dark green","red","blue","cyan"))'
+-		print >>RS, "dev.off()"
++		print('pdf("NVC_plot.pdf")', file=RS)
++		print('plot(position,A_count/total,type="o",pch=20,ylim=c(yn,ym),col="dark green",xlab="Position of Read",ylab="Nucleotide Frequency")', file=RS)
++		print('lines(position,T_count/total,type="o",pch=20,col="red")', file=RS)
++		print('lines(position,G_count/total,type="o",pch=20,col="blue")', file=RS)
++		print('lines(position,C_count/total,type="o",pch=20,col="cyan")', file=RS)
++		print('legend('+ str(len(RNA_read)-10) + ',ym,legend=c("A","T","G","C"),col=c("dark green","red","blue","cyan"),lwd=2,pch=20,text.col=c("dark green","red","blue","cyan"))', file=RS)
++		print("dev.off()", file=RS)
+ 		
+ 		RS.close()
+ 		#self.f.seek(0)
+@@ -546,7 +546,7 @@ class ParseSAM:
+ 		RS=open(outfile2,'w')
+ 		
+ 		gc_hist=collections.defaultdict(int)	#key is GC percent, value is count of reads
+-		print >>sys.stderr, "reading sam file ... "
++		print("reading sam file ... ", file=sys.stderr)
+ 		for line in self.f:
+ 			if line[0] == '@':continue						#skip head lines	
+ 			if ParseSAM._reExpr2.match(line):continue		#skip blank lines
+@@ -556,18 +556,18 @@ class ParseSAM:
+ 			#print gc_percent
+ 			gc_hist[gc_percent] += 1
+ 		
+-		print >>sys.stderr, "writing GC content ..."
++		print("writing GC content ...", file=sys.stderr)
+ 		
+-		print >>FO, "GC%\tread_count"
+-		for i in gc_hist.keys():
+-			print >>FO, i + '\t' + str(gc_hist[i])
++		print("GC%\tread_count", file=FO)
++		for i in list(gc_hist.keys()):
++			print(i + '\t' + str(gc_hist[i]), file=FO)
+ 			
+-		print >>sys.stderr, "writing R script ..."
+-		print >>RS, "pdf('GC_content.pdf')"
+-		print >>RS, 'gc=rep(c(' + ','.join([i for i in gc_hist.keys()]) + '),' + 'times=c(' + ','.join([str(i) for i in gc_hist.values()]) + '))'
+-		print >>RS, 'hist(gc,probability=T,breaks=%d,xlab="GC content (%%)",ylab="Density of Reads",border="blue",main="")' % 100
++		print("writing R script ...", file=sys.stderr)
++		print("pdf('GC_content.pdf')", file=RS)
++		print('gc=rep(c(' + ','.join([i for i in list(gc_hist.keys())]) + '),' + 'times=c(' + ','.join([str(i) for i in list(gc_hist.values())]) + '))', file=RS)
++		print('hist(gc,probability=T,breaks=%d,xlab="GC content (%%)",ylab="Density of Reads",border="blue",main="")' % 100, file=RS)
+ 		#print >>RS, "lines(density(gc),col='red')"
+-		print >>RS ,"dev.off()"		
++		print("dev.off()", file=RS)		
+ 		#self.f.seek(0)
+ 		
+ 	def samDupRate(self,outfile=None,up_bound=500):
+@@ -589,7 +589,7 @@ class ParseSAM:
+ 		
+ 		seqDup_count=collections.defaultdict(int)
+ 		posDup_count=collections.defaultdict(int)
+-		print >>sys.stderr, "reading sam file ... "
++		print("reading sam file ... ", file=sys.stderr)
+ 		for line in self.f:
+ 			if line[0] == '@':continue						#skip head lines	
+ 			if ParseSAM._reExpr2.match(line):continue		#skip blank lines
+@@ -616,37 +616,37 @@ class ParseSAM:
+ 			coord = chrom + ":" + str(chromStart) + "-" + str(chromEnd) + ":" + blockSizes + ":" + blockStarts
+ 			posDup[coord] +=1
+ 			
+-		print >>sys.stderr, "report duplicte rate based on sequence ..."
+-		print >>SEQ, "Occurrence\tUniqReadNumber"
+-		for i in seqDup.values():			#key is occurence, value is uniq reads number (based on seq)
++		print("report duplicte rate based on sequence ...", file=sys.stderr)
++		print("Occurrence\tUniqReadNumber", file=SEQ)
++		for i in list(seqDup.values()):			#key is occurence, value is uniq reads number (based on seq)
+ 			seqDup_count[i] +=1
+-		for k in sorted(seqDup_count.iterkeys()):	
+-			print >>SEQ, str(k) +'\t'+ str(seqDup_count[k])
++		for k in sorted(seqDup_count.keys()):	
++			print(str(k) +'\t'+ str(seqDup_count[k]), file=SEQ)
+ 		SEQ.close()
+ 		
+-		print >>sys.stderr, "report duplicte rate based on mapping  ..."
+-		print >>POS, "Occurrence\tUniqReadNumber"
+-		for i in posDup.values():			#key is occurence, value is uniq reads number (based on coord)
++		print("report duplicte rate based on mapping  ...", file=sys.stderr)
++		print("Occurrence\tUniqReadNumber", file=POS)
++		for i in list(posDup.values()):			#key is occurence, value is uniq reads number (based on coord)
+ 			posDup_count[i] +=1
+-		for k in sorted(posDup_count.iterkeys()):	
+-			print >>POS, str(k) +'\t'+ str(posDup_count[k])
++		for k in sorted(posDup_count.keys()):	
++			print(str(k) +'\t'+ str(posDup_count[k]), file=POS)
+ 		POS.close()
+ 		
+ 		
+-		print >>sys.stderr, "generate R script ..."
+-		print >>RS, "pdf('duplicateRead.pdf')"
+-		print >>RS, "par(mar=c(5,4,4,5),las=0)"
+-		print >>RS, "seq_occ=c(" + ','.join([str(i) for i in sorted(seqDup_count.iterkeys()) ]) + ')'
+-		print >>RS, "seq_uniqRead=c(" + ','.join([str(seqDup_count[i]) for i in sorted(seqDup_count.iterkeys()) ]) + ')'
+-		print >>RS, "pos_occ=c(" + ','.join([str(i) for i in sorted(posDup_count.iterkeys()) ]) + ')'
+-		print >>RS, "pos_uniqRead=c(" + ','.join([str(posDup_count[i]) for i in sorted(posDup_count.iterkeys()) ]) + ')'
+-		print >>RS, "plot(pos_occ,log10(pos_uniqRead),ylab='Number of Reads (log10)',xlab='Frequency',pch=4,cex=0.8,col='blue',xlim=c(1,%d),yaxt='n')" % up_bound
+-		print >>RS, "points(seq_occ,log10(seq_uniqRead),pch=20,cex=0.8,col='red')"
+-		print >>RS, 'ym=floor(max(log10(pos_uniqRead)))'
+-		print >>RS, "legend(%d,ym,legend=c('Sequence-base','Mapping-base'),col=c('red','blue'),pch=c(4,20))" % max(up_bound-200,1)
+-		print >>RS, 'axis(side=2,at=0:ym,labels=0:ym)'
+-		print >>RS, 'axis(side=4,at=c(log10(pos_uniqRead[1]),log10(pos_uniqRead[2]),log10(pos_uniqRead[3]),log10(pos_uniqRead[4])), labels=c(round(pos_uniqRead[1]*100/sum(pos_uniqRead)),round(pos_uniqRead[2]*100/sum(pos_uniqRead)),round(pos_uniqRead[3]*100/sum(pos_uniqRead)),round(pos_uniqRead[4]*100/sum(pos_uniqRead))))'
+-		print >>RS, 'mtext(4, text = "Reads %", line = 2)'
++		print("generate R script ...", file=sys.stderr)
++		print("pdf('duplicateRead.pdf')", file=RS)
++		print("par(mar=c(5,4,4,5),las=0)", file=RS)
++		print("seq_occ=c(" + ','.join([str(i) for i in sorted(seqDup_count.keys()) ]) + ')', file=RS)
++		print("seq_uniqRead=c(" + ','.join([str(seqDup_count[i]) for i in sorted(seqDup_count.keys()) ]) + ')', file=RS)
++		print("pos_occ=c(" + ','.join([str(i) for i in sorted(posDup_count.keys()) ]) + ')', file=RS)
++		print("pos_uniqRead=c(" + ','.join([str(posDup_count[i]) for i in sorted(posDup_count.keys()) ]) + ')', file=RS)
++		print("plot(pos_occ,log10(pos_uniqRead),ylab='Number of Reads (log10)',xlab='Frequency',pch=4,cex=0.8,col='blue',xlim=c(1,%d),yaxt='n')" % up_bound, file=RS)
++		print("points(seq_occ,log10(seq_uniqRead),pch=20,cex=0.8,col='red')", file=RS)
++		print('ym=floor(max(log10(pos_uniqRead)))', file=RS)
++		print("legend(%d,ym,legend=c('Sequence-base','Mapping-base'),col=c('red','blue'),pch=c(4,20))" % max(up_bound-200,1), file=RS)
++		print('axis(side=2,at=0:ym,labels=0:ym)', file=RS)
++		print('axis(side=4,at=c(log10(pos_uniqRead[1]),log10(pos_uniqRead[2]),log10(pos_uniqRead[3]),log10(pos_uniqRead[4])), labels=c(round(pos_uniqRead[1]*100/sum(pos_uniqRead)),round(pos_uniqRead[2]*100/sum(pos_uniqRead)),round(pos_uniqRead[3]*100/sum(pos_uniqRead)),round(pos_uniqRead[4]*100/sum(pos_uniqRead))))', file=RS)
++		print('mtext(4, text = "Reads %", line = 2)', file=RS)
+ 		#self.f.seek(0)
+ 		
+ 	def getUniqMapRead(self,outfile=None):
+@@ -655,7 +655,7 @@ class ParseSAM:
+ 			outfile = self.fileName + ".uniq.sam"
+ 		FO=open(outfile,'w')
+ 		Uniqcount=0
+-		print >>sys.stderr, "Writing uniquely mapped reads to\"",outfile,"\"... ",
++		print("Writing uniquely mapped reads to\"",outfile,"\"... ", end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			hits=[]
+ 			if line[0] == '@':continue						#skip head lines	
+@@ -667,11 +667,11 @@ class ParseSAM:
+ 			#else:
+ 				#print >>sys.stderr,line,
+ 			if (ParseSAM._uniqueHit_pat.search(line)):
+-				print >>sys.stderr,line,
++				print(line, end=' ', file=sys.stderr)
+ 				Uniqcount +=1
+ 				FO.write(line)
+ 		FO.close()
+-		print >>sys.stderr, str(Uniqcount) + " reads were saved!\n",
++		print(str(Uniqcount) + " reads were saved!\n", end=' ', file=sys.stderr)
+ 		self.f.seek(0)
+ 
+ 	def getWrongStrand(self,outfile=None):
+@@ -680,7 +680,7 @@ class ParseSAM:
+ 			outfile = self.fileName + ".wrongStrand.sam"
+ 		FO=open(outfile,'w')
+ 		wrongStrand=0
+-		print >>sys.stderr, "Writing incorrectly stranded reads to\"",outfile,"\"... ",
++		print("Writing incorrectly stranded reads to\"",outfile,"\"... ", end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			hits=[]
+ 			if line.startswith('@'):continue						#skip head lines	
+@@ -701,7 +701,7 @@ class ParseSAM:
+ 				wrongStrand+=1
+ 
+ 		FO.close()
+-		print >>sys.stderr, str(wrongStrand) + " reads were saved!\n",
++		print(str(wrongStrand) + " reads were saved!\n", end=' ', file=sys.stderr)
+ 		self.f.seek(0)
+ 		
+ 	def filterSpliceRead(self,outfile=None,min_overhang=8,min_gap=50,max_gap=1000000):
+@@ -714,7 +714,7 @@ class ParseSAM:
+ 			outfile = self.fileName + ".SR.sam"
+ 			#outfile2 = self.fileName + ".SR.filter.sam"			
+ 		splice_sites=collections.defaultdict(set)
+-		print >>sys.stderr, "\tDetermine splice sites with proper overhang, intron size ... ",
++		print("\tDetermine splice sites with proper overhang, intron size ... ", end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			if line[0] == '@':continue						#skip head lines	
+ 			if ParseSAM._reExpr2.match(line):continue		#skip blank lines
+@@ -741,12 +741,12 @@ class ParseSAM:
+ 					if (comb[2] >= min_overhang):
+ 						splice_sites[chrom].add(map_st + comb[0] + comb[1])			
+ 		self.f.seek(0)	
+-		print >>sys.stderr, "Done"
++		print("Done", file=sys.stderr)
+ 		
+ 		
+ 		FO=open(outfile,'w')
+ 		#FO2=open(outfile2,'w')
+-		print >>sys.stderr, "\tExtracting splicing reads  ... ",
++		print("\tExtracting splicing reads  ... ", end=' ', file=sys.stderr)
+ 		total_SR =0
+ 		extract_SR =0
+ 		total_read =0
+@@ -778,10 +778,10 @@ class ParseSAM:
+ 				else:
+ 					#FO2.write(line)
+ 					continue
+-		print >>sys.stderr, "Done"
+-		print >>sys.stderr, "\tTotal mapped Read: " + str(total_read)
+-		print >>sys.stderr, "\tTotal Splicing Read: " + str(total_SR)
+-		print >>sys.stderr, "\Usable Splicing Read: " + str(extract_SR)
++		print("Done", file=sys.stderr)
++		print("\tTotal mapped Read: " + str(total_read), file=sys.stderr)
++		print("\tTotal Splicing Read: " + str(total_SR), file=sys.stderr)
++		print("\\Usable Splicing Read: " + str(extract_SR), file=sys.stderr)
+ 		FO.close()
+ 		#FO2.close()
+ 		self.f.seek(0)	
+@@ -792,7 +792,7 @@ class ParseSAM:
+ 		if outfile is None:
+ 			outfile = self.fileName + ".SR.sam"
+ 		FO=open(outfile,'w')
+-		print >>sys.stderr, "\tExtract splicing reads without any filter ...",
++		print("\tExtract splicing reads without any filter ...", end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			if line[0] == '@':continue						#skip head lines	
+ 			if ParseSAM._reExpr2.match(line):continue		#skip blank lines
+@@ -803,7 +803,7 @@ class ParseSAM:
+ 			if (len(comb)>=3):
+ 				FO.write(line)
+ 		
+-		print >>sys.stderr, "Done"
++		print("Done", file=sys.stderr)
+ 		self.f.seek(0)	
+ 		FO.close()
+ 
+@@ -812,7 +812,7 @@ class ParseSAM:
+ 		The original SAM file must be sorted before hand. if not, using linux command like "sort -k3,3 -k4,4n myfile.sam >myfile.sorted.sam" '''
+ 		if outfile is None:
+ 			outfile = self.fileName + ".collapsed.sam"
+-		print >>sys.stderr, "Writing collapsed SAM file to\"",outfile,"\"... "
++		print("Writing collapsed SAM file to\"",outfile,"\"... ", file=sys.stderr)
+ 		FO=open(outfile,'w')		
+ 		flag=""
+ 		for line in self.f:
+@@ -840,7 +840,7 @@ class ParseSAM:
+ 		else:
+ 			outfile = outfile + ".qual.plot.r"
+ 		FO=open(outfile,'w')
+-		print >>sys.stderr, "\tcalculating quality score ... "
++		print("\tcalculating quality score ... ", file=sys.stderr)
+ 		qual_min={}
+ 		qual_max={}
+ 		qual_sum={}
+@@ -875,16 +875,16 @@ class ParseSAM:
+ 		max_qualities =[str(qual_max[i]) for i in range(0,read_len)]
+ 		avg_qualities = [str(qual_sum[i]/total_read) for i in range(0,read_len)]
+ 		nt_pos = [str(i) for i in range(0,read_len)]
+-		print >>FO, "nt_pos=c(" + ','.join(nt_pos)  + ')'
+-		print >>FO, "max_qual=c(" + ','.join(max_qualities)  + ')'
+-		print >>FO, "min_qual=c(" + ','.join(min_qualities)  + ')'
+-		print >>FO, "avg_qual=c(" + ','.join(avg_qualities)  + ')'
+-		print >>FO, "pdf('phred_qual.pdf')"
+-		print >>FO, "plot(nt_pos,avg_qual, xlab=\"Nucleotide Position (5'->3')\", ylab='Phred Quality',ylim=c(0,97),lwd=2,type='s')"
+-		print >>FO, 'lines(nt_pos,max_qual,type="s",lwd=2,col="red")'
+-		print >>FO, 'lines(nt_pos,min_qual,type="s",lwd=2,col="blue")'
+-		print >>FO, 'legend(0,100,legend=c("Max","Average","Min"),col=c("red","black","blue"),lwd=2)'
+-		print >>FO, 'dev.off()'
++		print("nt_pos=c(" + ','.join(nt_pos)  + ')', file=FO)
++		print("max_qual=c(" + ','.join(max_qualities)  + ')', file=FO)
++		print("min_qual=c(" + ','.join(min_qualities)  + ')', file=FO)
++		print("avg_qual=c(" + ','.join(avg_qualities)  + ')', file=FO)
++		print("pdf('phred_qual.pdf')", file=FO)
++		print("plot(nt_pos,avg_qual, xlab=\"Nucleotide Position (5'->3')\", ylab='Phred Quality',ylim=c(0,97),lwd=2,type='s')", file=FO)
++		print('lines(nt_pos,max_qual,type="s",lwd=2,col="red")', file=FO)
++		print('lines(nt_pos,min_qual,type="s",lwd=2,col="blue")', file=FO)
++		print('legend(0,100,legend=c("Max","Average","Min"),col=c("red","black","blue"),lwd=2)', file=FO)
++		print('dev.off()', file=FO)
+ 		#for i in range(0,read_len):
+ 		#	print >>sys.stderr, str(i) + '\t' + str(qual_max[i]) + '\t' + str(qual_min[i]) + '\t' + str(qual_sum[i]/total_read)
+ 		#self.f.seek(0)
+@@ -918,7 +918,7 @@ class ParseSAM:
+ 						scores[chrom][pos] =1
+ 					else:
+ 						scores[chrom][pos] +=1			
+-			if lines % 10000 == 0: print >>sys.stderr, "%i lines loaded \r" % lines
++			if lines % 10000 == 0: print("%i lines loaded \r" % lines, file=sys.stderr)
+ 		return scores
+ 		self.f.seek(0)
+ 
+@@ -943,7 +943,7 @@ class QCSAM:
+ 		The 5th column is number of reads fallen into the region defined by the first 3 columns'''
+ 		
+ 		if refbed is None:
+-			print >>sys.stderr,"You must specify a bed file representing gene model\n"
++			print("You must specify a bed file representing gene model\n", file=sys.stderr)
+ 			exit(0)
+ 		if outfile is None:
+ 			exon_count = self.fileName + "_exon.count.bed"
+@@ -968,7 +968,7 @@ class QCSAM:
+ 		splicedReads=0
+ 		
+ 		#read SAM 
+-		print >>sys.stderr, "reading "+ self.fileName + '...',
++		print("reading "+ self.fileName + '...', end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			if line.startswith("@"):continue
+ 			fields=line.rstrip('\n ').split()
+@@ -990,10 +990,10 @@ class QCSAM:
+ 					ranges[chrom].add_interval( Interval( mid, mid ) )
+     			
+ 		self.f.seek(0)
+-		print >>sys.stderr, "Done"
++		print("Done", file=sys.stderr)
+ 		
+ 		#read refbed file
+-		print >>sys.stderr, "Assign reads to "+ refbed + '...',
++		print("Assign reads to "+ refbed + '...', end=' ', file=sys.stderr)
+ 		for line in open(refbed,'r'):
+ 			try:
+ 				if line.startswith('#'):continue
+@@ -1007,14 +1007,14 @@ class QCSAM:
+ 				geneName      = fields[3]
+ 				strand    = fields[5].replace(" ","_")
+ 				
+-				exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
+-				exon_starts = map((lambda x: x + tx_start ), exon_starts)
+-				exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
+-				exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);   
++				exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) ))
++				exon_starts = list(map((lambda x: x + tx_start ), exon_starts))
++				exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) ))
++				exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends));   
+ 				intron_starts = exon_ends[:-1]
+ 				intron_ends=exon_starts[1:]
+ 			except:
+-				print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line,
++				print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr)
+ 				continue
+ 
+ 				# assign reads to intron				
+@@ -1050,28 +1050,28 @@ class QCSAM:
+ 						EXON_OUT.write(chrom + "\t" + str(st) + "\t" + str(end) + "\t" + geneName + "_exon_" + str(exonNum) + "\t" + str(hits) + "\t" + strand + '\n')
+ 						exonNum += 1		
+ 		intergenicReads=totalReads-exonReads-intronReads-splicedReads		
+-		print >>sys.stderr, "Done." + '\n'
+-		print >>sys.stderr, "Total reads:\t" + str(totalReads)
+-		print >>sys.stderr, "Exonic reads:\t" + str(exonReads) 
+-		print >>sys.stderr, "Intronic reads:\t" + str(intronReads) 
+-		print >>sys.stderr, "Splicing reads:\t" + str(splicedReads)
+-		print >>sys.stderr, "Intergenic reads:\t" + str(intergenicReads)
++		print("Done." + '\n', file=sys.stderr)
++		print("Total reads:\t" + str(totalReads), file=sys.stderr)
++		print("Exonic reads:\t" + str(exonReads), file=sys.stderr) 
++		print("Intronic reads:\t" + str(intronReads), file=sys.stderr) 
++		print("Splicing reads:\t" + str(splicedReads), file=sys.stderr)
++		print("Intergenic reads:\t" + str(intergenicReads), file=sys.stderr)
+ 		
+-		print >>sys.stderr,"writing R script ...",
++		print("writing R script ...", end=' ', file=sys.stderr)
+ 		totalReads=float(totalReads)
+-		print >>R_OUT, "pdf('%s')" % rpdf
+-		print >>R_OUT, "dat=c(%d,%d,%d,%d)" % (exonReads,splicedReads,intronReads,intergenicReads)
+-		print >>R_OUT, "lb=c('exon(%.2f)','junction(%.2f)','intron(%.2f)','intergenic(%.2f)')" % (exonReads/totalReads,splicedReads/totalReads,intronReads/totalReads,intergenicReads/totalReads)
+-		print >>R_OUT, "pie(dat,labels=lb,col=rainbow(4),clockwise=TRUE,main='Total reads = %d')" % int(totalReads)
+-		print >>R_OUT, "dev.off()"
+-		print >>sys.stderr, "Done."
++		print("pdf('%s')" % rpdf, file=R_OUT)
++		print("dat=c(%d,%d,%d,%d)" % (exonReads,splicedReads,intronReads,intergenicReads), file=R_OUT)
++		print("lb=c('exon(%.2f)','junction(%.2f)','intron(%.2f)','intergenic(%.2f)')" % (exonReads/totalReads,splicedReads/totalReads,intronReads/totalReads,intergenicReads/totalReads), file=R_OUT)
++		print("pie(dat,labels=lb,col=rainbow(4),clockwise=TRUE,main='Total reads = %d')" % int(totalReads), file=R_OUT)
++		print("dev.off()", file=R_OUT)
++		print("Done.", file=sys.stderr)
+ 	
+ 	
+ 	def coverageGeneBody(self,refbed,outfile=None):
+ 		'''Calculate reads coverage over gene body, from 5'to 3'. each gene will be equally divied
+ 		into 100 regsions'''
+ 		if refbed is None:
+-			print >>sys.stderr,"You must specify a bed file representing gene model\n"
++			print("You must specify a bed file representing gene model\n", file=sys.stderr)
+ 			exit(0)
+ 		if outfile is None:
+ 			outfile1 = self.fileName + ".geneBodyCoverage_plot.r"
+@@ -1088,7 +1088,7 @@ class QCSAM:
+ 		rpkm={}
+ 		
+ 		#read SAM 
+-		print >>sys.stderr, "reading "+ self.fileName + '...',
++		print("reading "+ self.fileName + '...', end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			if line.startswith("@"):continue
+ 			fields=line.rstrip('\n ').split()
+@@ -1114,9 +1114,9 @@ class QCSAM:
+ 					ranges[chrom] = Intersecter()
+ 				else:
+ 					ranges[chrom].add_interval( Interval( st, st+size ) )
+-		print >>sys.stderr, "Done"   	
++		print("Done", file=sys.stderr)   	
+ 
+-		print >>sys.stderr, "calculating coverage over gene body ..."
++		print("calculating coverage over gene body ...", file=sys.stderr)
+ 		coverage=collections.defaultdict(int)
+ 		flag=0
+ 		for line in open(refbed,'r'):
+@@ -1130,19 +1130,19 @@ class QCSAM:
+ 				geneName      = fields[3]
+ 				strand    = fields[5]
+ 				
+-				exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
+-				exon_starts = map((lambda x: x + tx_start ), exon_starts)
+-				exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
+-				exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);   
++				exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) ))
++				exon_starts = list(map((lambda x: x + tx_start ), exon_starts))
++				exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) ))
++				exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends));   
+ 			except:
+-				print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line,
++				print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr)
+ 				continue
+ 			gene_all_base=[]
+ 			percentile_base=[]
+ 			mRNA_len =0
+ 			flag=0
+ 			for st,end in zip(exon_starts,exon_ends):
+-				gene_all_base.extend(range(st+1,end+1))		#0-based coordinates on genome
++				gene_all_base.extend(list(range(st+1,end+1)))		#0-based coordinates on genome
+ 				mRNA_len = len(gene_all_base)
+ 				if mRNA_len <100:
+ 					flag=1
+@@ -1159,18 +1159,18 @@ class QCSAM:
+ 					coverage[i] += len(ranges[chrom].find(percentile_base[i], percentile_base[i]+1))
+ 		x_coord=[]
+ 		y_coord=[]
+-		print >>OUT2, "Total reads: " + str(totalReads)
+-		print >>OUT2, "Fragment number: " + str(fragment_num)
+-		print >>OUT2, "percentile\tcount"
++		print("Total reads: " + str(totalReads), file=OUT2)
++		print("Fragment number: " + str(fragment_num), file=OUT2)
++		print("percentile\tcount", file=OUT2)
+ 		for i in coverage:
+ 			x_coord.append(str(i))
+ 			y_coord.append(str(coverage[i]))
+-			print >>OUT2, str(i) + '\t' + str(coverage[i])
+-		print >>OUT1, "pdf('geneBody_coverage.pdf')"
+-		print >>OUT1, "x=0:100"
+-		print >>OUT1, "y=c(" + ','.join(y_coord) + ')'
+-		print >>OUT1, "plot(x,y,xlab=\"percentile of gene body (5'->3')\",ylab='read number',type='s')"
+-		print >>OUT1, "dev.off()"
++			print(str(i) + '\t' + str(coverage[i]), file=OUT2)
++		print("pdf('geneBody_coverage.pdf')", file=OUT1)
++		print("x=0:100", file=OUT1)
++		print("y=c(" + ','.join(y_coord) + ')', file=OUT1)
++		print("plot(x,y,xlab=\"percentile of gene body (5'->3')\",ylab='read number',type='s')", file=OUT1)
++		print("dev.off()", file=OUT1)
+ 			
+ 	def calculateRPKM(self,refbed,outfile=None):
+ 		'''calculate RPKM values for each gene in refbed. Only uniquely aligned reads are used. 
+@@ -1178,7 +1178,7 @@ class QCSAM:
+ 		exon per Million mapped reads) for each exon, intron and mRNA'''
+ 		
+ 		if refbed is None:
+-			print >>sys.stderr,"You must specify a bed file representing gene model\n"
++			print("You must specify a bed file representing gene model\n", file=sys.stderr)
+ 			exit(0)
+ 		if outfile is None:
+ 			rpkm_file = self.fileName + ".rpkm.xls"
+@@ -1194,7 +1194,7 @@ class QCSAM:
+ 		rpkm={}
+ 		
+ 		#read SAM 
+-		print >>sys.stderr, "reading "+ self.fileName + '...',
++		print("reading "+ self.fileName + '...', end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			if line.startswith("@"):continue
+ 			fields=line.rstrip('\n ').split()
+@@ -1228,17 +1228,17 @@ class QCSAM:
+ 					ranges[chrom].add_interval( Interval( mid, mid ) )
+     			
+ 		self.f.seek(0)
+-		print >>sys.stderr, "Done"
+-		print >>RPKM_OUT, "Total mapped reads (TR): " + str(totalReads) 
+-		print >>RPKM_OUT, "Multiple mapped reads (MR): " + str(multiMapReads)
+-		print >>RPKM_OUT, "Uniquely mapped reads (UR): " + str(totalReads - multiMapReads)
+-		print >>RPKM_OUT, "Spliced  mapped reads (SR): " + str(sR)
+-		print >>RPKM_OUT, "Corrected uniquely mapped reads (cUR): " + str(cUR)
++		print("Done", file=sys.stderr)
++		print("Total mapped reads (TR): " + str(totalReads), file=RPKM_OUT) 
++		print("Multiple mapped reads (MR): " + str(multiMapReads), file=RPKM_OUT)
++		print("Uniquely mapped reads (UR): " + str(totalReads - multiMapReads), file=RPKM_OUT)
++		print("Spliced  mapped reads (SR): " + str(sR), file=RPKM_OUT)
++		print("Corrected uniquely mapped reads (cUR): " + str(cUR), file=RPKM_OUT)
+ 		if totalReads ==0:
+ 			sys.exit(1)
+ 		
+ 		#read refbed file
+-		print >>sys.stderr, "Assign reads to "+ refbed + '...',
++		print("Assign reads to "+ refbed + '...', end=' ', file=sys.stderr)
+ 		for line in open(refbed,'r'):
+ 			try:
+ 				if line.startswith('#'):continue
+@@ -1252,16 +1252,16 @@ class QCSAM:
+ 				geneName      = fields[3]
+ 				strand    = fields[5].replace(" ","_")
+ 				
+-				exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
+-				exon_starts = map((lambda x: x + tx_start ), exon_starts)
+-				exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
+-				exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends)
+-				exon_sizes = map(int,fields[10].rstrip(',\n').split(','))
++				exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) ))
++				exon_starts = list(map((lambda x: x + tx_start ), exon_starts))
++				exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) ))
++				exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends))
++				exon_sizes = list(map(int,fields[10].rstrip(',\n').split(',')))
+ 				intron_starts = exon_ends[:-1]
+ 				intron_ends=exon_starts[1:]
+ 				key='\t'.join((chrom.lower(),str(tx_start),str(tx_end),geneName,'0',strand))
+ 			except:
+-				print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line,
++				print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr)
+ 				continue
+ 
+ 			# assign reads to intron				
+@@ -1309,7 +1309,7 @@ class QCSAM:
+ 				except:
+ 					RPKM_OUT.write(chrom.lower() + "\t" + str(tx_start) + "\t" + str(tx_end) + "\t" + geneName + "_mRNA" + "\t" + str(0) + "\t" + strand + '\t' +  str(0) +'\n')
+ 					rpkm[key] = 0
+-		print >>sys.stderr, "Done"
++		print("Done", file=sys.stderr)
+ 		return rpkm
+ 		self.f.seek(0)
+ 
+@@ -1320,7 +1320,7 @@ class QCSAM:
+ 		NOTE: intronic reads are not counted as part of total reads'''
+ 		
+ 		if refbed is None:
+-			print >>sys.stderr,"You must specify a bed file representing gene model\n"
++			print("You must specify a bed file representing gene model\n", file=sys.stderr)
+ 			exit(0)
+ 		if outfile is None:
+ 			rpkm_file = self.fileName + ".rpkm.xls"
+@@ -1338,7 +1338,7 @@ class QCSAM:
+ 		rpkm={}
+ 		
+ 		#read gene model file, the purpose is to remove intronic reads
+-		print >>sys.stderr, "Reading reference gene model "+ refbed + '...'
++		print("Reading reference gene model "+ refbed + '...', file=sys.stderr)
+ 		for line in open(refbed,'r'):
+ 			try:
+ 				if line.startswith(('#','track','browser')):continue
+@@ -1351,12 +1351,12 @@ class QCSAM:
+ 				geneName      = fields[3]
+ 				strand    = fields[5].replace(" ","_")
+ 				
+-				exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
+-				exon_starts = map((lambda x: x + tx_start ), exon_starts)
+-				exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
+-				exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);   
++				exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) ))
++				exon_starts = list(map((lambda x: x + tx_start ), exon_starts))
++				exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) ))
++				exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends));   
+ 			except:
+-				print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line,
++				print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr)
+ 				continue	
+ 
+ 			for st,end in zip(exon_starts,exon_ends):				
+@@ -1366,7 +1366,7 @@ class QCSAM:
+ 					exon_ranges[chrom].add_interval( Interval( st, end ) )		
+ 
+ 		#read SAM 
+-		print >>sys.stderr, "reading "+ self.fileName + '...',
++		print("reading "+ self.fileName + '...', end=' ', file=sys.stderr)
+ 		for line in self.f:
+ 			if line.startswith("@"):continue
+ 			fields=line.rstrip('\n ').split()
+@@ -1401,22 +1401,22 @@ class QCSAM:
+ 						ranges[chrom] = Intersecter()
+ 					else:
+ 						ranges[chrom].add_interval( Interval( mid, mid ) )
+-    			else:																			#if this framgnet is intronic, skip it.
++			else:																			#if this framgnet is intronic, skip it.
+     				#intronic +=1
+-    				continue	
++ 				continue	
+ 		self.f.seek(0)
+-		print >>sys.stderr, "Done"
+-		print >>RPKM_OUT, "Total mapped reads (TR): " + str(totalReads) 
+-		print >>RPKM_OUT, "Multiple mapped reads (MR): " + str(multiMapReads)
+-		print >>RPKM_OUT, "Uniquely mapped reads (UR): " + str(totalReads - multiMapReads)
+-		print >>RPKM_OUT, "Spliced  mapped reads (SR): " + str(sR)
+-		print >>RPKM_OUT, "Corrected uniquely mapped reads (cUR, non-intronic fragments): " + str(cUR)
++		print("Done", file=sys.stderr)
++		print("Total mapped reads (TR): " + str(totalReads), file=RPKM_OUT) 
++		print("Multiple mapped reads (MR): " + str(multiMapReads), file=RPKM_OUT)
++		print("Uniquely mapped reads (UR): " + str(totalReads - multiMapReads), file=RPKM_OUT)
++		print("Spliced  mapped reads (SR): " + str(sR), file=RPKM_OUT)
++		print("Corrected uniquely mapped reads (cUR, non-intronic fragments): " + str(cUR), file=RPKM_OUT)
+ 		#print >>RPKM_OUT, "Intronic Fragments (IF): " + str(intronic)
+ 		if totalReads ==0:
+ 			sys.exit(1)
+ 		
+ 		#read refbed file
+-		print >>sys.stderr, "Assign reads to "+ refbed + '...',
++		print("Assign reads to "+ refbed + '...', end=' ', file=sys.stderr)
+ 		for line in open(refbed,'r'):
+ 			try:
+ 				if line.startswith('#'):continue
+@@ -1430,16 +1430,16 @@ class QCSAM:
+ 				geneName      = fields[3]
+ 				strand    = fields[5].replace(" ","_")
+ 				
+-				exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
+-				exon_starts = map((lambda x: x + tx_start ), exon_starts)
+-				exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
+-				exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends)
+-				exon_sizes = map(int,fields[10].rstrip(',\n').split(','))
++				exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) ))
++				exon_starts = list(map((lambda x: x + tx_start ), exon_starts))
++				exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) ))
++				exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends))
++				exon_sizes = list(map(int,fields[10].rstrip(',\n').split(',')))
+ 				intron_starts = exon_ends[:-1]
+ 				intron_ends=exon_starts[1:]
+ 				key='\t'.join((chrom.lower(),str(tx_start),str(tx_end),geneName,'0',strand))
+ 			except:
+-				print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line,
++				print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr)
+ 				continue
+ 
+ 			# assign reads to intron				
+@@ -1487,7 +1487,7 @@ class QCSAM:
+ 				except:
+ 					RPKM_OUT.write(chrom.lower() + "\t" + str(tx_start) + "\t" + str(tx_end) + "\t" + geneName + "_mRNA" + "\t" + str(0) + "\t" + strand + '\t' +  str(0) +'\n')
+ 					rpkm[key] = 0
+-		print >>sys.stderr, "Done"
++		print("Done", file=sys.stderr)
+ 		return rpkm
+ 		self.f.seek(0)
+ 
+@@ -1499,7 +1499,7 @@ class QCSAM:
+ 		unknownReads=0
+ 		ranges={}
+ 		if refbed is None:
+-			print >>sys.stderr,"You must specify a bed file representing gene model\n"
++			print("You must specify a bed file representing gene model\n", file=sys.stderr)
+ 			exit(0)
+ 		
+ 		if outfile is None:
+@@ -1508,7 +1508,7 @@ class QCSAM:
+ 			out_file = outfile + ".unknownReads.SAM"	
+ 		OUT=open(out_file,'w')
*** 1991 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202203251349.22PDndm8027606>