Friday 12 February 2016

How is it possible python to be faster than pypy

I run a piece of code with python2.7 and the cProfile says 35s while cProfile on pypy says 73s ! How is that possible assuming pypy is faster interpreter. The code implements the BWT transformation on input a bitstream. I have two files: bwt.py which is called in fm.py. I called the function as:




pypy -m cProfle fm.py inputfile


and then



python -m cProfle fm.py inputfile


The code from bwt.py is the following:




def rotations(t):
''' Return list of rotations of input string t '''
tt = t * 2
return [ tt[i:i+len(t)] for i in xrange(0, len(t)) ]

def bwm(t):
return sorted(rotations(t))

def bwtViaBwm(t):
''' Given T, returns BWT(T) by way of the BWM '''

return ''.join(map(lambda x: x[-1], bwm(t)))

def rankBwt(bw):
''' Given BWT string bw, return parallel list of B-ranks. Also
returns tots: map from character to # times it appears. '''
tots = dict()
ranks = []
for c in bw:
if c not in tots: tots[c] = 0
ranks.append(tots[c])

tots[c] += 1
return ranks, tots
def firstCol(tots):
''' Return map from character to the range of rows prefixed by
the character. '''
first = {}
totc = 0
for c, count in sorted(tots.iteritems()):
first[c] = (totc, totc + count)
totc += count

return first

def reverseBwt(bw):
''' Make T from BWT(T) '''
ranks, tots = rankBwt(bw)
first = firstCol(tots)
rowi = 0 # start in first row
t = '$' # start with rightmost character
while bw[rowi] != '$':
c = bw[rowi]

t = c + t # prepend to answer
# jump to row that starts with c of same rank
rowi = first[c][0] + ranks[rowi]
return t



def suffixArray(s):
satups = sorted([(s[i:], i) for i in xrange(0, len(s))])
print satups

return map(lambda x: x[1], satups)

def bwtViaSa(t):
# Given T, returns BWT(T) by way of the suffix array
bw = []
for si in suffixArray(t):
if si == 0:
bw.append('$')
else:
bw.append(t[si-1])

return ''.join(bw) # return string-ized version of list bw



def readfile(sd):
s=""
with open(sd,'r') as myfile:
s =myfile.read()
return s.rstrip('\n')
def writefile(sd,N):

with open(sd, "wb") as sink:
sink.write(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in xrange(N)))
sink.write('$')
return



def main():
data=readfile('inp')
b=bwtViaBwm(data)

ranks,tots = rankBwt(b)
print "Input stream = "+ data
print "BWT = " + bwtViaSa(data)
print '\n'.join(bwm(data))
print ("Lc ranking:")
print zip(b,ranks)

fc=[x[0] for x in bwm(data)]
fc= ''.join(fc)
print ("First column="+ fc)

ranks,tots = rankBwt(fc)
print("Fc ranking:")
print zip(fc,ranks)

print reverseBwt(bwtViaSa(data))

if __name__=='__main__':
main()



And this is the code form the fm.py which i call it through pypy:



import bwt
import sys
from collections import Counter

def build_FM(fname):
stream=bwt.readfile(fname)
#print bwt.suffixArray(stream)
b=bwt.bwtViaBwm(stream)

ranks,tots = bwt.rankBwt(b)
lc=zip(b,ranks)
fc=[x[0] for x in bwt.bwm(stream)]
fc= ''.join(fc)
fc= zip(fc,ranks)
#print lc,fc


def main():
fname= sys.argv[1]

build_FM(fname)
return


if __name__=='__main__':
main()

No comments:

Post a Comment

c++ - Does curly brackets matter for empty constructor?

Those brackets declare an empty, inline constructor. In that case, with them, the constructor does exist, it merely does nothing more than t...