Friday, December 6, 2013

Converting Python OrderedDict To Bencoded Binary File

This post polishes up the two previous posts.  The idea here was to be able to open a torrent file and make a few modifications.  I had a torrent go down on me and hadn't downloaded all the files.  I thought I would try removing the tracker and 'private' status and try to connect via DHT.  So far this hasn't been successful.  I think other peers would also need to enable DHT for their torrents :/ Regardless, it seemed natural to be able to create a file from the python ordered dictionary.  The python ordered dictionary is described in this post.

from collections import OrderedDict
...
def PyToBEncode(pydict):
    '''
    Serializes a python dictionary into a bencoded byte string.

    pydict is written as a bencoded dictionary.  Keys are written in the
    dictionary's own iteration order (no sorting is applied) and must be
    byte strings or text.  Values may be:
        dict (including OrderedDict)  -> d ... e
        list                          -> l ... e
        int                           -> i<number>e
        bytes / bytearray / str       -> <length>:<data>
    Text (str) is encoded as UTF-8 first so the length prefix counts bytes.

    Returns the bencoded data as a single bytes object.
    Raises TypeError for values that have no bencoded form (bool, float, None, ...).
    '''
    bytestringlist = []

    def writestr(raw):
        # the length prefix must count bytes, so text is encoded before measuring
        if isinstance(raw, str):
            raw = raw.encode('utf8')
        else:
            raw = bytes(raw)
        bytestringlist.append(bytes(str(len(raw)), encoding='utf8'))
        bytestringlist.append(b':')
        bytestringlist.append(raw)

    def writei(num):
        bytestringlist.append(b'i%de' % num)

    def writel(lis):
        bytestringlist.append(b'l')
        for el in lis:
            writevalue(el)
        bytestringlist.append(b'e')

    def writed(dic):
        bytestringlist.append(b'd')
        for key in dic:
            writestr(key)
            writevalue(dic[key])
        bytestringlist.append(b'e')

    def writevalue(value):
        # bool is a subclass of int; reject it so True is never written as an integer
        if isinstance(value, bool) or value is None or isinstance(value, float):
            raise TypeError('cannot bencode value of type ' + type(value).__name__)
        if isinstance(value, dict):
            writed(value)
        elif isinstance(value, list):
            writel(value)
        elif isinstance(value, int):
            writei(value)
        else:
            writestr(value)

    writed(pydict)
    return b''.join(bytestringlist)
   
Here is a graphic of creating a new torrent.  I am still waiting to make a connection.

Wednesday, December 4, 2013

Matching SHA1 Hashes to Torrent Hashlist

This uses the python ordered dictionary from the previous post.  Before starting the check, navigate to the directory holding the files downloaded via the torrent.  This utility is useful if you have the entire download, as it does not utilize .dat files to fill in piece edges.  Running this function will return a zero-indexed list of boolean values that indicate each piece's SHA-1 match status.


def HashCheck(pydict):
    '''
    This compares the hashlist in the torrent to the hashes of files in the current directory.

    pydict is the torrent as a python dictionary with byte string keys.  The
    files listed under b'info' are opened relative to the current working
    directory and treated as one continuous stream that is cut into pieces of
    b'piece length' bytes.  Each piece's SHA-1 digest is compared against the
    matching 20 byte entry of b'pieces'.  Only the number of bytes declared in
    the torrent is read from each file.

    Prints both hashes and the verdict for every piece and returns a zero
    indexed list of booleans (True when the piece matched), including the
    final, possibly shorter, piece.

    Raises ValueError if the piece length is not positive, and whatever open()
    raises when a listed file is missing.
    Ultimately I will need some kind of db for partial files that have not been downloaded.
    '''

    def CheckInfo(pydict):
        '''returns list with tuples of form (filesize, relativefilepath) and last element is piece length'''
        info = pydict[b'info']
        if b'files' in info:
            checkinfo = [(i[b'length'], b'/'.join(i[b'path'])) for i in info[b'files']]
        else:
            checkinfo = [(info[b'length'], info[b'name'])]
        checkinfo.append(info[b'piece length'])
        return checkinfo

    infolist = CheckInfo(pydict)
    piecelength = infolist[-1]
    if piecelength <= 0:
        # a zero piece length would never fill a piece and loop forever
        raise ValueError('piece length must be positive')
    pieces = pydict[b'info'][b'pieces']
    piecelist = []

    def RecordPiece(digest):
        '''compares digest with the stored hash of the next unchecked piece and records the result'''
        hashdex = len(piecelist)
        hlisthash = pieces[hashdex*20 : (hashdex + 1)*20].hex()
        print(hlisthash, digest)
        if hlisthash == digest:
            print('Hashes Match')
            piecelist.append(True)
        else:
            print('Bad Piece', hashdex+1)
            piecelist.append(False)

    remaining = piecelength    # bytes still needed to complete the current piece
    h = hashlib.sha1()
    for filesize, filepath in infolist[:-1]:
        tohash = filesize
        # with-block closes the file even when a piece boundary falls mid file
        with open(filepath.decode('utf8'), 'rb') as f:
            while tohash > 0:
                chunk = min(tohash, remaining)
                h.update(f.read(chunk))
                tohash -= chunk
                remaining -= chunk
                if remaining == 0:
                    RecordPiece(h.hexdigest())
                    h = hashlib.sha1()
                    remaining = piecelength
    if remaining != piecelength:
        # data left over after the last file forms the final, shorter piece
        RecordPiece(h.hexdigest())
    return piecelist



Converting Bencode to Python Data Structure

This post is motivated by a desire for a custom Bittorrent client.  This code will generate a python data structure that mirrors that of the bencoded data using python's ordered dictionary and lists.  I have tested this on traditional .torrent files where the bencoded data is a human readable byte string.  Other formats, such as some (all?) DHT torrents, are not supported.

from collections import OrderedDict

def BEncodeToPy(bytestring):
    '''
    #Converts Bencoded bytestring into python data structure, mirroring data types (doesn't work with magnet torrents)
    #Start with the d.
    #Next there will be a string that is the dictionary's key
    #Then the next entry is one of: dic, list, integer, or string.  This is the value for the key
    #Handle or call appropriate handler recursively
    #
    #Dictionaries become OrderedDict, lists become list, integers become int
    #and strings stay as byte strings (keys included).
    #Returns the top level OrderedDict, or prints 'could not parse' and
    #returns None when the data is malformed or truncated.
    '''
    dex = 0    #index of the next unread byte, shared by all helpers

    def GetValue():
        '''reads whichever value starts at dex, dispatching on its first byte'''
        nonlocal dex
        marker = chr(bytestring[dex])
        if marker == 'd':
            return OpenDict()
        if marker == 'l':
            return OpenList()
        if marker == 'i':
            dex += 1    #get int will start with first digit
            return GetInt()
        if marker in '0123456789':
            return GetStr()
        #an unknown marker would otherwise never advance dex and loop forever
        raise ValueError('unexpected byte at index ' + str(dex))

    def OpenList():
        nonlocal dex
        l = []
        dex += 1    #skip over l
        while chr(bytestring[dex]) != 'e':
            l.append(GetValue())
        dex += 1    #skip over e
        return l

    def OpenDict():
        nonlocal dex
        d = OrderedDict()
        dex += 1 #GetStrLen should start at beginning of integer
        while chr(bytestring[dex]) != 'e':
            key = GetStr()
            d[key] = GetValue()
        dex += 1    #skip over e
        return d

    def GetStr():
        nonlocal dex
        strlen = GetStrLen()
        end = dex + strlen
        if strlen < 0 or end > len(bytestring):
            raise ValueError('bad string length at index ' + str(dex))
        s = bytestring[dex:end]
        dex = end
        return s

    def GetStrLen():
        nonlocal dex
        chardex = bytestring.find(b':', dex)
        if chardex == -1:
            #without this check dex would be reset to 0 and parsing could loop forever
            raise ValueError('missing : after string length')
        num = int(bytestring[dex:chardex])
        dex = chardex + 1
        return num

    def GetInt():
        nonlocal dex
        endintdex = bytestring.find(b'e', dex)
        if endintdex == -1:
            raise ValueError('missing e after integer')
        num = int(bytestring[dex:endintdex])
        dex = endintdex + 1
        return num

    try:
        return OpenDict()
    except Exception:
        #best effort: callers get None rather than an exception for bad input
        print('could not parse')
        return None

This graphic compares a binary view of a torrent file with the contents converted to an ordered dictionary in python.