Wednesday, December 4, 2013

Converting Bencode to Python Data Structure

This post is motivated by a desire for a custom BitTorrent client.  This code generates a Python data structure that mirrors the bencoded data, using Python's ordered dictionaries and lists.  I have tested it on traditional .torrent files, where the bencoded data is a human-readable byte string.  Other formats, such as some (all?) DHT torrents, are not supported.

from collections import OrderedDict

def BEncodeToPy(bytestring):
    '''
    Convert a bencoded byte string into nested Python data structures.

    The top-level value must be a dictionary (it starts with b'd').  Each
    dictionary entry is a string key followed by a value that is one of:
    dictionary, list, integer, or string.  Dictionaries become OrderedDict
    (entry order is preserved), lists become list, integers become int, and
    strings (including dictionary keys) are returned as slices of
    bytestring, i.e. they are not decoded to str.

    Any bytes that follow the end of the top-level dictionary are ignored.

    bytestring -- the bencoded data, e.g. the raw contents of a .torrent file.

    Returns the OrderedDict for the top-level dictionary.  If the data cannot
    be parsed, prints 'could not parse' and returns None; no exception is
    propagated to the caller.
    '''
    dex = 0  # index of the next unread byte; shared by all nested helpers

    def Peek():
        # A one-byte slice instead of an index: at end of input this yields
        # b'' rather than raising, and b'' matches none of the type markers.
        return bytestring[dex:dex + 1]

    def GetValue():
        # Parse whichever value starts at dex, dispatching on its first byte.
        nonlocal dex
        marker = Peek()
        if marker == b'd':
            return OpenDict()
        if marker == b'l':
            return OpenList()
        if marker == b'i':
            dex += 1    # GetInt starts at the first digit (or sign)
            return GetInt()
        if marker.isdigit():
            return GetStr()
        # Unknown marker or end of input.  Failing here guarantees the
        # container loops below always make progress or stop.
        raise ValueError('unexpected byte at offset %d' % dex)

    def OpenList():
        nonlocal dex
        l = []
        dex += 1    # skip over l
        while Peek() != b'e':
            l.append(GetValue())
        dex += 1    # skip over e
        return l

    def OpenDict():
        nonlocal dex
        d = OrderedDict()
        dex += 1    # skip over d; GetStrLen should start at beginning of integer
        while Peek() != b'e':
            key = GetStr()
            d[key] = GetValue()
        dex += 1    # skip over e
        return d

    def GetStr():
        # Parse a <length>:<bytes> string and return the bytes.
        nonlocal dex
        strlen = GetStrLen()
        end = dex + strlen
        if end > len(bytestring):
            # A plain slice would silently return a shortened string.
            raise ValueError('string runs past end of data')
        s = bytestring[dex:end]
        dex = end
        return s

    def GetStrLen():
        # Parse the decimal length prefix and leave dex just after the ':'.
        nonlocal dex
        chardex = bytestring.find(b':', dex)
        if chardex == -1:
            # find() signals "not found" with -1; using it as a slice bound
            # would read the wrong bytes and move dex backwards.
            raise ValueError('missing : after string length')
        digits = bytestring[dex:chardex]
        if not digits.isdigit():    # also rejects empty and negative lengths
            raise ValueError('invalid string length')
        dex = chardex + 1
        return int(digits)

    def GetInt():
        # Parse the digits of an i<digits>e integer; dex is already past the i.
        nonlocal dex
        endintdex = bytestring.find(b'e', dex)
        if endintdex == -1:
            raise ValueError('missing e after integer')
        num = int(bytestring[dex:endintdex])
        dex = endintdex + 1
        return num

    try:
        if Peek() != b'd':
            raise ValueError('bencoded data must start with a dictionary')
        return OpenDict()
    except Exception:
        # This is the parser's outer boundary: keep the best-effort contract
        # of reporting the failure and handing back None instead of raising.
        print('could not parse')
        return None

This graphic compares a binary view of a torrent file with the contents converted to an ordered dictionary in Python.


No comments:

Post a Comment