This post is motivated by a desire for a custom BitTorrent client. This code generates a Python data structure that mirrors the bencoded data, using Python's ordered dictionaries and lists. I have tested it on traditional .torrent files, where the bencoded data is a human-readable byte string. Other formats, such as some (all?) DHT torrents, are not supported.
from collections import OrderedDict
def BEncodeToPy(bytestring):
    '''
    Convert a bencoded byte string into a Python data structure.

    Bencoded dictionaries become OrderedDict objects (keys kept in the
    order they appear), lists become Python lists, integers become ints,
    and strings are returned as slices of ``bytestring`` (nothing is
    decoded to text, so keys and string values stay byte strings).

    The top-level value must be a dictionary (it starts with ``d``). Each
    dictionary entry is a string key followed by a value that is one of:
    dictionary, list, integer, or string. Nested values are handled
    recursively. Bytes after the end of the top-level dictionary are
    ignored.

    Args:
        bytestring: The bencoded data as a bytes-like object, e.g. the
            contents of a .torrent file read in binary mode.

    Returns:
        An OrderedDict mirroring the bencoded dictionary, or None when the
        data cannot be parsed (in that case 'could not parse' is printed).
    '''
    dex = 0  # cursor: index of the next unread byte in bytestring

    def Peek():
        '''Return the byte at the cursor as a length-1 slice, without consuming it.'''
        if dex >= len(bytestring):
            raise ValueError('unexpected end of bencoded data')
        # Slice instead of index so the result compares equal to b'd', b'e', ...
        return bytestring[dex:dex + 1]

    def GetValue():
        '''Parse whichever value starts at the cursor and return it.'''
        nonlocal dex
        marker = Peek()
        if marker == b'd':
            return OpenDict()
        if marker == b'l':
            return OpenList()
        if marker == b'i':
            dex += 1  # GetInt starts at the first character of the number
            return GetInt()
        if marker.isdigit():
            return GetString()
        # Failing here is essential: silently skipping an unknown marker
        # would leave the cursor in place and make the callers loop forever.
        raise ValueError('unknown type marker at offset %d' % dex)

    def OpenList():
        '''Parse a list; the cursor is on its leading "l".'''
        nonlocal dex
        items = []
        dex += 1  # skip over l
        while Peek() != b'e':
            items.append(GetValue())
        dex += 1  # skip over e
        return items

    def OpenDict():
        '''Parse a dictionary; the cursor is on its leading "d".'''
        nonlocal dex
        d = OrderedDict()
        dex += 1  # skip over d; the first key's length prefix comes next
        while Peek() != b'e':
            key = GetString()  # keys are always strings
            d[key] = GetValue()
        dex += 1  # skip over e
        return d

    def GetStrLen():
        '''Read a "<digits>:" length prefix and leave the cursor after the colon.'''
        nonlocal dex
        chardex = bytestring.find(b':', dex)
        if chardex == -1:
            raise ValueError('string length is missing its ":" terminator')
        digits = bytestring[dex:chardex]
        # Reject anything but plain digits (signs, spaces, empty prefix) so
        # a bogus length can never move the cursor backwards.
        if not digits.isdigit():
            raise ValueError('invalid string length at offset %d' % dex)
        dex = chardex + 1
        return int(digits)

    def GetString():
        '''Read a length-prefixed string and return it as a slice of bytestring.'''
        nonlocal dex
        strlen = GetStrLen()
        end = dex + strlen
        if end > len(bytestring):
            # A plain slice would silently return a shortened string.
            raise ValueError('string runs past the end of the data')
        text = bytestring[dex:end]
        dex = end
        return text

    def GetInt():
        '''Read an integer up to its closing "e"; the cursor is past the "i".'''
        nonlocal dex
        endintdex = bytestring.find(b'e', dex)
        if endintdex == -1:
            raise ValueError('integer is missing its "e" terminator')
        num = int(bytestring[dex:endintdex])
        dex = endintdex + 1
        return num

    try:
        if Peek() != b'd':
            raise ValueError('top-level value must be a dictionary')
        return OpenDict()
    except Exception:
        # Top-level boundary: callers get None for any unparseable input.
        print('could not parse')
        return None
This graphic compares a binary view of a torrent file with the contents converted to an ordered dictionary in Python.

No comments:
Post a Comment