Python list index out of range regarding massive iterations only
I have a big text file,
where each line is according to a define syntax (which been handled with a
regex).
I'm getting the following error:
remainder = '{} {} '.format(*pieces[-1])
IndexError: list index out of range
on this code:
def open_delimited(filename, args):
with open(filename, args, encoding="UTF-16") as infile:
chunksize = 10000
remainder = ''
for chunk in iter(lambda: infile.read(chunksize), ''):
pieces =
re.findall(r"(\d+)\s+(\d+_\d+)\s+(((post)\s+1)|((\d+_\d+_\d+)\s+(comment)\s+2))(.+)(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2}:\d{2})(.*)",
remainder + chunk, re.IGNORECASE)
for piece in pieces[:-1]:
yield piece
remainder = '{} {} '.format(*pieces[-1])
if remainder:
yield remainder
filename = 'data/AllData_2000001_3000000.txt'
for chunk in open_delimited(filename, 'r'):
for j in range(len(chunk)):
print(chunk[j])
When I'm limiting the number of iterations, the code work fine.
i = 0
for chunk in open_delimited(filename, 'r'):
if (i <= 1000):
for j in range(len(chunk)):
print(chunk[j])
else:
break
i += 1
No comments:
Post a Comment