Monday, February 8, 2010

Reading files into a dictionary in Python

This is one I use all the time to read massive datasets; it is equivalent to what you can do in Perl with the @

import os,sys
import linecache
import string

def read_fields(path, sep=None):
    """Read a text file into a list of per-line field lists.

    The position of a row in the returned list is its zero-based line
    number, so ``rows[i][j]`` is field ``j`` of line ``i`` (think of a
    spreadsheet: ``i`` is the row, ``j`` the column).

    Args:
        path: Path of the text file to load.
        sep: Field separator handed to ``str.split``. ``None`` (the
            default) splits on any run of whitespace -- spaces as well
            as tabs -- and drops leading/trailing whitespace. Pass
            ``"\\t"``, ``","`` etc. to split on exactly that symbol.

    Returns:
        A list with one entry per line; each entry is a list of strings.
        A blank line gives ``[]`` with the default separator.

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    # Only the line terminator is removed here: with an explicit separator
    # a full strip() would also eat leading/trailing separator characters
    # and silently drop empty first/last fields.
    with open(path) as handle:
        return [line.rstrip("\r\n").split(sep) for line in handle]


if __name__ == "__main__":
    # argv[0] is the script name, argv[1] is the data file to load.
    if len(sys.argv) != 2:
        sys.exit("usage: %s FILE" % sys.argv[0])

    arr = read_fields(sys.argv[1])

    # Index of the second column echoed below; -1 selects the last field
    # of each row. Change it to whichever column is of interest.
    n = -1

    # The results file is opened (and truncated) for the caller to write
    # to via rfp.write(...); the with-block guarantees it gets closed.
    with open('results.txt', "w") as rfp:
        for row in arr:
            print(row)
            # Blank lines produce an empty row with no columns to index.
            if row:
                print("%s %s" % (row[0], row[n]))