What is the encoding of the names in nameList?
Since I have spent a lot of time debugging this, I'm sharing it.
If someone knows a better way to do this, please speak up:
from csv import reader
def loadNameList(path):
    """Load a UTF-8 encoded CSV file of names as normalized text.

    The fields of each CSV row are concatenated with no separator, decoded
    as UTF-8, lower-cased and stripped of surrounding whitespace.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list with one unicode string per CSV row, in file order. A blank
        row contributes an empty string.

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # On Python 2 ``str`` is the byte-string type; there the csv module only
    # handles bytes, so the file is read in binary mode and decoded after
    # parsing. On Python 3 the csv module needs text, opened with newline=''.
    py2 = str is bytes
    if py2:
        f = open(path, 'rb')
    else:
        f = open(path, 'r', encoding='utf-8', newline='')

    with f:
        data = []
        for row in reader(f):
            name = ''.join(row)
            if py2:
                name = name.decode('utf-8')
            data.append(name.lower().strip())
    return data
def loadTestUsers(path):
    """Load the user IDs from a tab-separated file.

    Only the first tab-separated column of each line is used; it is parsed
    as an integer. Lines containing nothing but whitespace are skipped so
    that a stray or trailing blank line does not abort the load.

    Args:
        path: Path of the file to read.

    Returns:
        A list of integer user IDs, in file order.

    Raises:
        ValueError: If the first column of a non-blank line is not an
            integer.
    """
    with open(path, 'r') as f:
        # int() tolerates surrounding whitespace, so a single-column line
        # that still carries its newline parses correctly.
        return [int(line.split('\t', 1)[0]) for line in f if line.strip()]
def loadTrain(path):
    """Load the training records from a UTF-8 encoded, tab-separated file.

    Every non-empty row must have exactly four columns: user ID, activity,
    name and a fourth column that is ignored. The user ID is parsed as an
    integer; the name is decoded as UTF-8, lower-cased and stripped of
    surrounding whitespace; the activity is kept as parsed by the csv module.

    Args:
        path: Path of the tab-separated file to read.

    Returns:
        A list of ``[userID, activity, name]`` lists, in file order.

    Raises:
        ValueError: If a non-empty row does not have exactly four columns
            or its user ID is not an integer.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # On Python 2 ``str`` is the byte-string type; there the csv module only
    # handles bytes, so the file is read in binary mode and the name is
    # decoded after parsing. On Python 3 the csv module needs text, opened
    # with newline=''.
    py2 = str is bytes
    if py2:
        f = open(path, 'rb')
    else:
        f = open(path, 'r', encoding='utf-8', newline='')

    with f:
        data = []
        for row in reader(f, delimiter='\t'):
            if not row:
                # The csv reader yields [] for a blank line; skip it.
                continue
            userID, activity, name, _ = row
            if py2:
                name = name.decode('utf-8')
            data.append([int(userID), activity, name.lower().strip()])
    return data