i write this python to read dataset into panda data frame but im getting NameError: name 'true' is not defined. Below is the code:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 25 23:57:39 2020
@author: OLATUNJI
"""
import pandas as pd
import gzip
def parse(path):
g = gzip.open(path, 'rb')
for l in g:
yield eval(l)
def getDF(path):
i = 0
df = {}
for d in parse(path):
df[i] = d
i += 1
return pd.DataFrame.from_dict(df, orient='index')
fashion_data = getDF('Amazon_Fashion.json.gz')
beauty_data = getDF('All_Beauty.json.gz')
appliances_data = getDF('Appliances.json.gz')
arts_data = getDF('Arts_Crafts_and_Sewing.json.gz')
automotive_data = getDF('Automotive.json.gz')
frames = [fashion_data.reviewText[:1000],beauty_data.reviewText[:1000], \
appliances_data.reviewText[:1000],arts_data.reviewText[:1000],\
automotive_data.reviewText[:1000]]
complete_data = pd.concat(frames, keys = ['video','auto','musical','office','patio'])
labels = []
for i in range(5):
labels += [i] * 1000
fashion_data,beauty_data,appliances_data,arts_data,automotive_data = [],[],[],[],[]
print(complete_data.loc['Fashion'])