#! /usr/bin/python
# -*- coding:utf-8 -*-
"""Turn irregular line-delimited JSON into a regular DataFrame and filter it.

Each line of the input file is one JSON object.  Keys that are missing from
an object are replaced by defaults so that every record has the same three
fields (``id``, ``a``, ``b``).  The records are written to an intermediate
CSV, read back, extended with two derived columns (``ratio`` and
``dataDiff``), filtered, and the surviving rows are saved to a final CSV.
"""
import json
from typing import Iterable

import pandas as pd

# Input / intermediate / output locations (kept exactly as in the script).
filename = "C:\\Usein.json"
filenameDF = "C:\\UseinDF.csv"
Dataout = "C:\\UserlastData.csv"


def parse_records(lines: Iterable[str]) -> list:
    """Parse JSON lines into ``(id, a, b)`` tuples, filling in defaults.

    Args:
        lines: Iterable of text lines, each holding one JSON object.  An
            open text file works.  Blank lines are skipped.

    Returns:
        A list of 3-tuples ``(id, a, b)``.  A missing ``id`` becomes
        ``"not-exist"``; a missing ``a`` or ``b`` becomes ``0``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    for line in lines:
        # A blank (e.g. trailing) line is not a record; json.loads would
        # raise on it, so skip it instead of aborting the whole run.
        if not line.strip():
            continue
        sdata = json.loads(line)
        # dict.get(key, default) returns the value when the key exists and
        # the default otherwise, which is what regularizes the records.
        records.append(
            (sdata.get("id", "not-exist"), sdata.get("a", 0), sdata.get("b", 0))
        )
        # Running count of processed records, as progress output.
        print(len(records))
    return records


def read_intermediate(source) -> pd.DataFrame:
    """Read the header-less intermediate CSV with columns ``id``, ``a``, ``b``.

    Args:
        source: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with columns ``['id', 'a', 'b']`` containing every row of
        the CSV.
    """
    # The intermediate file is written without a header row, so header=None
    # is required.  With header=0 pandas would treat the first data row as a
    # header to be replaced by `names`, silently dropping the first record.
    return pd.read_csv(source, sep=',', names=['id', 'a', 'b'], header=None)


def add_derived_columns(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with ``ratio`` and ``dataDiff`` appended.

    ``dataDiff`` is ``a - b`` and ``ratio`` is ``(a - b) / a``.  The new
    columns are appended after the existing ones, in that order
    (``ratio`` then ``dataDiff``).
    """
    diff = frame['a'] - frame['b']
    return frame.assign(ratio=diff / frame['a'], dataDiff=diff)


def filter_rows(frame: pd.DataFrame) -> pd.DataFrame:
    """Keep rows where ``ratio < 0.1``, ``dataDiff < 10`` and ``a > 0``."""
    # query() parses the comparisons with the expected precedence; the same
    # expression written with bare `&` on boolean masks would need explicit
    # parentheses around every comparison.
    return frame.query('ratio<0.1 & dataDiff<10 & a> 0')


def main() -> None:
    """Run the full pipeline: load JSON, round-trip via CSV, filter, save."""
    with open(filename, mode='r', encoding='utf-8') as fileobj:
        records = parse_records(fileobj)
    dfquery = pd.DataFrame(records)

    print("ready to write data to File")
    dfquery.to_csv(filenameDF, index=False, header=False)

    # Read the data back from the intermediate file.
    print("ready to read data ")
    DataDF = read_intermediate(filenameDF)

    # Produce the new columns.
    result = add_derived_columns(DataDF)

    # Filter and select.
    print("filter the data")
    resData = filter_rows(result)

    # Save the data.
    resData.to_csv(Dataout, index=False, header=True)


if __name__ == "__main__":
    main()