【python 数据标准化】利用sklearn做标准化

# -*- coding: utf-8 -*-
from __future__ import division
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import pandas as pd
from sklearn import preprocessing
import numpy as np
# Load the raw records from worksheet 'Sheet1' of the Excel workbook.
data = pd.read_excel('F:/simple_data.xls', 'Sheet1')

# Elapsed time between login_time and end_time, stored on `data` as column 'L'
# (a timedelta column) exactly as the original script did.
data['L'] = (data["end_time"] - data["login_time"]).values

# Express that span in 30-day "months", rounded to 2 decimals.
# The day count is read through the timedelta accessor by column name rather
# than by parsing str(value) and stripping " days 00:00:00": the string form
# breaks as soon as a value carries a time-of-day part or is missing, and the
# positional lookup (column 6) breaks if the sheet layout changes.
new_L = (data['L'].dt.days / 30.0).round(2).tolist()

# Assemble the five-feature table. The column order is pinned explicitly
# (alphabetical, which is what dict-sorting pandas versions produced) so the
# layout does not depend on the dict-ordering rules of the installed pandas.
data1 = pd.DataFrame(
    {
        "L": new_L,
        "R": data['recent_interval'] / 30,
        "F": data['flight_count'],
        "M": data['mileage_sum'],
        "C": data['avg_discount'],
    },
    columns=['C', 'F', 'L', 'M', 'R'],
)

# Parenthesized single-argument print behaves the same on Python 2 and 3.
print(data1)


# Z-score standardization, (x - mean) / std, applied to each column of data1.
data1_scaled = preprocessing.scale(data1)

# preprocessing.scale returns a bare array whose columns are in data1's order,
# so take the labels from data1 itself. A hard-coded label list would silently
# attach the wrong names whenever data1's column order differs from it.
data2 = pd.DataFrame(data1_scaled, columns=data1.columns)

# Parenthesized single-argument print behaves the same on Python 2 and 3.
print(data2)


# Write the raw feature table and its standardized counterpart to Excel,
# omitting the row index from both workbooks.
for frame, target in ((data1, 'E:/data1.xlsx'), (data2, 'E:/data2.xlsx')):
    frame.to_excel(target, index=False)
已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页