Python 数据处理






# -*- coding: utf-8 -*-
# Feature-scaling demo: rescale the cumulative box-office ('累计票房') and
# Douban rating ('豆瓣评分') columns of data1.csv in three different ways and
# store each result as a new column of `data`.

import pandas
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import scale

data = pandas.read_csv('D:\\PDM\\6.1\\data1.csv')


# Min-Max scaling.
# scikit-learn transformers require 2-D input of shape
# (n_samples, n_features), so each column is selected as a one-column
# DataFrame (double brackets) and the (n, 1) result is flattened before it is
# written back as a column.
scaler = MinMaxScaler()

data['标准化累计票房'] = scaler.fit_transform(data[['累计票房']]).ravel()

data['标准化豆瓣评分'] = scaler.fit_transform(data[['豆瓣评分']]).ravel()


# Z-score standardization.
# NOTE: this writes to the same two columns as the Min-Max step above, so the
# Min-Max results are overwritten by the Z-score results.
data['标准化累计票房'] = scale(data['累计票房'])

data['标准化豆瓣评分'] = scale(data['豆瓣评分'])

# Normalizer: rescales each *sample* (row) to unit norm. To normalize a whole
# column as one vector, the column is reshaped into a single row of shape
# (1, n_values); `[0]` then takes that one normalized row back out.
scaler = Normalizer()

data['归一化累计票房'] = scaler.fit_transform(
    data['累计票房'].values.reshape(1, -1)
)[0]

data['归一化豆瓣评分'] = scaler.fit_transform(
    data['豆瓣评分'].values.reshape(1, -1)
)[0]



# -*- coding: utf-8 -*-

import pandas

# One-hot encoding demo: load data2.csv and expand the '症状' (symptom)
# column into one indicator column per distinct value.
data = pandas.read_csv('D:\\PDM\\6.1\\data2.csv')

# Convert the symptom column to the categorical dtype before encoding.
data = data.astype({'症状': 'category'})

# Indicator columns are named "<prefix>_<value>".
dummiesData = pandas.get_dummies(
    data=data, prefix=['症状'], prefix_sep="_", columns=['症状']
)



import pandas

# Missing-value imputation demo: fill the gaps in the cumulative box-office
# ('累计票房') column of data3.csv with a column statistic.

try:
    # Current scikit-learn: the imputer lives in sklearn.impute.
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    # Older scikit-learn releases that predate sklearn.impute.
    from sklearn.preprocessing import Imputer

data = pandas.read_csv('D:\\PDM\\6.1\\data3.csv')


# Supported strategies include 'mean', 'median', 'most_frequent'.
imputer = Imputer(strategy='mean')

# Double brackets select a one-column DataFrame (the 2-D input the imputer
# requires). The imputed values are kept in their own variable; `data` itself
# is left unmodified.
imputedData = imputer.fit_transform(data[['累计票房']])

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容