# Data preprocessing for Churn_Modelling.csv: load the table, encode the
# categorical feature columns, split into training/test sets and standardize
# the features.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Importing the dataset
# NOTE: the file name must be wrapped in plain ASCII quotes; the typographic
# quotes used previously made the whole script a syntax error.
dataset = pd.read_csv('Churn_Modelling.csv')
# Features: every row, columns 3..12 (0-based; the slice end is exclusive),
# returned as a 2-D array.
X = dataset.iloc[:, 3:13].values
# Target: every row of column 13 (0-based), returned as a 1-D array.
y = dataset.iloc[:, 13].values

# Encoding categorical data
# LabelEncoder replaces each distinct value with an integer in
# 0..n_classes-1; equal values always receive the same integer.
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])  # column 1 (Geography) -> 0, 1, 2
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])  # column 2 (gender): female -> 0, male -> 1

# One-hot encode column 1. `OneHotEncoder(categorical_features=[1])` was
# removed in scikit-learn 0.22, so the column is encoded explicitly and the
# dummy columns are placed in front of the remaining features, which is the
# column order the removed option used to produce.
onehotencoder = OneHotEncoder()
geography_dummies = onehotencoder.fit_transform(X[:, [1]].astype(int)).toarray()
# X is an object array at this point (it originally held strings), hence the
# explicit cast to float once only numeric values are left.
X = np.hstack([geography_dummies, np.delete(X, 1, axis=1)]).astype(float)
# Drop the first dummy column: it is fully determined by the other dummies,
# so keeping it would only add a perfectly collinear feature.
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
# 20% of the rows are held out for testing; random_state fixes the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Feature Scaling
# Fit the scaler on the training set only, then apply the same statistics to
# the test set so no information from the test rows leaks into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
LabelEncoder: Encode labels with value between 0 and n_classes-1. 可以理解为给数据打上数字标签:相同的取值使用同一个编号。
本例子中 Geography 的取值是法国、德国、西班牙,LabelEncoder 会把 0、1、2 分别赋给这三个取值。
fit_transform: Fit the label encoder and return the encoded labels.
时间: 2024-10-05 05:07:16