原项目地址:https://github.com/MLEveryday/100-Days-Of-ML-Code/blob/master/Code/Day%201_Data_Preprocessing.md
(实际上50多天原作者就弃坑了)
-目录-
[TOC]
Day1:数据预处理

1 2 3 4 5
# Day 1, step 1: import the libraries and load the raw data set.
import numpy as np
import pandas as pd
import sklearn.preprocessing

# NOTE: absolute, machine-specific path; point it at your own copy of Data.csv.
dataset = pd.read_csv("C:/Users/匿了匿了/Documents/GitHub/100-Days-Of-ML-Code/datasets/Data.csv")
|
查看dataset
1 2 3 4 5
# Separate the inputs from the target.
feature_frame = dataset.iloc[:, :-1]  # every column except the last one
target_series = dataset.iloc[:, 3]    # the column at position 3
x = feature_frame.values
y = target_series.values
|
1 2 3 4 5
from sklearn.impute import SimpleImputer

# Replace NaN entries in columns 1 and 2 with the mean of their column.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan)
# fit_transform learns the column means and fills the gaps in a single call.
x[:, 1:3] = imputer.fit_transform(x[:, 1:3])
|
1 2 3 4
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Map each distinct value in column 0 to an integer code and store the codes
# back into that column.
labelencoder_x = LabelEncoder()
first_column_codes = labelencoder_x.fit_transform(x[:, 0])
x[:, 0] = first_column_codes
|
1 2 3 4 5 6 7
from sklearn.compose import ColumnTransformer

# One-hot encode column 0 and pass the remaining columns through unchanged.
# The configured encoder is handed to the transformer instead of building a
# second, separate OneHotEncoder instance.
onehotencoder = OneHotEncoder(categories='auto')
t = ColumnTransformer([('encoder', onehotencoder, [0])], remainder='passthrough')

# x must be rebound to the encoded matrix; otherwise the train/test split and
# the scaling that follow would still operate on the integer-coded column.
# `temp` is kept as an alias of the same array.
temp = np.array(t.fit_transform(x))
x = temp

# Encode the target labels as integers.
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
|
1 2 3
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
    test_size=0.2,
)
|
1 2 3 4 5
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training inputs only, then apply the
# same transformation to both subsets.
sc_x = StandardScaler()
sc_x.fit(x_train)
x_train = sc_x.transform(x_train)
x_test = sc_x.transform(x_test)
|
Day2:简单线性回归模型
1 2 3 4 5 6 7 8 9 10 11 12
# Day 2, step 1: load the data and split it into training and test sets.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Switch matplotlib to interactive mode before any figure is drawn.
plt.ion()

dataset = pd.read_csv("C:/Users/匿了匿了/Documents/GitHub/100-Days-Of-ML-Code/datasets/studentscores.csv")

# x keeps a 2-D shape (first column only); y is the 1-D column at position 1.
x = dataset.iloc[:, :1].values
y = dataset.iloc[:, 1].values

# Reserve a quarter of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=1/4, random_state=0
)
|
1 2 3 4
from sklearn.linear_model import LinearRegression

# Fit a linear regression model to the training data.
# fit() returns the estimator itself, so construction and training can chain.
regressor = LinearRegression().fit(x_train, y_train)
|

1 2
# Predict the target for the held-out test inputs.
y_pred = regressor.predict(X=x_test)
|

1 2 3 4
|
# Training set: observed points in red, the model's predictions as a blue line.
train_fit = regressor.predict(x_train)
plt.scatter(x_train, y_train, color='red')
plt.plot(x_train, train_fit, color='blue')
|
1 2 3
# Test set: observed points in pink, the model's predictions as a green line.
test_fit = regressor.predict(x_test)
plt.scatter(x_test, y_test, color='pink')
plt.plot(x_test, test_fit, color='green')
|
Day3:多元线性回归

1 2 3 4 5 6 7 8
|
# Day 3, step 1: load the data set and separate the inputs from the target.
import numpy as np
import pandas as pd

dataset = pd.read_csv('C:/Users/匿了匿了/Documents/GitHub/100-Days-Of-ML-Code/datasets/50_Startups.csv')

x = dataset.iloc[:, :-1].values  # every column except the last one
y = dataset.iloc[:, 4].values    # the column at position 4
|
1 2 3 4 5 6 7 8
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Turn the values of column 3 into integer codes, in place.
labelencoder = LabelEncoder()
x[:, 3] = labelencoder.fit_transform(x[:, 3])

# One-hot encode column 3 and pass the other columns through unchanged.
# The configured encoder is handed to the transformer instead of building a
# second, separate OneHotEncoder instance.
onehotencoder = OneHotEncoder(categories='auto')
t = ColumnTransformer([('encoder', onehotencoder, [3])], remainder='passthrough')
x = np.array(t.fit_transform(x))
|
1 2
# Imported here so that the Day 3 script does not rely on an import made by
# an earlier, separate example.
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
|
1 2 3 4 5
from sklearn.linear_model import LinearRegression

# Train a linear regression model on the training split.
# Prediction on the test split is done in the step that follows.
regressor = LinearRegression()
regressor.fit(x_train, y_train)
|
1 2
# Predict the target for the test inputs with the trained model.
y_pred = regressor.predict(X=x_test)
|