課程
                    
                        /后端開發
                        
                            /Python
                        
                        /Python實現線性回歸

自己跑通的代碼（Python 3.7），包括生成數據文件，沒問題。

# 生成數據文件
import random


# PEP 8 asks for two blank lines before a top-level function. This is a style
# convention that linters warn about, not a syntax requirement.
def f(X1, X2, X3):
    """Return the noise-free linear target for one sample.

    Computes ``0.65 * X1 + 0.70 * X2 - 0.55 * X3 + 1.95``; these coefficients
    (intercept 1.95) are the values the regression below should recover.

    Args:
        X1: First feature value.
        X2: Second feature value.
        X3: Third feature value.

    Returns:
        The target value for the given features.
    """
    return 0.65 * X1 + 0.70 * X2 - 0.55 * X3 + 1.95


def produce(path='data.csv', n_rows=200):
    """Write a synthetic linear-regression data set as a CSV file.

    The file starts with the header ``0,Y,X1,X2,X3``. Each following row holds
    a 1-based row id, the target ``f(x1, x2, x3)`` and the three features,
    where every feature is ``random.random() * 10``.

    Args:
        path: Output file. A relative path is resolved against the current
            working directory. Defaults to ``'data.csv'``.
        n_rows: Number of data rows written after the header. Defaults to 200.

    Raises:
        OSError: If the file cannot be opened or the header cannot be written.
            Failures while writing data rows are reported with a
            ``'Write Error'`` message instead of being raised.
    """
    # No random.seed() call is needed: the random module seeds itself when it
    # is first imported, so re-seeding on every iteration adds nothing.
    with open(path, 'w') as file:
        file.write('0,Y,X1,X2,X3\n')
        try:
            for row_id in range(1, n_rows + 1):
                x1 = random.random() * 10
                x2 = random.random() * 10
                x3 = random.random() * 10
                y = f(x1, x2, x3)
                file.write(','.join(map(str, (row_id, y, x1, x2, x3))) + '\n')
        except IOError:
            # Best effort, as before: report the failure rather than crash,
            # but stop at the first error instead of repeating it per row.
            print('Write Error')


# Generate the data file only when this file is executed as a script.
if __name__ == '__main__':
    produce()


import numpy as np
from numpy.linalg import inv
from numpy import dot
from numpy import mat

import pandas as pd

# Load the generated data set. Columns are used by position:
# 0 = row id, 1 = Y, 2..4 = X1..X3.
dataSet = pd.read_csv('data.csv')
print(dataSet)

# Copy the feature columns before adding the bias column, so the assignment
# below does not write into a slice of dataSet (which can trigger pandas'
# SettingWithCopyWarning).
temp = dataSet.iloc[:, 2:5].copy()
temp['x0'] = 1  # bias term; columns are now x1, x2, x3, x0
X = temp.iloc[:, [3, 0, 1, 2]].values  # reorder to x0, x1, x2, x3
Y = dataSet.iloc[:, 1].values.reshape(-1, 1)  # target as a column vector

# Take the sizes from the data instead of hard-coding 200 rows / 4 parameters.
m, n = X.shape

# 1. Closed-form solution (normal equation): theta = (X^T X)^-1 X^T Y.
#    Drawback: the matrix inversion is comparatively expensive.
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)
print(theta.reshape(n, 1))

# 2. Batch gradient descent.
theta = np.ones((n, 1))
alpha = 0.02  # learning rate: smaller is slower, too large overshoots the minimum
for _ in range(10000):
    # Compute the residual once from the current theta and update all
    # parameters together. The previous version aliased its scratch array to
    # theta (temp = theta), so each coordinate update already saw the
    # coordinates changed earlier in the same iteration.
    residual = Y - dot(X, theta)
    theta = theta + alpha * dot(X.T, residual) / m
print(theta)