pcaを使わないでpcaを実装

NISHIO Hirokazu [Translate]
PCAを使わないでPCAを実装
python
import numpy as np
# Prepare some arbitrary sample data: 6 samples with 5 dimensions each.
data = np.random.random(size=(6, 5))
# Center the data by subtracting the per-column (per-dimension) mean.
d2 = data - np.mean(data, axis=0, keepdims=True)
# Build the covariance matrix as d2^T d2. It is not divided by the sample
# count, so strictly this is the scatter matrix; the scaling changes only the
# eigenvalues, not the eigenvectors.
cov = d2.T @ d2

# 共分散行列はデータの次元の正方行列
>>> cov.shape
(5, 5)

# 共分散行列を固有値分解する
>>> np.linalg.eig(cov)
(array([1.34012189, 0.77663231, 0.58201555, 0.08694709, 0.03991043]),
 array([[ 0.67869952,  0.35340645, -0.37436676,  0.50602025,  0.13514392],
        [ 0.45792096,  0.20519401,  0.77490418, -0.08403411, -0.37505412],
        [-0.46695028,  0.17178326,  0.04681577,  0.68316922, -0.53248104],
        [-0.26671759,  0.89310368, -0.08178549, -0.34559589,  0.07142937],
        [-0.20123254,  0.07652205,  0.50049221,  0.38823327,  0.74325791]]))

# 比較対象のPCA実装
from sklearn.decomposition import PCA
# データにフィットさせる
m = PCA().fit(data)

# 主成分軸を表示 
>>> m.components_
array([[ 0.67869952,  0.45792096, -0.46695028, -0.26671759, -0.20123254],
        [ 0.35340645,  0.20519401,  0.17178326,  0.89310368,  0.07652205],
        [-0.37436676,  0.77490418,  0.04681577, -0.08178549,  0.50049221],
        [ 0.50602025, -0.08403411,  0.68316922, -0.34559589,  0.38823327],
        [ 0.13514392, -0.37505412, -0.53248104,  0.07142937,  0.74325791]])

# 共分散行列の固有値分解の結果と、PCAの主成分軸を比較すると、差がほぼゼロ
>>> np.linalg.eig(cov)[1] - m.components_.T
array([[-4.44089210e-16,  1.11022302e-16,  4.99600361e-16,
         1.11022302e-16,  1.66533454e-16],
       [-2.22044605e-16, -6.66133815e-16,  4.44089210e-16,
        -3.88578059e-16, -1.11022302e-16],
       [ 1.11022302e-16, -3.05311332e-16, -2.56739074e-16,
        -3.33066907e-16,  3.33066907e-16],
       [-2.22044605e-16,  0.00000000e+00,  2.08166817e-15,
         3.33066907e-16, -3.60822483e-16],
       [ 2.22044605e-16, -1.02695630e-15, -1.11022302e-16,
         6.66133815e-16,  0.00000000e+00]])

# 特異値分解(SVD)との関係(軸の一部の符号が反転していることを除けば同じもの)
>>> -np.linalg.svd(d2)[2]
array([[ 0.67869952,  0.45792096, -0.46695028, -0.26671759, -0.20123254],
       [ 0.35340645,  0.20519401,  0.17178326,  0.89310368,  0.07652205],
       [-0.37436676,  0.77490418,  0.04681577, -0.08178549,  0.50049221],
       [-0.50602025,  0.08403411, -0.68316922,  0.34559589, -0.38823327],
       [-0.13514392,  0.37505412,  0.53248104, -0.07142937, -0.74325791]])

>>> m.components_
array([[ 0.67869952,  0.45792096, -0.46695028, -0.26671759, -0.20123254],
       [ 0.35340645,  0.20519401,  0.17178326,  0.89310368,  0.07652205],
       [-0.37436676,  0.77490418,  0.04681577, -0.08178549,  0.50049221],
       [ 0.50602025, -0.08403411,  0.68316922, -0.34559589,  0.38823327],
       [ 0.13514392, -0.37505412, -0.53248104,  0.07142937,  0.74325791]])

#主成分分析 #特異値分解 #共分散行列 #PCA

共分散行列を作る
共分散行列はデータの次元の正方行列
共分散行列を固有値分解する
PCAの主成分軸を表示 
共分散行列の固有値分解の結果と、PCAの主成分軸を比較すると、差がほぼゼロ
特異値分解(SVD)との関係(軸の一部の符号が反転していることを除けば同じもの)
"Engineer's way of creating knowledge" the English version of my book is now available on [Engineer's way of creating knowledge]
(C)NISHIO Hirokazu / Converted from [Scrapbox] at 11/23/2025, 4:48:59 PM[Edit]
Related Pages