核心数学原理
贝叶斯定理
P(y|X) = [P(X|y)P(y)] / P(X)
朴素（条件独立）假设：P(X|y) = ∏_i P(x_i|y)，因此 P(y|X) ∝ P(y) ∏_i P(x_i|y)
高斯分布
P(x_i|y) = (1/√(2πσ_{y,i}²)) · e^(-(x_i-μ_{y,i})²/(2σ_{y,i}²))，其中 μ_{y,i}、σ_{y,i}² 分别为类别 y 下第 i 个特征的均值与方差
Python实现示例
# 高斯朴素贝叶斯分类器实现
import numpy as np
class GaussianNB:
    """Gaussian naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class that maximises
    ``log P(y) + sum_i log P(x_i | y)``.

    Attributes set by ``fit``:
        classes: sorted array of the distinct labels seen in ``y``.
        parameters: one dict per class (same order as ``classes``) with
            keys ``'mean'``, ``'var'`` (per-feature arrays) and ``'prior'``
            (fraction of training rows belonging to the class).
    """

    # Added to every variance before it is used, so a feature that is
    # constant within a class (variance 0) does not cause division by zero.
    var_smoothing = 1e-4

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples class labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)  # plain lists would make ``y == label`` a scalar
        self.classes = np.unique(y)
        self.parameters = []
        for label in self.classes:
            X_class = X[y == label]
            self.parameters.append({
                'mean': X_class.mean(axis=0),
                'var': X_class.var(axis=0),
                'prior': X_class.shape[0] / X.shape[0],
            })

    def _log_pdf(self, X, mean, var):
        """Return the element-wise log of the normal density N(mean, var + eps).

        Working in log space avoids the underflow of ``exp`` to 0 (and the
        resulting ``log(0) = -inf``) for points far from the mean.
        """
        smoothed = var + self.var_smoothing
        return -0.5 * (np.log(2.0 * np.pi * smoothed) + (X - mean) ** 2 / smoothed)

    def _pdf(self, X, mean, var):
        """Return the element-wise normal density N(mean, var + eps)."""
        return np.exp(self._log_pdf(X, mean, var))

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Array of n_samples labels drawn from ``self.classes``.
        """
        X = np.asarray(X, dtype=float)
        log_posteriors = []
        for params in self.parameters:
            # Naive assumption: features are independent given the class,
            # so the joint log-likelihood is the sum over features.
            score = np.sum(
                self._log_pdf(X, params['mean'], params['var']), axis=1
            )
            # Entries without a 'prior' key (e.g. parameters assigned by
            # hand) are treated as having a uniform prior.
            prior = params.get('prior')
            if prior is not None:
                score = score + np.log(prior)
            log_posteriors.append(score)
        # Rows index classes, columns index samples.
        return self.classes[np.argmax(np.vstack(log_posteriors), axis=0)]