【Python机器学习专栏】异常检测算法在Python中的实现

2024-04-30 57

版权

版权声明：

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介： 【4月更文挑战第30天】本文介绍了异常检测的重要性和在不同领域的应用，如欺诈检测和网络安全。文章概述了四种常见异常检测算法：基于统计、距离、密度和模型的方法。在Python实践中，使用scikit-learn库展示了如何实现这些算法，包括正态分布拟合、K-means聚类、局部异常因子(LOF)和孤立森林(Isolation Forest)。通过计算概率密度、距离、LOF值和数据点的平均路径长度来识别异常值。

在机器学习的广阔领域中，异常检测（Outlier Detection）是一个重要的分支，它旨在识别出数据集中与大多数数据不同的观测值。这些异常值可能是由测量错误、罕见事件或某种未知过程产生的。异常检测在欺诈检测、医疗诊断、网络安全等领域发挥着关键作用。本文将介绍几种常见的异常检测算法，并通过Python实现这些算法，展示它们在实际问题中的应用。

一、异常检测算法概述

异常检测算法通常可以分为基于统计的方法、基于距离的方法、基于密度的方法和基于模型的方法等几类。以下简要介绍几种常见的异常检测算法：

基于统计的方法：通过假设数据服从某种概率分布（如正态分布），计算数据点的概率密度或累积分布函数值，将低于某个阈值的点视为异常值。
基于距离的方法：如K-means聚类算法，通过计算数据点到聚类中心的距离来判断数据点是否为异常值。距离聚类中心较远的点可能被视为异常值。
基于密度的方法：如局部异常因子（Local Outlier Factor, LOF）算法，通过计算数据点与其邻居的局部密度比值来判断数据点是否为异常值。LOF值较大的点可能被视为异常值。
基于模型的方法：如孤立森林（Isolation Forest）算法，通过构建多棵树来孤立数据点，并根据数据点的平均路径长度来判断其是否为异常值。路径长度较短的点可能被视为异常值。
二、异常检测算法在Python中的实现

下面我们将使用Python的scikit-learn库来实现上述异常检测算法，并应用于一个简单的数据集。

基于统计的方法
以正态分布为例，我们可以使用scipy库中的正态分布函数来拟合数据，并计算每个数据点的概率密度值。然后，我们可以设定一个阈值，将概率密度值低于该阈值的数据点视为异常值。

python
import numpy as np
from scipy.stats import norm

# Statistical approach: fit a normal distribution to the data and flag
# low-density points as outliers.

# Generate 1000 samples from a standard normal distribution.
data = np.random.normal(0, 1, 1000)
data = np.append(data, [5, -5])  # inject two obvious outliers

# Fit a normal distribution; norm.fit returns the estimated mean and
# standard deviation of the sample.
mu, std = norm.fit(data)

# Evaluate the fitted probability density at every data point.
pdf_values = norm.pdf(data, mu, std)

# Points whose density falls below the threshold are treated as outliers.
threshold = 0.01
outliers = data[pdf_values < threshold]
print(f"异常值: {outliers}")
基于距离的方法：以K-means为例
使用scikit-learn中的KMeans类进行K-means聚类，并计算每个数据点到聚类中心的距离。然后，我们可以设定一个阈值，将距离较大的数据点视为异常值。

python
from sklearn.cluster import KMeans

# Distance-based approach: cluster with K-means and flag points that lie
# unusually far from their nearest cluster centre.

# scikit-learn expects a 2-D array of shape (n_samples, n_features).
X = data.reshape(-1, 1)
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)

# transform() returns each sample's distance to every cluster centre; the
# row-wise minimum is the distance to the nearest centre. Rows must stay
# aligned with `data`, so the matrix is not sorted.
distances = kmeans.transform(X).min(axis=1)

# Flag points more than two standard deviations above the mean distance.
threshold = np.mean(distances) + 2 * np.std(distances)
outliers = data[distances > threshold]
print(f"异常值: {outliers}")
基于密度的方法：以LOF为例
scikit-learn的local_outlier_factor模块提供了LOF算法的实现。我们可以直接使用该方法来计算每个数据点的LOF值，并设定阈值来识别异常值。

python
from sklearn.neighbors import LocalOutlierFactor

# Density-based approach: Local Outlier Factor (LOF).

# Fit LOF and label every point. fit_predict returns -1 for outliers and
# 1 for inliers; contamination=0.1 sets the cut-off so that roughly 10% of
# the samples are labelled as outliers.
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
y_pred = lof.fit_predict(data.reshape(-1, 1))

# Select the points labelled -1. This filters on the predicted labels, not
# on raw LOF values; the raw scores are in lof.negative_outlier_factor_,
# where inliers sit close to -1.
outliers = data[y_pred == -1]
print(f"异常值: {outliers}")
基于模型的方法：以孤立森林为例
scikit-learn的IsolationForest类提供了孤立森林算法的实现。我们可以直接使用该方法来检测异常值。

python
from sklearn.ensemble import IsolationForest

ʹ�ù��ɭ�ּ��쳣ֵ

iforest

��Python��ѧϰר��쳣��㷨��Python�е�ʵ��

��ݷ��̬�ֲ�

��̬�ֲ�

��ÿ��ݵ�ĸ��ܶ�ֵ

��ֵ��ʶ��쳣ֵ

ʹ��K-means��

��ֵ��ʶ��쳣ֵ

��LOFֵ

��ֵ��ʶ��쳣ֵ

����ǽ�LOFֵС��-1�ĵ��Ϊ�쳣ֵ��Ϊ��LOFֵͨ��ӽ�1��

ʹ�ù��ɭ�ּ��쳣ֵ

��

��

��ؿγ�

��ص��

��ʵ�鳡��

��Python����ѧϰר�����쳣����㷨��Python�е�ʵ��

�������ݷ�����̬�ֲ�

�����̬�ֲ�

����ÿ�����ݵ�ĸ����ܶ�ֵ

������ֵ��ʶ���쳣ֵ

ʹ��K-means����

������ֵ��ʶ���쳣ֵ

����LOFֵ

������ֵ��ʶ���쳣ֵ

��������ǽ�LOFֵС��-1�ĵ���Ϊ�쳣ֵ����Ϊ�������LOFֵͨ���ӽ�1��

ʹ�ù���ɭ�ּ���쳣ֵ

��������

��������

��ؿγ�

��ص�����

���ʵ�鳡��

��Python��ѧϰר��쳣��㷨��Python�е�ʵ��

��ݷ��̬�ֲ�

��̬�ֲ�

��ÿ��ݵ�ĸ��ܶ�ֵ

��ֵ��ʶ��쳣ֵ

ʹ��K-means��

��ֵ��ʶ��쳣ֵ

��LOFֵ

��ֵ��ʶ��쳣ֵ

����ǽ�LOFֵС��-1�ĵ��Ϊ�쳣ֵ��Ϊ��LOFֵͨ��ӽ�1��

ʹ�ù��ɭ�ּ��쳣ֵ

��

��

��ص��

��ʵ�鳡��