基于 RFM 模型(Recency 最近一次消费、Frequency 消费频率、Monetary 消费金额)对用户进行价值分群,识别高价值用户。
| user_id | recency | frequency | monetary |
|---|---|---|---|
| 1001 | 1 | 10 | 1000 |
| 1002 | 7 | 5 | 500 |
| 1003 | 30 | 2 | 200 |
| 1004 | 2 | 8 | 800 |
| 1005 | 14 | 3 | 300 |
| 1006 | 5 | 7 | 700 |
| 1007 | 21 | 1 | 100 |
| 1008 | 3 | 9 | 900 |
| 1009 | 10 | 4 | 400 |
| 1010 | 45 | 1 | 50 |
import pandas as pd
import numpy as np
# Load the sample RFM data from an inline CSV string.
from io import StringIO

data = '''user_id,recency,frequency,monetary
1001,1,10,1000
1002,7,5,500
1003,30,2,200
1004,2,8,800
1005,14,3,300
1006,5,7,700
1007,21,1,100
1008,3,9,900
1009,10,4,400
1010,45,1,50
'''
# Parse the CSV text into a DataFrame.
df = pd.read_csv(StringIO(data))
print("原始数据:")
print(df)
print("\n数据基本信息:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\n数据描述性统计:")
print(df.describe())

# RFM binning: split each metric into quartiles and score every quartile 1-4.
# Recency: smaller is better, so the lowest quartile receives the top score.
df['r_score'] = pd.qcut(df['recency'], 4, labels=[4, 3, 2, 1])
# Frequency: larger is better.
df['f_score'] = pd.qcut(df['frequency'], 4, labels=[1, 2, 3, 4])
# Monetary: larger is better.
df['m_score'] = pd.qcut(df['monetary'], 4, labels=[1, 2, 3, 4])

# Total RFM score: the qcut labels are categorical, so cast each score column
# to int before summing.
df['rfm_score'] = (
    df['r_score'].astype(int)
    + df['f_score'].astype(int)
    + df['m_score'].astype(int)
)
# Segment users by their total RFM score.
def rfm_segment(row):
    """Map a row's total RFM score to a coarse value tier.

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            an ``'rfm_score'`` entry.

    Returns:
        ``'高价值用户'`` when the score is at least 10, ``'中价值用户'`` when it
        is at least 7 (but below 10), and ``'低价值用户'`` otherwise.
    """
    score = row['rfm_score']
    if score >= 10:
        return '高价值用户'
    if score >= 7:
        return '中价值用户'
    return '低价值用户'
# Label every user with the coarse value tier derived from the total score.
df['segment'] = df.apply(rfm_segment, axis=1)
print("\nRFM分析结果:", df, sep="\n")

# Number of users in each segment.
segment_counts = df['segment'].value_counts()
print("\n分群统计:", segment_counts, sep="\n")

# Mean of each RFM metric (and of the total score) per segment.
mean_spec = dict.fromkeys(['recency', 'frequency', 'monetary', 'rfm_score'], 'mean')
segment_means = df.groupby('segment').agg(mean_spec)
print("\n各分群的RFM均值:", segment_means, sep="\n")

# Users ordered from the highest to the lowest total RFM score.
ranked_users = df.sort_values(by='rfm_score', ascending=False)
print("\n按RFM总分排序的用户:", ranked_users, sep="\n")
# Finer-grained user segmentation based on the individual R, F and M scores.
def detailed_segment(row):
    """Classify a row into a detailed segment from its R, F and M scores.

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            ``'r_score'``, ``'f_score'`` and ``'m_score'`` entries convertible
            with ``int()``.

    Returns:
        ``'超级用户'`` when all three scores are at least 3; ``'忠诚用户'`` when
        recency is at least 3 and frequency or monetary is at least 2;
        ``'潜在用户'`` when any two of the three scores are at least 2;
        ``'流失用户'`` otherwise.
    """
    # The row entries are scalars, and a plain Python int has no ``.astype``
    # method, so convert with the built-in int() instead.
    r = int(row['r_score'])
    f = int(row['f_score'])
    m = int(row['m_score'])
    if r >= 3 and f >= 3 and m >= 3:
        return '超级用户'
    elif r >= 3 and (f >= 2 or m >= 2):
        return '忠诚用户'
    elif (r >= 2 and f >= 2) or (r >= 2 and m >= 2) or (f >= 2 and m >= 2):
        return '潜在用户'
    else:
        return '流失用户'
# Attach the fine-grained segment label to every user.
df['detailed_segment'] = df.apply(detailed_segment, axis=1)

# Show the per-user scores next to the detailed label.
report_columns = ['user_id', 'r_score', 'f_score', 'm_score', 'rfm_score', 'detailed_segment']
print("\n详细分群结果:", df[report_columns], sep="\n")

# Number of users in each detailed segment.
detailed_counts = df['detailed_segment'].value_counts()
print("\n详细分群统计:", detailed_counts, sep="\n")