分析用户消费行为,包括对消费频次、消费金额等指标的统计分析。
| user_id | order_date | amount | freq | avg_amount |
|---|---|---|---|---|
| 1001 | 2023-01-01 | 200.0 | 2 | 100.0 |
| 1002 | 2023-01-02 | 50.0 | 1 | 50.0 |
| 1003 | 2023-01-03 | 200.0 | 1 | 200.0 |
| 1001 | 2023-01-04 | 225.0 | 3 | 75.0 |
| 1002 | 2023-01-05 | 240.0 | 2 | 120.0 |
| 1003 | 2023-01-06 | 150.0 | 5 | 30.0 |
| 1001 | 2023-01-07 | 150.0 | 1 | 150.0 |
| 1002 | 2023-01-08 | 120.0 | 2 | 60.0 |
| 1003 | 2023-01-09 | 180.0 | 1 | 180.0 |
| 1001 | 2023-01-10 | 180.0 | 2 | 90.0 |
import pandas as pd
import numpy as np
from io import StringIO

# Consumer-behaviour analysis script.
#
# Loads a small inline CSV of order records, prints descriptive statistics,
# and derives per-user and per-date aggregates. Module-level results:
#   df               - the raw rows plus derived columns (total_spent,
#                      total_freq, spending_level, loyalty_score)
#   user_analysis    - per-user aggregates of amount / freq / avg_amount
#   daily_analysis   - per-date totals and number of distinct users
#   user_pattern     - per-user totals and per-purchase averages
#   high_value_users - rows of user_pattern above the 75th percentile of spend

# Inline sample data: one row per order record.
data = '''user_id,order_date,amount,freq,avg_amount
1001,2023-01-01,200.0,2,100.0
1002,2023-01-02,50.0,1,50.0
1003,2023-01-03,200.0,1,200.0
1001,2023-01-04,225.0,3,75.0
1002,2023-01-05,240.0,2,120.0
1003,2023-01-06,150.0,5,30.0
1001,2023-01-07,150.0,1,150.0
1002,2023-01-08,120.0,2,60.0
1003,2023-01-09,180.0,1,180.0
1001,2023-01-10,180.0,2,90.0
'''

# Parse the CSV text into a DataFrame.
df = pd.read_csv(StringIO(data))
print("原始数据:")
print(df)
print("\n数据基本信息:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\n数据描述性统计:")
print(df.describe())

# Per-user aggregates. Note that the 'avg_amount' entry is the plain
# (unweighted) mean of the per-row avg_amount values.
print("\n按用户分组分析:")
user_analysis = df.groupby('user_id').agg({
    'amount': ['sum', 'mean', 'max', 'min'],
    'freq': ['sum', 'mean'],
    'avg_amount': 'mean',
})
print(user_analysis)

# Per-user total spend and total frequency, broadcast back onto every row of
# that user. transform('sum') keeps the original row index, so the result
# aligns with df directly.
df['total_spent'] = df.groupby('user_id')['amount'].transform('sum')
df['total_freq'] = df.groupby('user_id')['freq'].transform('sum')

# Spending trend by date: total amount, total frequency and the number of
# distinct users on each order_date.
print("\n按日期分析消费趋势:")
daily_analysis = df.groupby('order_date').agg({
    'amount': 'sum',
    'freq': 'sum',
    'user_id': 'nunique',
}).rename(columns={'user_id': 'unique_users'})
print(daily_analysis)

# Spending level of each row, bucketed on the row's amount:
# (0, 100] -> low, (100, 200] -> medium, (200, inf) -> high.
df['spending_level'] = pd.cut(
    df['amount'],
    bins=[0, 100, 200, float('inf')],
    labels=['低消费', '中消费', '高消费'],
)
print("\n用户消费等级分布:")
print(df['spending_level'].value_counts())

# Per-row loyalty score: half a point per unit of freq plus one point per
# 100 units of amount.
df['loyalty_score'] = df['freq'] * 0.5 + df['amount'] / 100
print("\n用户忠诚度评分:")
print(df[['user_id', 'loyalty_score']])

# Per-user behaviour pattern: totals, number of rows (purchases) and the
# resulting per-purchase averages.
print("\n用户消费行为模式分析:")
user_pattern = df.groupby('user_id').agg({
    'amount': 'sum',
    'freq': 'sum',
    'order_date': 'count',
}).rename(columns={'order_date': 'purchase_count'})
user_pattern['avg_spent_per_purchase'] = user_pattern['amount'] / user_pattern['purchase_count']
user_pattern['avg_freq_per_purchase'] = user_pattern['freq'] / user_pattern['purchase_count']
print(user_pattern)

# High-value users: total spend strictly above the 75th percentile of
# per-user total spend.
print("\n高价值用户识别:")
high_value_users = user_pattern[user_pattern['amount'] > user_pattern['amount'].quantile(0.75)]
print(high_value_users)