项目2:用户消费行为基础分析

分析用户消费行为,包括消费频次、消费金额等指标的统计分析。

数据预览

user_id order_date amount freq avg_amount
1001 2023-01-01 200.0 2 100.0
1002 2023-01-02 50.0 1 50.0
1003 2023-01-03 200.0 1 200.0
1001 2023-01-04 225.0 3 75.0
1002 2023-01-05 240.0 2 120.0
1003 2023-01-06 150.0 5 30.0
1001 2023-01-07 150.0 1 150.0
1002 2023-01-08 120.0 2 60.0
1003 2023-01-09 180.0 1 180.0
1001 2023-01-10 180.0 2 90.0

代码编辑器

参考答案

import pandas as pd
import numpy as np

# Build the sample order records as inline CSV text and parse them.
from io import StringIO

# One literal per CSV line; adjacent literals are joined by the parser, so
# the resulting text is a header row followed by ten newline-terminated rows.
data = (
    'user_id,order_date,amount,freq,avg_amount\n'
    '1001,2023-01-01,200.0,2,100.0\n'
    '1002,2023-01-02,50.0,1,50.0\n'
    '1003,2023-01-03,200.0,1,200.0\n'
    '1001,2023-01-04,225.0,3,75.0\n'
    '1002,2023-01-05,240.0,2,120.0\n'
    '1003,2023-01-06,150.0,5,30.0\n'
    '1001,2023-01-07,150.0,1,150.0\n'
    '1002,2023-01-08,120.0,2,60.0\n'
    '1003,2023-01-09,180.0,1,180.0\n'
    '1001,2023-01-10,180.0,2,90.0\n'
)

# read_csv expects a path or file-like object, so wrap the text in an
# in-memory buffer before handing it over.
csv_buffer = StringIO(data)
df = pd.read_csv(csv_buffer)

# Show the raw records, then a structural summary and summary statistics.
print("原始数据:")
print(df)
print("\n数据基本信息:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()
print("\n数据描述性统计:")
print(df.describe())

# Per-user summary: the aggregation spec maps each source column to the
# statistic(s) computed for it within every user_id group.
per_user_metrics = {
    'amount': ['sum', 'mean', 'max', 'min'],
    'freq': ['sum', 'mean'],
    'avg_amount': 'mean',
}
user_analysis = df.groupby('user_id').agg(per_user_metrics)

print("\n按用户分组分析:")
print(user_analysis)

# Compute each user's total spend and total purchase frequency.
# transform('sum') broadcasts the per-user totals back onto every order row,
# so the new columns stay aligned with df's index. (A plain copy of the
# per-order 'amount'/'freq' columns would not be a total at all.)
df['total_spent'] = df.groupby('user_id')['amount'].transform('sum')
df['total_freq'] = df.groupby('user_id')['freq'].transform('sum')

# Daily trend: total amount, total frequency and the number of distinct
# users per order date, expressed with named aggregations so the output
# column names are set directly.
daily_analysis = df.groupby('order_date').agg(
    amount=('amount', 'sum'),
    freq=('freq', 'sum'),
    unique_users=('user_id', 'nunique'),
)

print("\n按日期分析消费趋势:")
print(daily_analysis)

# Bucket every order by its amount into three spending levels.
# Intervals are right-closed: (0, 100], (100, 200], (200, inf).
level_edges = [0, 100, 200, float('inf')]
level_names = ['低消费', '中消费', '高消费']
df['spending_level'] = pd.cut(df['amount'], bins=level_edges, labels=level_names)

print("\n用户消费等级分布:")
level_counts = df['spending_level'].value_counts()
print(level_counts)

# Loyalty score per order row: half the frequency plus the amount scaled
# down by 100.
freq_part = df['freq'].mul(0.5)
amount_part = df['amount'].div(100)
df['loyalty_score'] = freq_part.add(amount_part)

print("\n用户忠诚度评分:")
print(df.loc[:, ['user_id', 'loyalty_score']])

# Behaviour pattern per user: total amount, total frequency and the number
# of order rows, followed by the per-purchase averages derived from them.
user_pattern = df.groupby('user_id').agg(
    amount=('amount', 'sum'),
    freq=('freq', 'sum'),
    purchase_count=('order_date', 'count'),
)
user_pattern = user_pattern.assign(
    avg_spent_per_purchase=lambda t: t['amount'] / t['purchase_count'],
    avg_freq_per_purchase=lambda t: t['freq'] / t['purchase_count'],
)

print("\n用户消费行为模式分析:")
print(user_pattern)

# High-value users: those whose total amount is strictly above the 75th
# percentile of per-user totals.
amount_threshold = user_pattern['amount'].quantile(0.75)
is_high_value = user_pattern['amount'] > amount_threshold
high_value_users = user_pattern[is_high_value]

print("\n高价值用户识别:")
print(high_value_users)
返回主页