综合运用多种分析方法,对电商数据进行全面分析。
| user_id | order_id | amount | order_date | payment_method |
|---|---|---|---|---|
| 1001 | 2001 | 1000 | 2023-01-01 | Credit Card |
| 1002 | 2002 | 500 | 2023-01-02 | PayPal |
| 1003 | 2003 | 1500 | 2023-01-03 | Credit Card |
| 1001 | 2004 | 800 | 2023-01-04 | PayPal |
| 1002 | 2005 | 1200 | 2023-01-05 | Credit Card |
| 1003 | 2006 | 600 | 2023-01-06 | Credit Card |
| 1001 | 2007 | 900 | 2023-01-07 | PayPal |
| 1002 | 2008 | 700 | 2023-01-08 | Credit Card |
| 1003 | 2009 | 1100 | 2023-01-09 | PayPal |
| 1001 | 2010 | 1300 | 2023-01-10 | Credit Card |
import pandas as pd
import numpy as np
# 读取数据
data = '''user_id,order_id,amount,order_date,payment_method
1001,2001,1000,2023-01-01,Credit Card
1002,2002,500,2023-01-02,PayPal
1003,2003,1500,2023-01-03,Credit Card
1001,2004,800,2023-01-04,PayPal
1002,2005,1200,2023-01-05,Credit Card
1003,2006,600,2023-01-06,Credit Card
1001,2007,900,2023-01-07,PayPal
1002,2008,700,2023-01-08,Credit Card
1003,2009,1100,2023-01-09,PayPal
1001,2010,1300,2023-01-10,Credit Card
'''
# 转换为DataFrame
from io import StringIO
df = pd.read_csv(StringIO(data))
# 转换日期列
df['order_date'] = pd.to_datetime(df['order_date'])
print("原始数据:")
print(df)
print("\n数据基本信息:")
print(df.info())
print("\n数据描述性统计:")
print(df.describe())
# 1. 用户消费分析
print("\n1. 用户消费分析:")
user_analysis = df.groupby('user_id').agg({
'order_id': 'count',
'amount': ['sum', 'mean', 'max', 'min']
}).rename(columns={
'order_id': 'order_count'
})
print(user_analysis)
# 2. 日期分析
print("\n2. 日期分析:")
daily_analysis = df.groupby('order_date').agg({
'order_id': 'count',
'amount': 'sum',
'user_id': 'nunique'
}).rename(columns={
'order_id': 'order_count',
'user_id': 'unique_users'
})
print(daily_analysis)
# 3. 支付方式分析
print("\n3. 支付方式分析:")
payment_analysis = df.groupby('payment_method').agg({
'order_id': 'count',
'amount': ['sum', 'mean']
}).rename(columns={
'order_id': 'order_count'
})
print(payment_analysis)
# 4. 消费趋势分析
print("\n4. 消费趋势分析:")
df['month'] = df['order_date'].dt.month
df['day'] = df['order_date'].dt.day
df['day_of_week'] = df['order_date'].dt.day_name()
# 按天分析
day_trend = df.groupby('day')['amount'].sum()
print("按天消费趋势:")
print(day_trend)
# 按星期几分析
weekday_trend = df.groupby('day_of_week')['amount'].sum()
print("\n按星期几消费趋势:")
print(weekday_trend)
# 5. 用户价值分析
print("\n5. 用户价值分析:")
user_value = df.groupby('user_id').agg({
'amount': 'sum',
'order_id': 'count'
}).rename(columns={
'amount': 'total_spent',
'order_id': 'order_count'
})
user_value['avg_order_value'] = user_value['total_spent'] / user_value['order_count']
print(user_value)
# 6. 综合指标计算
print("\n6. 综合指标计算:")
total_orders = len(df)
total_revenue = df['amount'].sum()
average_order_value = df['amount'].mean()
total_users = df['user_id'].nunique()
average_user_spent = total_revenue / total_users
print(f"总订单数: {total_orders}")
print(f"总收入: {total_revenue}")
print(f"平均订单价值: {average_order_value:.2f}")
print(f"总用户数: {total_users}")
print(f"平均用户消费: {average_user_spent:.2f}")
# 7. 识别高价值用户
print("\n7. 高价值用户识别:")
high_value_users = user_value[user_value['total_spent'] > user_value['total_spent'].quantile(0.75)]
print("高价值用户:")
print(high_value_users)
# 8. 支付方式偏好分析
print("\n8. 支付方式偏好分析:")
user_payment_preference = df.groupby(['user_id', 'payment_method'])['order_id'].count().unstack(fill_value=0)
print(user_payment_preference)
# 9. 消费分布分析
print("\n9. 消费分布分析:")
df['amount_range'] = pd.cut(df['amount'], bins=[0, 500, 1000, 1500, 2000], labels=['低消费', '中消费', '高消费', '超高消费'])
amount_distribution = df['amount_range'].value_counts()
print("消费金额分布:")
print(amount_distribution)
# 10. 每日活跃用户分析
print("\n10. 每日活跃用户分析:")
daily_active_users = df.groupby('order_date')['user_id'].nunique()
print("每日活跃用户数:")
print(daily_active_users)