项目10:电商数据综合分析实战

综合运用多种分析方法,对电商数据进行全面分析。

数据预览

user_id order_id amount order_date payment_method
1001200110002023-01-01Credit Card
100220025002023-01-02PayPal
1003200315002023-01-03Credit Card
100120048002023-01-04PayPal
1002200512002023-01-05Credit Card
100320066002023-01-06Credit Card
100120079002023-01-07PayPal
100220087002023-01-08Credit Card
1003200911002023-01-09PayPal
1001201013002023-01-10Credit Card

代码编辑器

参考答案

import pandas as pd
import numpy as np

# 读取数据
data = '''user_id,order_id,amount,order_date,payment_method
1001,2001,1000,2023-01-01,Credit Card
1002,2002,500,2023-01-02,PayPal
1003,2003,1500,2023-01-03,Credit Card
1001,2004,800,2023-01-04,PayPal
1002,2005,1200,2023-01-05,Credit Card
1003,2006,600,2023-01-06,Credit Card
1001,2007,900,2023-01-07,PayPal
1002,2008,700,2023-01-08,Credit Card
1003,2009,1100,2023-01-09,PayPal
1001,2010,1300,2023-01-10,Credit Card
'''

# 转换为DataFrame
from io import StringIO
df = pd.read_csv(StringIO(data))

# 转换日期列
df['order_date'] = pd.to_datetime(df['order_date'])

print("原始数据:")
print(df)
print("\n数据基本信息:")
print(df.info())
print("\n数据描述性统计:")
print(df.describe())

# 1. 用户消费分析
print("\n1. 用户消费分析:")
user_analysis = df.groupby('user_id').agg({
    'order_id': 'count',
    'amount': ['sum', 'mean', 'max', 'min']
}).rename(columns={
    'order_id': 'order_count'
})
print(user_analysis)

# 2. 日期分析
print("\n2. 日期分析:")
daily_analysis = df.groupby('order_date').agg({
    'order_id': 'count',
    'amount': 'sum',
    'user_id': 'nunique'
}).rename(columns={
    'order_id': 'order_count',
    'user_id': 'unique_users'
})
print(daily_analysis)

# 3. 支付方式分析
print("\n3. 支付方式分析:")
payment_analysis = df.groupby('payment_method').agg({
    'order_id': 'count',
    'amount': ['sum', 'mean']
}).rename(columns={
    'order_id': 'order_count'
})
print(payment_analysis)

# 4. 消费趋势分析
print("\n4. 消费趋势分析:")
df['month'] = df['order_date'].dt.month
df['day'] = df['order_date'].dt.day
df['day_of_week'] = df['order_date'].dt.day_name()

# 按天分析
day_trend = df.groupby('day')['amount'].sum()
print("按天消费趋势:")
print(day_trend)

# 按星期几分析
weekday_trend = df.groupby('day_of_week')['amount'].sum()
print("\n按星期几消费趋势:")
print(weekday_trend)

# 5. 用户价值分析
print("\n5. 用户价值分析:")
user_value = df.groupby('user_id').agg({
    'amount': 'sum',
    'order_id': 'count'
}).rename(columns={
    'amount': 'total_spent',
    'order_id': 'order_count'
})
user_value['avg_order_value'] = user_value['total_spent'] / user_value['order_count']
print(user_value)

# 6. 综合指标计算
print("\n6. 综合指标计算:")
total_orders = len(df)
total_revenue = df['amount'].sum()
average_order_value = df['amount'].mean()
total_users = df['user_id'].nunique()
average_user_spent = total_revenue / total_users

print(f"总订单数: {total_orders}")
print(f"总收入: {total_revenue}")
print(f"平均订单价值: {average_order_value:.2f}")
print(f"总用户数: {total_users}")
print(f"平均用户消费: {average_user_spent:.2f}")

# 7. 识别高价值用户
print("\n7. 高价值用户识别:")
high_value_users = user_value[user_value['total_spent'] > user_value['total_spent'].quantile(0.75)]
print("高价值用户:")
print(high_value_users)

# 8. 支付方式偏好分析
print("\n8. 支付方式偏好分析:")
user_payment_preference = df.groupby(['user_id', 'payment_method'])['order_id'].count().unstack(fill_value=0)
print(user_payment_preference)

# 9. 消费分布分析
print("\n9. 消费分布分析:")
df['amount_range'] = pd.cut(df['amount'], bins=[0, 500, 1000, 1500, 2000], labels=['低消费', '中消费', '高消费', '超高消费'])
amount_distribution = df['amount_range'].value_counts()
print("消费金额分布:")
print(amount_distribution)

# 10. 每日活跃用户分析
print("\n10. 每日活跃用户分析:")
daily_active_users = df.groupby('order_date')['user_id'].nunique()
print("每日活跃用户数:")
print(daily_active_users)
返回主页