项目7:多表数据合并与综合指标计算

学习如何合并多个数据表,并计算综合指标。

数据预览

订单表

order_id user_id product_id price quantity order_date
1101201100.022023-01-01
210220250.012023-01-02
3101203150.012023-01-03
4103201100.032023-01-04
510220250.022023-01-05

代码编辑器

参考答案

import pandas as pd
import numpy as np

# 读取订单数据
order_data = '''order_id,user_id,product_id,price,quantity,order_date
1,101,201,100.0,2,2023-01-01
2,102,202,50.0,1,2023-01-02
3,101,203,150.0,1,2023-01-03
4,103,201,100.0,3,2023-01-04
5,102,202,50.0,2,2023-01-05
'''

# 读取用户数据
user_data = '''user_id,username,age,gender
101,Alice,25,Female
102,Bob,30,Male
103,Charlie,35,Male
'''

# 读取产品数据
product_data = '''product_id,product_name,category
201,iPhone,Electronics
202,MacBook,Electronics
203,iPad,Electronics
'''

# 转换为DataFrame
from io import StringIO
orders = pd.read_csv(StringIO(order_data))
users = pd.read_csv(StringIO(user_data))
products = pd.read_csv(StringIO(product_data))

print("订单数据:")
print(orders)
print("\n用户数据:")
print(users)
print("\n产品数据:")
print(products)

# 合并订单和用户数据
order_user = pd.merge(orders, users, on='user_id', how='left')
print("\n订单-用户合并数据:")
print(order_user)

# 合并订单-用户和产品数据
order_user_product = pd.merge(order_user, products, on='product_id', how='left')
print("\n订单-用户-产品合并数据:")
print(order_user_product)

# 计算总销售额
order_user_product['total_amount'] = order_user_product['price'] * order_user_product['quantity']
print("\n添加总销售额后的数据:")
print(order_user_product)

# 按用户分组计算综合指标
user_metrics = order_user_product.groupby('user_id').agg({
    'order_id': 'count',
    'quantity': 'sum',
    'total_amount': 'sum',
    'price': 'mean'
}).rename(columns={
    'order_id': 'order_count',
    'price': 'avg_price'
})
print("\n用户综合指标:")
print(user_metrics)

# 按产品分组计算综合指标
product_metrics = order_user_product.groupby('product_name').agg({
    'order_id': 'count',
    'quantity': 'sum',
    'total_amount': 'sum'
}).rename(columns={
    'order_id': 'order_count'
})
print("\n产品综合指标:")
print(product_metrics)

# 按日期分组计算销售额
order_user_product['order_date'] = pd.to_datetime(order_user_product['order_date'])
daily_sales = order_user_product.groupby('order_date')['total_amount'].sum()
print("\n每日销售额:")
print(daily_sales)

# 计算每个用户的平均订单金额
user_metrics['avg_order_amount'] = user_metrics['total_amount'] / user_metrics['order_count']
print("\n用户平均订单金额:")
print(user_metrics[['order_count', 'total_amount', 'avg_order_amount']])

# 按性别分组计算消费情况
gender_metrics = order_user_product.groupby('gender').agg({
    'order_id': 'count',
    'quantity': 'sum',
    'total_amount': 'sum'
}).rename(columns={
    'order_id': 'order_count'
})
print("\n性别消费统计:")
print(gender_metrics)

# 计算产品的平均销售价格
product_metrics['avg_selling_price'] = product_metrics['total_amount'] / product_metrics['quantity']
print("\n产品平均销售价格:")
print(product_metrics[['order_count', 'quantity', 'total_amount', 'avg_selling_price']])
返回主页