项目5:商品销量时间序列分析

分析商品销量的时间趋势和周期性,预测未来销量。

数据预览

date        product_id  sales  quantity
2023-01-01  101         1000   20
2023-01-02  101         1200   24
2023-01-03  101         900    18
2023-01-04  101         1100   22
2023-01-05  101         1300   26
2023-01-06  101         1500   30
2023-01-07  101         1200   24
2023-01-08  101         1100   22
2023-01-09  101         1400   28
2023-01-10  101         1600   32

代码编辑器

参考答案

import pandas as pd
import numpy as np

from io import StringIO

# Inline CSV sample: ten consecutive days of sales for product 101.
data = '''date,product_id,sales,quantity
2023-01-01,101,1000,20
2023-01-02,101,1200,24
2023-01-03,101,900,18
2023-01-04,101,1100,22
2023-01-05,101,1300,26
2023-01-06,101,1500,30
2023-01-07,101,1200,24
2023-01-08,101,1100,22
2023-01-09,101,1400,28
2023-01-10,101,1600,32
'''

# Calendar order used to present the per-weekday averages; groupby would
# otherwise sort the day names alphabetically (Friday, Monday, ...).
WEEKDAY_ORDER = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Load the sample into a DataFrame and index it by the parsed dates so the
# index is a DatetimeIndex (needed for day_name() below).
df = pd.read_csv(StringIO(data))
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

print("原始数据:")
print(df)
print("\n数据基本信息:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# it must not be wrapped in print() -- that would emit a stray "None" line.
df.info()
print("\n数据描述性统计:")
print(df.describe())

# Headline statistics of the daily sales series.
print("\n销量趋势分析:")
print("平均日销量:", df['sales'].mean())
print("销量最大值:", df['sales'].max())
print("销量最小值:", df['sales'].min())
print("销量标准差:", df['sales'].std())

# Day-over-day growth in percent; the first row has no predecessor and is NaN.
df['sales_growth'] = df['sales'].pct_change() * 100
print("\n每日销量增长率:")
print(df['sales_growth'])

# 3-day and 5-day moving averages; rows before a full window are NaN.
df['sales_ma3'] = df['sales'].rolling(window=3).mean()
df['sales_ma5'] = df['sales'].rolling(window=5).mean()
print("\n3日移动平均销量:")
print(df['sales_ma3'])
print("\n5日移动平均销量:")
print(df['sales_ma5'])

# Periodicity check: mean sales per day of the week.
print("\n销量周期性分析:")
print("按周几分析销量:")
df['day_of_week'] = df.index.day_name()
day_of_week_sales = df.groupby('day_of_week')['sales'].mean()
# Re-order Monday..Sunday, keeping only the weekdays present in the data.
day_of_week_sales = day_of_week_sales.loc[
    [day for day in WEEKDAY_ORDER if day in day_of_week_sales.index]
]
print(day_of_week_sales)
# Simple linear regression used for the sales forecast
def linear_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: One-dimensional array-like of predictor values (list, tuple,
            NumPy array, pandas Series, ...).
        y: One-dimensional array-like of response values, same length as
            ``x``.

    Returns:
        A ``(slope, intercept)`` tuple of floats.

    Raises:
        ValueError: If the inputs are not one-dimensional, differ in length,
            contain fewer than two points, or all ``x`` values are equal
            (the slope is undefined in that case).
    """
    # Convert up front so plain Python sequences work and the arithmetic is
    # done in floating point rather than fixed-width integers.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be one-dimensional and of equal length")
    if x.size < 2:
        raise ValueError("at least two data points are required")

    # Mean-centred form of the least-squares solution: algebraically equal to
    # the raw-sums formula but less prone to cancellation error.
    x_mean = x.mean()
    y_mean = y.mean()
    x_dev = x - x_mean
    sxx = np.sum(x_dev ** 2)
    if sxx == 0:
        raise ValueError("x values are all identical; slope is undefined")

    slope = np.sum(x_dev * (y - y_mean)) / sxx
    intercept = y_mean - slope * x_mean
    return slope, intercept

# Regression inputs: the response is the daily sales figure and the
# predictor is the row position (0, 1, 2, ...) of each observation.
y = df['sales'].values
x = np.arange(y.size)

# Fit the trend line and report its coefficients.
slope, intercept = linear_regression(x, y)
print(
    "\n线性回归结果:",
    f"斜率: {slope:.2f}",
    f"截距: {intercept:.2f}",
    sep="\n",
)

# Extrapolate the fitted line over the three positions that follow the
# last observed row.
future_days = 3
future_x = len(df) + np.arange(future_days)
future_sales = intercept + slope * future_x
print("\n未来3天销量预测:")
# Calendar dates for the forecast horizon, starting the day after the
# last date in the index.
future_dates = pd.date_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=future_days,
    freq="D",
)
for future_date, predicted in zip(future_dates, future_sales):
    print(f"{future_date.date()}: {predicted:.0f}")

# Correlation between sales and quantity.
print("\n销量与数量的关系:")
sales_column = df['sales']
quantity_column = df['quantity']
correlation = sales_column.corr(quantity_column)
print(f"相关系数: {correlation:.2f}")

# Average unit price per day: sales divided by quantity.
df['avg_price'] = sales_column.div(quantity_column)
print("\n每日平均单价:")
print(df['avg_price'])
返回主页