分析商品销量的时间趋势和周期性,预测未来销量。
| date | product_id | sales | quantity |
|---|---|---|---|
| 2023-01-01 | 101 | 1000 | 20 |
| 2023-01-02 | 101 | 1200 | 24 |
| 2023-01-03 | 101 | 900 | 18 |
| 2023-01-04 | 101 | 1100 | 22 |
| 2023-01-05 | 101 | 1300 | 26 |
| 2023-01-06 | 101 | 1500 | 30 |
| 2023-01-07 | 101 | 1200 | 24 |
| 2023-01-08 | 101 | 1100 | 22 |
| 2023-01-09 | 101 | 1400 | 28 |
| 2023-01-10 | 101 | 1600 | 32 |
import pandas as pd
import numpy as np
from io import StringIO

# Inline CSV sample: ten consecutive daily records for product 101.
data = '''date,product_id,sales,quantity
2023-01-01,101,1000,20
2023-01-02,101,1200,24
2023-01-03,101,900,18
2023-01-04,101,1100,22
2023-01-05,101,1300,26
2023-01-06,101,1500,30
2023-01-07,101,1200,24
2023-01-08,101,1100,22
2023-01-09,101,1400,28
2023-01-10,101,1600,32
'''
# Parse the CSV text in one step: `date` is converted to datetimes and used
# as the index, so the time-based operations below run on a DatetimeIndex.
df = pd.read_csv(StringIO(data), parse_dates=['date'], index_col='date')
print("原始数据:")
print(df)
print("\n数据基本信息:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\n数据描述性统计:")
print(df.describe())
# Summary statistics of the daily `sales` column.
print("\n销量趋势分析:")
print("平均日销量:", df['sales'].mean())
print("销量最大值:", df['sales'].max())
print("销量最小值:", df['sales'].min())
print("销量标准差:", df['sales'].std())
# Day-over-day change in percent; the first row is NaN because it has no
# previous day to compare against.
df['sales_growth'] = df['sales'].pct_change() * 100
print("\n每日销量增长率:")
print(df['sales_growth'])
# 3-day and 5-day moving averages; the first window-1 rows are NaN because
# the window is not yet full.
df['sales_ma3'] = df['sales'].rolling(window=3).mean()
df['sales_ma5'] = df['sales'].rolling(window=5).mean()
print("\n3日移动平均销量:")
print(df['sales_ma3'])
print("\n5日移动平均销量:")
print(df['sales_ma5'])
# Periodicity check: mean sales per weekday (assumes a weekly cycle exists).
print("\n销量周期性分析:")
print("按周几分析销量:")
df['day_of_week'] = df.index.day_name()
weekday_means = df.groupby('day_of_week')['sales'].mean()
# groupby sorts its keys alphabetically (Friday, Monday, Saturday, ...), which
# hides any weekly pattern. Reorder to calendar order, keeping only the
# weekdays that actually occur in the data.
weekday_order = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
day_of_week_sales = weekday_means.reindex(
    [day for day in weekday_order if day in weekday_means.index]
)
print(day_of_week_sales)
# Simple linear regression used for the forecast below.
def linear_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: One-dimensional array-like of predictor values.
        y: One-dimensional array-like of responses, same length as ``x``.

    Returns:
        A ``(slope, intercept)`` tuple of floats.

    Raises:
        ValueError: If the inputs are not one-dimensional, differ in length,
            contain fewer than two points, or all ``x`` values are identical
            (the slope is undefined in that case).
    """
    # Coerce up front so plain lists work and integer inputs cannot overflow.
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    if xs.ndim != 1 or ys.ndim != 1:
        raise ValueError("x and y must be one-dimensional")
    if xs.shape != ys.shape:
        raise ValueError("x and y must have the same length")
    if xs.size < 2:
        raise ValueError("at least two data points are required")
    if np.all(xs == xs[0]):
        raise ValueError("x values are all identical; slope is undefined")

    # Centered form of the least-squares formulas: algebraically equal to the
    # raw-sum form but avoids subtracting two large, nearly equal numbers.
    x_mean = xs.mean()
    y_mean = ys.mean()
    x_dev = xs - x_mean
    slope = np.sum(x_dev * (ys - y_mean)) / np.sum(x_dev ** 2)
    intercept = y_mean - slope * x_mean
    return slope, intercept
# Regression inputs: day offsets 0..n-1 against the observed sales values.
x = np.arange(len(df))
y = df['sales'].values
# Fit the linear trend.
slope, intercept = linear_regression(x, y)
print("\n线性回归结果:")
print(f"斜率: {slope:.2f}")
print(f"截距: {intercept:.2f}")
# Extrapolate the fitted line over the `future_days` days after the last row.
future_days = 3
future_x = np.arange(len(df), len(df) + future_days)
future_sales = slope * future_x + intercept
# The header is derived from `future_days` so it stays correct if the horizon
# is changed; for the default of 3 the output is unchanged.
print(f"\n未来{future_days}天销量预测:")
for day_offset, sales in enumerate(future_sales, start=1):
    future_date = df.index[-1] + pd.Timedelta(days=day_offset)
    print(f"{future_date.date()}: {sales:.0f}")
# Correlation between the `sales` and `quantity` columns.
sales_col, quantity_col = df['sales'], df['quantity']
print("\n销量与数量的关系:")
correlation = sales_col.corr(quantity_col)
print(f"相关系数: {correlation:.2f}")
# Daily average unit price: sales divided by quantity, row by row.
df['avg_price'] = sales_col.div(quantity_col)
print("\n每日平均单价:")
print(df['avg_price'])