项目3:购物车(购物篮)分析

使用关联规则分析购物篮数据,发现商品之间的关联关系。

数据预览

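| order_id | product_id | product_name | quantity | price |
|----------|------------|--------------|----------|-------|
| 1 | 101 | 牛奶 | 2 | 50.0 |
| 1 | 102 | 面包 | 1 | 30.0 |
| 1 | 103 | 鸡蛋 | 1 | 20.0 |
| 2 | 101 | 牛奶 | 1 | 50.0 |
| 2 | 102 | 面包 | 2 | 30.0 |
| 3 | 103 | 鸡蛋 | 2 | 20.0 |
| 3 | 104 | 黄油 | 1 | 40.0 |
| 4 | 101 | 牛奶 | 1 | 50.0 |
| 4 | 103 | 鸡蛋 | 1 | 20.0 |
| 4 | 104 | 黄油 | 1 | 40.0 |
| 5 | 102 | 面包 | 1 | 30.0 |
| 5 | 103 | 鸡蛋 | 1 | 20.0 |
| 6 | 101 | 牛奶 | 1 | 50.0 |
| 6 | 102 | 面包 | 1 | 30.0 |
| 6 | 103 | 鸡蛋 | 1 | 20.0 |
| 6 | 104 | 黄油 | 1 | 40.0 |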

代码编辑器

参考答案

import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules

from io import StringIO

# Inline sample of order line items: one CSV row per (order, product) pair.
data = '''order_id,product_id,product_name,quantity,price
1,101,牛奶,2,50.0
1,102,面包,1,30.0
1,103,鸡蛋,1,20.0
2,101,牛奶,1,50.0
2,102,面包,2,30.0
3,103,鸡蛋,2,20.0
3,104,黄油,1,40.0
4,101,牛奶,1,50.0
4,103,鸡蛋,1,20.0
4,104,黄油,1,40.0
5,102,面包,1,30.0
5,103,鸡蛋,1,20.0
6,101,牛奶,1,50.0
6,102,面包,1,30.0
6,103,鸡蛋,1,20.0
6,104,黄油,1,40.0
'''

# Parse the CSV text into a DataFrame.
df = pd.read_csv(StringIO(data))

print("原始数据:")
print(df)

# Pivot to basket format: one row per order, one column per product, each cell
# holding the total quantity bought. After unstack() the index is already
# order_id, so no reset_index()/set_index() round-trip is needed; fill_value=0
# marks products that do not appear in an order.
basket = (
    df.groupby(['order_id', 'product_name'])['quantity']
    .sum()
    .unstack(fill_value=0)
)

# One-hot encode (1 = product was bought in that order, 0 = it was not).
# A vectorized comparison replaces the deprecated DataFrame.applymap and
# behaves the same on every pandas version.
basket_encoded = (basket > 0).astype(int)
print("\n购物篮编码数据:")
print(basket_encoded)

# Mine frequent itemsets with the Apriori algorithm, keeping itemsets whose
# support is at least 0.3 (use_colnames=True reports product names rather than
# column positions). mlxtend expects a boolean one-hot frame and emits a
# deprecation warning for other dtypes, so cast explicitly; the resulting
# itemsets are the same as for the 0/1 integer encoding.
frequent_itemsets = apriori(
    basket_encoded.astype(bool),
    min_support=0.3,
    use_colnames=True,
)
print("\n频繁项集:")
print(frequent_itemsets)

# Derive association rules from the frequent itemsets, using lift as the
# filtering metric with a threshold of 1.
rules = association_rules(frequent_itemsets, min_threshold=1, metric="lift")
print("\n关联规则:")
print(rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

# Rank the rules from highest to lowest lift and show at most the first ten.
rules_sorted = rules.sort_values(by='lift', ascending=False)
print("\n按Lift值排序的关联规则:")
print(
    rules_sorted.loc[
        :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
    ].head(10)
)

# Most common product combinations: itemsets ordered by descending support,
# limited to the first ten.
print("\n最常见的商品组合:")
print(frequent_itemsets.sort_values(by='support', ascending=False).head(10))

# Itemsets whose support is strictly greater than 0.5.
print("\n支持度大于0.5的项集:")
print(frequent_itemsets.loc[frequent_itemsets['support'] > 0.5])

# Rules whose confidence is strictly greater than 0.7, reduced to the columns
# needed to read them.
print("\n置信度大于0.7的规则:")
print(
    rules.loc[
        rules['confidence'] > 0.7,
        ['antecedents', 'consequents', 'confidence'],
    ]
)
返回主页