huice/large_orders/analyze_total_orders_extended.py
Your Name 7f4f88e853 feat: 添加期货数据播放器及相关测试和文档
新增期货数据动态播放器功能,包括基础版和增强版实现,添加测试脚本和详细文档说明。主要变更包括:
1. 实现买卖盘深度可视化播放功能
2. 添加播放控制、速度调节和跳转功能
3. 提供统一价格轴显示优化版本
4. 添加测试脚本验证功能
5. 编写详细使用文档和README说明
2025-11-02 23:57:10 +08:00

347 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
# Configure fonts for the Chinese labels used in every chart: try SimHei, then
# Microsoft YaHei, then DejaVu Sans, and render the minus sign as an ASCII
# hyphen instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
def _find_columns_with(columns, markers):
    """Return the columns whose string form contains every marker substring."""
    return [col for col in columns if all(marker in str(col) for marker in markers)]


def _find_price_column(columns):
    """Return the first column that looks like a trade-price column, else None."""
    for col in columns:
        name = str(col)
        if '成交价' in name or 'price' in name.lower():
            return col
    return None


def _print_total_summary(label, totals):
    """Print min/max/mean of the selected totals; silent when nothing was selected."""
    if len(totals) > 0:
        print(f"{label}统计: 最小={totals.min():.0f}, 最大={totals.max():.0f}, 平均={totals.mean():.0f}")


def _extract_price_sequences(prices, positions, max_points):
    """Return, for each row position, the following prices minus the price at that row.

    Each sequence holds at most ``max_points`` values; positions with no
    following row are skipped. Slicing is positional, so the result does not
    depend on the DataFrame's index labels.
    """
    sequences = []
    for pos in positions:
        future = prices[pos + 1: pos + 1 + max_points]  # slice clamps at the end
        if len(future) > 0:
            sequences.append(future - prices[pos])
    return sequences


def _average_changes(sequences):
    """Return the per-offset mean across sequences (shorter sequences drop out)."""
    if not sequences:
        return []
    longest = max(len(seq) for seq in sequences)
    return [
        np.mean([seq[offset] for seq in sequences if len(seq) > offset])
        for offset in range(longest)
    ]


def _final_change_stats(sequences, name):
    """Summarise the last value of every sequence; empty dict when there are none."""
    final_changes = [seq[-1] for seq in sequences if len(seq) > 0]
    if not final_changes:
        return {}
    return {
        'name': name,
        'count': len(sequences),
        'avg_final_change': np.mean(final_changes),
        'std_final_change': np.std(final_changes),
        'max_rise': np.max(final_changes),
        'max_fall': np.min(final_changes),
    }


def _format_stats(stats):
    """Return the display lines for one stats dict from ``_final_change_stats``."""
    return [
        f"【{stats['name']}】",
        f"序列数量: {stats['count']}",
        f"平均最终变化: {stats['avg_final_change']:.4f}",
        f"变化标准差: {stats['std_final_change']:.4f}",
        f"最大上涨: {stats['max_rise']:.4f}",
        f"最大下跌: {stats['max_fall']:.4f}",
    ]


def _key_points(length):
    """Return the (zero-based offset, label) checkpoints used for a window length."""
    points = [(49, '第50点')]
    if length >= 200:
        points.append((199, '第200点'))
    if length >= 500:
        points.append((499, '第500点'))
    return points


def _plot_sequences(ax, sequences, cmap, linewidth):
    """Draw every sequence on ``ax`` in a graded colour, plus a zero baseline."""
    if sequences:
        colors = cmap(np.linspace(0.3, 0.9, len(sequences)))
        for color, seq in zip(colors, sequences):
            ax.plot(range(len(seq)), seq, color=color, alpha=0.6, linewidth=linewidth)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)


def _draw_overview_panel(ax, sequences, side, cmap, box_color, length):
    """Fill one of the two per-side panels of the combined figure."""
    _plot_sequences(ax, sequences, cmap, linewidth=0.8)
    ax.set_xlabel('数据点序号')
    ax.set_ylabel('相对价格变化')
    ax.set_title(f'{side}合计>200的价格变化走势 (后{length}点)\n{len(sequences)}个序列')
    ax.grid(True, alpha=0.3)
    ax.text(0.02, 0.98, f'序列数: {len(sequences)}', transform=ax.transAxes,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))


def _save_side_figure(sequences, side, file_tag, cmap, length):
    """Save the stand-alone chart for one side (buy or sell) and report its name."""
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        _plot_sequences(ax, sequences, cmap, linewidth=1)
        ax.set_xlabel(f'数据点序号 (相对于大额{side}订单, 后{length}点)', fontsize=12)
        ax.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax.set_title(
            f'{side}合计>200的数据点后{length}个相对价格变化走势\n所有序列从基准点(0)开始\n{len(sequences)}个序列',
            fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        output_file = f'total_{file_tag}_relative_price_changes_{length}points.png'
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Close this figure only; never touch figures the caller may have open.
        plt.close(fig)
    print(f"{side}{length}点分析图表已保存为: {output_file}")


def _analyze_window(prices, buy_positions, sell_positions, length):
    """Run the full analysis (charts + console report) for one look-ahead length."""
    print(f"\n{'='*60}")
    print(f"分析后{length}个数据点的价格走势")
    print(f"{'='*60}")

    buy_sequences = _extract_price_sequences(prices, buy_positions, length)
    sell_sequences = _extract_price_sequences(prices, sell_positions, length)
    print(f"成功提取 {len(buy_sequences)} 个买挂价格序列 (最大长度: {length})")
    print(f"成功提取 {len(sell_sequences)} 个卖挂价格序列 (最大长度: {length})")

    buy_avg = _average_changes(buy_sequences)
    sell_avg = _average_changes(sell_sequences)
    buy_stats = _final_change_stats(buy_sequences, '买挂合计>200')
    sell_stats = _final_change_stats(sell_sequences, '卖挂合计>200')
    stats_blocks = [_format_stats(stats) for stats in (buy_stats, sell_stats) if stats]

    # The checkpoint comparison is computed once and reused by chart and console.
    have_both = bool(buy_avg) and bool(sell_avg)
    comparison_lines = []
    if have_both:
        for point, label in _key_points(length):
            if point < len(buy_avg) and point < len(sell_avg):
                buy_value, sell_value = buy_avg[point], sell_avg[point]
                comparison_lines.append(
                    f"{label}: 买挂={buy_value:.4f}, 卖挂={sell_value:.4f}, "
                    f"差值={buy_value - sell_value:.4f}")

    stats_text = f"=== 统计信息对比 (后{length}点) ===\n\n"
    for block in stats_blocks:
        stats_text += "\n".join(block) + "\n\n"
    if have_both:
        stats_text += f"=== 关键时间点对比 (后{length}点) ===\n"
        stats_text += "".join(line + "\n" for line in comparison_lines)

    # Combined 2x2 figure: buy paths, sell paths, average comparison, stats box.
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    try:
        fig.suptitle(f'大额订单对比分析:买挂合计>200 vs 卖挂合计>200 (后{length}个数据点)',
                     fontsize=16, fontweight='bold')
        _draw_overview_panel(axes[0, 0], buy_sequences, '买挂', plt.cm.Blues, 'lightblue', length)
        _draw_overview_panel(axes[0, 1], sell_sequences, '卖挂', plt.cm.Reds, 'lightcoral', length)

        ax_avg = axes[1, 0]
        if buy_avg:
            ax_avg.plot(range(len(buy_avg)), buy_avg, color='blue', linewidth=2.5,
                        label=f'买挂合计>200 (n={len(buy_sequences)})')
        if sell_avg:
            ax_avg.plot(range(len(sell_avg)), sell_avg, color='red', linewidth=2.5,
                        label=f'卖挂合计>200 (n={len(sell_sequences)})')
        ax_avg.axhline(y=0, color='black', linestyle='--', alpha=0.7, linewidth=1.5)
        ax_avg.set_xlabel('数据点序号')
        ax_avg.set_ylabel('平均相对价格变化')
        ax_avg.set_title(f'平均价格变化对比 (后{length}点)')
        ax_avg.legend(fontsize=12)
        ax_avg.grid(True, alpha=0.3)

        ax_text = axes[1, 1]
        ax_text.axis('off')
        ax_text.text(0.05, 0.95, stats_text, transform=ax_text.transAxes, fontsize=11,
                     verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

        fig.tight_layout()
        output_file = f'total_orders_comprehensive_analysis_{length}points.png'
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)
    print(f"\n{length}点综合分析图表已保存为: {output_file}")
    print(f"完整路径: {os.path.abspath(output_file)}")

    _save_side_figure(buy_sequences, '买挂', 'buy', plt.cm.Blues, length)
    _save_side_figure(sell_sequences, '卖挂', 'sell', plt.cm.Reds, length)

    print(f"\n{'='*50}")
    print(f"详细统计信息 (后{length}点):")
    print(f"{'='*50}")
    for block in stats_blocks:
        print("\n" + "\n".join(block))
    if have_both:
        print("\n关键时间点对比:")
        for line in comparison_lines:
            print(line)


def analyze_total_orders_extended(
    data_path='data/au2512_20251013.parquet',
    *,
    df=None,
    buy_markers=('买', '量'),
    sell_markers=('卖', '量'),
):
    """Chart how the trade price moves after rows with large resting order totals.

    The buy-side and sell-side volume columns are summed per row. Rows whose
    total exceeds 200 are taken as events, and for each event the following
    100, 200 and 500 prices are expressed relative to the event price. For
    every window length three PNG files are written to the current working
    directory and a text report is printed.

    Args:
        data_path: Parquet file to read when ``df`` is not supplied.
        df: Optional, already-loaded DataFrame. It is not modified.
        buy_markers: Substrings that must all appear in a column name for it
            to count as a buy-side volume column.
        sell_markers: Same, for sell-side volume columns.
            NOTE(review): the marker substrings in the original source were
            lost (they appeared as empty strings, which match every column).
            The defaults are reconstructed from the "买1-5量 / 卖1-5量"
            wording of the original comment -- confirm against the real
            column names.

    Returns:
        None. Returns early, after printing a message, when fewer than five
        buy or sell volume columns are found or no price column is found.

    Raises:
        ValueError: If a marker tuple is empty or contains an empty string,
            since that would select every column.
    """
    for markers in (buy_markers, sell_markers):
        if not markers or not all(markers):
            raise ValueError("column markers must be non-empty strings")

    if df is None:
        print("正在读取数据文件...")
        df = pd.read_parquet(data_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    buy_vol_cols = _find_columns_with(df.columns, buy_markers)
    sell_vol_cols = _find_columns_with(df.columns, sell_markers)
    print(f"找到买量列: {buy_vol_cols}")
    print(f"找到卖量列: {sell_vol_cols}")
    if len(buy_vol_cols) < 5:
        print("警告: 未找到完整的买1-5量列")
        return
    if len(sell_vol_cols) < 5:
        print("警告: 未找到完整的卖1-5量列")
        return

    price_col = _find_price_column(df.columns)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    print("正在计算买挂合计和卖挂合计...")
    # Keep the totals in separate Series so a caller-supplied frame is untouched.
    buy_totals = df[buy_vol_cols].sum(axis=1)
    sell_totals = df[sell_vol_cols].sum(axis=1)

    # Row positions (not index labels) so any index type works.
    buy_positions = np.flatnonzero((buy_totals > 200).to_numpy())
    sell_positions = np.flatnonzero((sell_totals > 200).to_numpy())
    print(f"找到买挂合计大于200的数据点数量: {len(buy_positions)}")
    print(f"找到卖挂合计大于200的数据点数量: {len(sell_positions)}")
    _print_total_summary('买挂合计', buy_totals.iloc[buy_positions])
    _print_total_summary('卖挂合计', sell_totals.iloc[sell_positions])

    prices = df[price_col].to_numpy(dtype=float)
    for length in (100, 200, 500):
        _analyze_window(prices, buy_positions, sell_positions, length)

    print(f"\n{'='*60}")
    print("所有分析完成!")
    print(f"{'='*60}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_total_orders_extended()