huice/large_orders/analyze_large_orders_extended.py
Your Name 7f4f88e853 feat: 添加期货数据播放器及相关测试和文档
新增期货数据动态播放器功能,包括基础版和增强版实现,添加测试脚本和详细文档说明。主要变更包括:
1. 实现买卖盘深度可视化播放功能
2. 添加播放控制、速度调节和跳转功能
3. 提供统一价格轴显示优化版本
4. 添加测试脚本验证功能
5. 编写详细使用文档和README说明
2025-11-02 23:57:10 +08:00

333 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
# Set up fonts for Chinese text: list the preferred sans-serif families in
# lookup order, and draw minus signs with the ASCII hyphen rather than the
# Unicode minus (axes.unicode_minus = False).
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
# Zero-based offsets into the averaged series that are reported as
# "key points", paired with the display name used in the output.
_KEY_POINTS = ((49, '第50点'), (199, '第200点'), (499, '第500点'))


def _find_column(columns, chinese_key, english_key, first_match):
    """Return a column whose name contains one of the two keys, or None.

    A column matches when ``chinese_key`` occurs in ``str(col)`` or
    ``english_key`` occurs in ``str(col).lower()``.  ``first_match`` selects
    the first matching column; otherwise the last matching column wins.

    NOTE(review): the substring test is loose -- 'buy1' also matches names
    such as 'buy1_price' or 'buy10_vol', and 'price' matches any price
    column.  Confirm against the real column schema and tighten if needed.
    """
    matches = [
        col for col in columns
        if chinese_key in str(col) or english_key in str(col).lower()
    ]
    if not matches:
        return None
    return matches[0] if first_match else matches[-1]


def _large_order_positions(volumes, threshold):
    """Return the row positions (not index labels) where volume > threshold."""
    mask = volumes > threshold
    # Missing volumes never count as large orders; fillna also covers
    # nullable dtypes whose comparison result can contain <NA>.
    return np.flatnonzero(mask.fillna(False).to_numpy(dtype=bool)).tolist()


def _extract_price_sequences(prices, positions, max_points):
    """Collect price changes following each position, relative to its price.

    For every position ``p`` the next ``max_points`` prices (fewer near the
    end of the data) are taken and the price at ``p`` is subtracted from
    them.  Positions with no following row are skipped, so every returned
    array has at least one element.
    """
    total = len(prices)
    sequences = []
    for pos in positions:
        take_points = min(max_points, total - pos - 1)
        if take_points <= 0:
            continue
        future_prices = prices[pos + 1: pos + 1 + take_points]
        sequences.append(future_prices - prices[pos])
    return sequences


def _average_changes(sequences):
    """Return the per-offset mean over all sequences long enough to reach it."""
    if not sequences:
        return []
    max_len = max(len(seq) for seq in sequences)
    return [
        np.mean([seq[i] for seq in sequences if len(seq) > i])
        for i in range(max_len)
    ]


def _stats_lines(label, sequences):
    """Return display lines summarising the last value of each sequence.

    The result is an empty list when there is nothing to summarise.
    """
    final_changes = [seq[-1] for seq in sequences if len(seq) > 0]
    if not final_changes:
        return []
    return [
        f"【{label}】",
        f"序列数量: {len(sequences)}",
        f"平均最终变化: {np.mean(final_changes):.4f}",
        f"变化标准差: {np.std(final_changes):.4f}",
        f"最大上涨: {np.max(final_changes):.4f}",
        f"最大下跌: {np.min(final_changes):.4f}",
    ]


def _key_point_lines(buy_avg, sell_avg):
    """Return one comparison line per key point present in both averages."""
    lines = []
    for point, name in _KEY_POINTS:
        # An averaged series is never longer than the analysed window, so
        # this bound check also drops key points beyond the window length.
        if point < len(buy_avg) and point < len(sell_avg):
            diff = buy_avg[point] - sell_avg[point]
            lines.append(
                f"{name}: 买1={buy_avg[point]:.4f}, "
                f"卖1={sell_avg[point]:.4f}, 差值={diff:.4f}"
            )
    return lines


def _build_stats_text(length, buy_lines, sell_lines, key_lines):
    """Assemble the text shown in the statistics panel.

    ``key_lines`` is None when the key-point section must be omitted.
    """
    parts = [f"=== 统计信息对比 (后{length}点) ===\n\n"]
    for lines in (buy_lines, sell_lines):
        if lines:
            parts.append("\n".join(lines) + "\n\n")
    if key_lines is not None:
        parts.append(f"=== 关键时间点对比 (后{length}点) ===\n")
        parts.extend(line + "\n" for line in key_lines)
    return "".join(parts)


def _draw_sequences(ax, sequences, cmap, linewidth):
    """Plot every sequence in a shade of ``cmap`` plus a zero reference line."""
    if sequences:
        shades = cmap(np.linspace(0.3, 0.9, len(sequences)))
        for shade, sequence in zip(shades, sequences):
            ax.plot(range(len(sequence)), sequence,
                    color=shade, alpha=0.6, linewidth=linewidth)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)


def _draw_overview_panel(ax, sequences, cmap, label, length, box_color):
    """Draw one side's sequences with title, axis labels and a count box."""
    _draw_sequences(ax, sequences, cmap, linewidth=0.8)
    ax.set_xlabel('数据点序号')
    ax.set_ylabel('相对价格变化')
    ax.set_title(f'{label}的价格变化走势 (后{length}点)\n{len(sequences)}个序列')
    ax.grid(True, alpha=0.3)
    ax.text(0.02, 0.98, f'序列数: {len(sequences)}', transform=ax.transAxes,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))


def _save_comparison_figure(output_file, length, buy_label, sell_label,
                            buy_sequences, sell_sequences,
                            buy_avg, sell_avg, stats_text):
    """Render the 2x2 buy/sell comparison figure and save it to output_file."""
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    try:
        fig.suptitle(
            f'大额订单对比分析{buy_label} vs {sell_label} (后{length}个数据点)',
            fontsize=16, fontweight='bold')

        # Panels 1 and 2: every individual sequence for each side.
        _draw_overview_panel(axes[0, 0], buy_sequences, plt.cm.Blues,
                             buy_label, length, 'lightblue')
        _draw_overview_panel(axes[0, 1], sell_sequences, plt.cm.Reds,
                             sell_label, length, 'lightcoral')

        # Panel 3: averaged change of both sides on one axis.
        ax_avg = axes[1, 0]
        if buy_avg:
            ax_avg.plot(range(len(buy_avg)), buy_avg, color='blue',
                        linewidth=2.5,
                        label=f'{buy_label} (n={len(buy_sequences)})')
        if sell_avg:
            ax_avg.plot(range(len(sell_avg)), sell_avg, color='red',
                        linewidth=2.5,
                        label=f'{sell_label} (n={len(sell_sequences)})')
        ax_avg.axhline(y=0, color='black', linestyle='--', alpha=0.7,
                       linewidth=1.5)
        ax_avg.set_xlabel('数据点序号')
        ax_avg.set_ylabel('平均相对价格变化')
        ax_avg.set_title(f'平均价格变化对比 (后{length}点)')
        # A legend without labelled lines only produces a warning.
        if buy_avg or sell_avg:
            ax_avg.legend(fontsize=12)
        ax_avg.grid(True, alpha=0.3)

        # Panel 4: statistics rendered as text.
        ax_text = axes[1, 1]
        ax_text.axis('off')
        ax_text.text(0.05, 0.95, stats_text, transform=ax_text.transAxes,
                     fontsize=11, verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='lightgray',
                               alpha=0.8))

        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Close this figure explicitly instead of relying on pyplot's
        # implicit "current figure" state.
        plt.close(fig)


def _save_side_figure(output_file, length, side, label, sequences, cmap):
    """Render the single-side figure (all sequences) and save it."""
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        _draw_sequences(ax, sequences, cmap, linewidth=1)
        ax.set_xlabel(f'数据点序号 (相对于大额{side}订单, 后{length}点)',
                      fontsize=12)
        ax.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax.set_title(
            f'{label}的数据点后{length}个相对价格变化走势\n'
            f'所有序列从基准点(0)开始\n{len(sequences)}个序列',
            fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)


def analyze_large_orders_extended(data_path='data/au2512_20251013.parquet',
                                  volume_threshold=99,
                                  analysis_lengths=(100, 200, 500)):
    """Analyse the trade price after rows with a large buy-1 / sell-1 volume.

    Rows whose buy-1 volume (买1量) or sell-1 volume (卖1量) exceeds
    ``volume_threshold`` are located, and for each of them the change of the
    trade price (成交价) over the following rows is collected, relative to
    the price on the triggering row.  For every window length in
    ``analysis_lengths`` three PNG files are written to the current working
    directory (a 2x2 comparison figure plus one figure per side) and summary
    statistics are printed.

    Rows are addressed by position, so the result does not depend on the
    DataFrame's index labels.

    Args:
        data_path: Parquet file to read; a relative path is resolved against
            the current working directory.
        volume_threshold: Volumes strictly greater than this value count as
            large orders.
        analysis_lengths: Iterable of window lengths (number of rows after
            the triggering row) to analyse.

    Returns:
        None.  If a required column cannot be found, a message is printed
        and the function returns early without producing any output files.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    # Locate the volume columns (the last matching column wins).
    buy1_vol_col = _find_column(df.columns, '买1量', 'buy1', first_match=False)
    sell1_vol_col = _find_column(df.columns, '卖1量', 'sell1',
                                 first_match=False)
    if buy1_vol_col is None:
        print("未找到买1量列")
        return
    if sell1_vol_col is None:
        print("未找到卖1量列")
        return
    print(f"使用买1量列: {buy1_vol_col}")
    print(f"使用卖1量列: {sell1_vol_col}")

    # Locate the trade-price column (the first matching column wins).
    price_col = _find_column(df.columns, '成交价', 'price', first_match=True)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    prices = df[price_col].to_numpy()
    buy_positions = _large_order_positions(df[buy1_vol_col], volume_threshold)
    sell_positions = _large_order_positions(df[sell1_vol_col],
                                            volume_threshold)
    print(f"找到买1量大于{volume_threshold}的数据点数量: {len(buy_positions)}")
    print(f"找到卖1量大于{volume_threshold}的数据点数量: {len(sell_positions)}")

    buy_label = f'买1量>{volume_threshold}'
    sell_label = f'卖1量>{volume_threshold}'

    for length in analysis_lengths:
        print(f"\n{'='*60}")
        print(f"分析后{length}个数据点的价格走势")
        print(f"{'='*60}")

        buy_sequences = _extract_price_sequences(prices, buy_positions, length)
        sell_sequences = _extract_price_sequences(prices, sell_positions,
                                                  length)
        print(f"成功提取 {len(buy_sequences)} 个买1价格序列 (最大长度: {length})")
        print(f"成功提取 {len(sell_sequences)} 个卖1价格序列 (最大长度: {length})")

        buy_avg = _average_changes(buy_sequences)
        sell_avg = _average_changes(sell_sequences)
        buy_lines = _stats_lines(buy_label, buy_sequences)
        sell_lines = _stats_lines(sell_label, sell_sequences)
        # The key-point section only exists when both sides have data;
        # None (as opposed to an empty list) means "omit the section".
        key_lines = (_key_point_lines(buy_avg, sell_avg)
                     if buy_avg and sell_avg else None)

        # Combined comparison figure.
        output_file = f'large_orders_comprehensive_analysis_{length}points.png'
        _save_comparison_figure(
            output_file, length, buy_label, sell_label,
            buy_sequences, sell_sequences, buy_avg, sell_avg,
            _build_stats_text(length, buy_lines, sell_lines, key_lines))
        print(f"\n{length}点综合分析图表已保存为: {output_file}")
        print(f"完整路径: {os.path.abspath(output_file)}")

        # Separate figure for each side.
        buy_output_file = f'large_buy1_relative_price_changes_{length}points.png'
        _save_side_figure(buy_output_file, length, '买1', buy_label,
                          buy_sequences, plt.cm.Blues)
        print(f"买1量{length}点分析图表已保存为: {buy_output_file}")

        sell_output_file = f'large_sell1_relative_price_changes_{length}points.png'
        _save_side_figure(sell_output_file, length, '卖1', sell_label,
                          sell_sequences, plt.cm.Reds)
        print(f"卖1量{length}点分析图表已保存为: {sell_output_file}")

        # Console report mirroring the statistics panel.
        print(f"\n{'='*50}")
        print(f"详细统计信息 (后{length}点):")
        print(f"{'='*50}")
        for lines in (buy_lines, sell_lines):
            if lines:
                print("\n" + "\n".join(lines))
        if key_lines is not None:
            print(f"\n关键时间点对比:")
            for line in key_lines:
                print(line)

    print(f"\n{'='*60}")
    print("所有分析完成!")
    print(f"{'='*60}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_large_orders_extended()