huice/large_orders/analyze_large_orders_extended.py
Your Name e5dd5b5593 feat: 期货数据分析工具集 v2.0
## 核心功能
### 1. 成交量序列分析 (volume_price_sequence.py)
- 按累计成交量排序的价格趋势分析
- 三合一综合图表:价格序列+成交量分布+时间序列
- 关键价格水平自动标注

### 2. 成交量分布深度分析 (volume_distribution_analysis.py)
- 7种专业可视化图表
- 统计特征分析和分布拟合
- 交易模式识别和业务洞察

### 3. 大额订单分析工具集 (large_orders/)
- 买1/卖1量大单分析 (阈值99)
- 买卖挂单合计分析 (阈值200)
- 当前成交量分析 (阈值150)
- 信号抑制优化算法 (38%抑制率)

## 技术特性
- 信号抑制算法:有效减少重复信号干扰
- 多维度分析:支持多种信号类型
- 专业可视化:四宫格综合分析图
- 业务洞察:基于数据的交易建议

## 分析结果
- 卖1量大单:短期下跌,长期大幅上涨反转
- 买挂合计:各时间窗口小幅正收益
- 信号抑制:短期收益从-0.0778提升至+0.1347

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-02 15:15:53 +08:00

333 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
# Configure fonts for Chinese text: try CJK-capable sans-serif families first,
# and render minus signs as ASCII hyphens rather than the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
# 1-based offsets (rows after the trigger row) reported in the key-point
# comparison; only those not exceeding the analysed window length are shown.
_KEY_POINT_ORDINALS = (50, 200, 500)


def _find_column(columns, literal_key, lowercase_key, take_last):
    """Return a column whose name contains one of the two keys, or None.

    ``literal_key`` is matched against the name as-is, ``lowercase_key``
    against the lower-cased name. ``take_last`` selects the last matching
    column instead of the first.
    """
    # NOTE(review): substring matching can hit several columns (e.g. a
    # "buy1" price column as well as a "buy1" volume column); confirm the
    # first/last-match choice picks the intended one for the real schema.
    matches = [
        col for col in columns
        if literal_key in str(col) or lowercase_key in str(col).lower()
    ]
    if not matches:
        return None
    return matches[-1] if take_last else matches[0]


def _large_order_positions(volumes, threshold):
    """Return positional row numbers where ``volumes`` exceeds ``threshold``."""
    # Missing values count as "not large", matching boolean-mask indexing.
    mask = (volumes > threshold).to_numpy(dtype=bool, na_value=False)
    return np.flatnonzero(mask)


def _extract_price_sequences(prices, positions, max_points):
    """Return price changes relative to each trigger row.

    For every position, takes up to ``max_points`` prices that follow it
    and subtracts the price at the position itself. Positions with no
    following rows are skipped; windows near the end are truncated.
    """
    if max_points <= 0:
        return []
    sequences = []
    for pos in positions:
        window = prices[pos + 1: pos + 1 + max_points]
        if len(window) > 0:
            sequences.append(window - prices[pos])
    return sequences


def _mean_change_by_offset(sequences):
    """Return the mean change at each offset across all sequences.

    Sequences may differ in length; each offset is averaged over the
    sequences that are long enough to reach it.
    """
    if not sequences:
        return []
    longest = max(len(seq) for seq in sequences)
    totals = np.zeros(longest)
    counts = np.zeros(longest, dtype=int)
    for seq in sequences:
        totals[:len(seq)] += seq
        counts[:len(seq)] += 1
    # Every offset below ``longest`` is covered by at least one sequence,
    # so ``counts`` has no zeros.
    return (totals / counts).tolist()


def _final_change_stats(sequences, name):
    """Return summary statistics of each sequence's last value, or {}."""
    final_changes = [seq[-1] for seq in sequences if len(seq) > 0]
    if not final_changes:
        return {}
    return {
        'name': name,
        'count': len(sequences),
        'avg_final_change': np.mean(final_changes),
        'std_final_change': np.std(final_changes),
        'max_rise': np.max(final_changes),
        'max_fall': np.min(final_changes),
    }


def _summarize_side(name, prices, positions, length):
    """Bundle the sequences, per-offset means and stats for one order side."""
    sequences = _extract_price_sequences(prices, positions, length)
    return {
        'name': name,
        'sequences': sequences,
        'avg': _mean_change_by_offset(sequences),
        'stats': _final_change_stats(sequences, name),
    }


def _stats_lines(stats):
    """Format one side's statistics as display lines (shared by plot and console)."""
    return [
        f"【{stats['name']}】",
        f"序列数量: {stats['count']}",
        f"平均最终变化: {stats['avg_final_change']:.4f}",
        f"变化标准差: {stats['std_final_change']:.4f}",
        f"最大上涨: {stats['max_rise']:.4f}",
        f"最大下跌: {stats['max_fall']:.4f}",
    ]


def _key_point_lines(buy_avg, sell_avg, length):
    """Format the buy/sell mean comparison at the key offsets within ``length``."""
    if not (buy_avg and sell_avg):
        return []
    lines = []
    for ordinal in _KEY_POINT_ORDINALS:
        offset = ordinal - 1
        if ordinal <= length and offset < len(buy_avg) and offset < len(sell_avg):
            diff = buy_avg[offset] - sell_avg[offset]
            lines.append(
                f"第{ordinal}点: 买1={buy_avg[offset]:.4f}, "
                f"卖1={sell_avg[offset]:.4f}, 差值={diff:.4f}"
            )
    return lines


def _stats_panel_text(buy, sell, key_lines, length):
    """Build the text shown in the statistics panel of the comparison figure."""
    parts = [f"=== 统计信息对比 (后{length}点) ===\n\n"]
    for side in (buy, sell):
        if side['stats']:
            parts.append("\n".join(_stats_lines(side['stats'])) + "\n\n")
    if buy['avg'] and sell['avg']:
        parts.append(f"=== 关键时间点对比 (后{length}点) ===\n")
        parts.extend(line + "\n" for line in key_lines)
    return "".join(parts)


def _plot_sequences(ax, sequences, cmap, linewidth):
    """Draw every sequence on ``ax`` in a colormap gradient plus a zero line."""
    if sequences:
        colors = cmap(np.linspace(0.3, 0.9, len(sequences)))
        for color, seq in zip(colors, sequences):
            ax.plot(range(len(seq)), seq, color=color, alpha=0.6, linewidth=linewidth)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)


def _draw_sequence_panel(ax, side, cmap, box_color, length):
    """Draw one side's sequences as a panel of the comparison figure."""
    count = len(side['sequences'])
    _plot_sequences(ax, side['sequences'], cmap, linewidth=0.8)
    ax.set_xlabel('数据点序号')
    ax.set_ylabel('相对价格变化')
    ax.set_title(f"{side['name']}的价格变化走势 (后{length}点)\n{count}个序列")
    ax.grid(True, alpha=0.3)
    ax.text(0.02, 0.98, f'序列数: {count}', transform=ax.transAxes,
            verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))


def _save_comparison_figure(buy, sell, key_lines, length, output_file):
    """Save the 2x2 figure: both sides, their mean curves, and the stats text."""
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    try:
        fig.suptitle(f"大额订单对比分析{buy['name']} vs {sell['name']} (后{length}个数据点)",
                     fontsize=16, fontweight='bold')
        _draw_sequence_panel(axes[0, 0], buy, plt.cm.Blues, 'lightblue', length)
        _draw_sequence_panel(axes[0, 1], sell, plt.cm.Reds, 'lightcoral', length)

        ax_avg = axes[1, 0]
        for side, color in ((buy, 'blue'), (sell, 'red')):
            if side['avg']:
                ax_avg.plot(range(len(side['avg'])), side['avg'],
                            color=color, linewidth=2.5,
                            label=f"{side['name']} (n={len(side['sequences'])})")
        ax_avg.axhline(y=0, color='black', linestyle='--', alpha=0.7, linewidth=1.5)
        ax_avg.set_xlabel('数据点序号')
        ax_avg.set_ylabel('平均相对价格变化')
        ax_avg.set_title(f'平均价格变化对比 (后{length}点)')
        ax_avg.legend(fontsize=12)
        ax_avg.grid(True, alpha=0.3)

        ax_text = axes[1, 1]
        ax_text.axis('off')
        ax_text.text(0.05, 0.95, _stats_panel_text(buy, sell, key_lines, length),
                     transform=ax_text.transAxes, fontsize=11,
                     verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Close only the figure created here, even if saving fails.
        plt.close(fig)


def _save_side_figure(side, side_label, cmap, length, output_file):
    """Save a standalone figure of one side's relative price-change sequences."""
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        _plot_sequences(ax, side['sequences'], cmap, linewidth=1)
        ax.set_xlabel(f'数据点序号 (相对于大额{side_label}订单, 后{length}点)', fontsize=12)
        ax.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax.set_title(
            f"{side['name']}的数据点后{length}个相对价格变化走势\n"
            f"所有序列从基准点(0)开始\n{len(side['sequences'])}个序列",
            fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)


def analyze_large_orders_extended(data_path='../data/au2512_20251013.parquet',
                                  threshold=99,
                                  analysis_lengths=(100, 200, 500)):
    """Analyse price moves that follow large best-bid / best-ask volumes.

    Reads a parquet file, locates the buy-1 volume, sell-1 volume and trade
    price columns by substring match, and for every row whose buy-1 (or
    sell-1) volume exceeds ``threshold`` collects the following trade prices
    relative to that row's price. For each window length it saves three PNG
    files into the current working directory (a 2x2 comparison figure and one
    figure per side) and prints summary statistics.

    Rows are addressed by position, so the result does not depend on the
    DataFrame's index labels.

    Args:
        data_path: Parquet file to read. A relative path is resolved against
            the current working directory.
        threshold: Volume strictly above which a row counts as a large order.
        analysis_lengths: Window lengths (number of following rows) to analyse.

    Returns:
        None. Returns early, after printing a message, if a required column
        cannot be found.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    buy1_vol_col = _find_column(df.columns, '买1量', 'buy1', take_last=True)
    sell1_vol_col = _find_column(df.columns, '卖1量', 'sell1', take_last=True)
    if buy1_vol_col is None:
        print("未找到买1量列")
        return
    if sell1_vol_col is None:
        print("未找到卖1量列")
        return
    print(f"使用买1量列: {buy1_vol_col}")
    print(f"使用卖1量列: {sell1_vol_col}")

    price_col = _find_column(df.columns, '成交价', 'price', take_last=False)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    # Work on positional arrays so a non-default index (timestamps, filtered
    # rows) cannot mix label lookups with positional arithmetic.
    prices = df[price_col].to_numpy(dtype=float)
    buy1_positions = _large_order_positions(df[buy1_vol_col], threshold)
    sell1_positions = _large_order_positions(df[sell1_vol_col], threshold)
    print(f"找到买1量大于{threshold}的数据点数量: {len(buy1_positions)}")
    print(f"找到卖1量大于{threshold}的数据点数量: {len(sell1_positions)}")

    buy1_name = f'买1量>{threshold}'
    sell1_name = f'卖1量>{threshold}'

    for length in analysis_lengths:
        print(f"\n{'='*60}")
        print(f"分析后{length}个数据点的价格走势")
        print(f"{'='*60}")

        buy = _summarize_side(buy1_name, prices, buy1_positions, length)
        sell = _summarize_side(sell1_name, prices, sell1_positions, length)
        print(f"成功提取 {len(buy['sequences'])} 个买1价格序列 (最大长度: {length})")
        print(f"成功提取 {len(sell['sequences'])} 个卖1价格序列 (最大长度: {length})")

        key_lines = _key_point_lines(buy['avg'], sell['avg'], length)

        output_file = f'large_orders_comprehensive_analysis_{length}points.png'
        _save_comparison_figure(buy, sell, key_lines, length, output_file)
        print(f"\n{length}点综合分析图表已保存为: {output_file}")
        print(f"完整路径: {os.path.abspath(output_file)}")

        side_figures = (
            (buy, '买1', 'buy1', plt.cm.Blues),
            (sell, '卖1', 'sell1', plt.cm.Reds),
        )
        for side, side_label, file_tag, cmap in side_figures:
            side_file = f'large_{file_tag}_relative_price_changes_{length}points.png'
            _save_side_figure(side, side_label, cmap, length, side_file)
            print(f"{side_label}量{length}点分析图表已保存为: {side_file}")

        print(f"\n{'='*50}")
        print(f"详细统计信息 (后{length}点):")
        print(f"{'='*50}")
        for side in (buy, sell):
            if side['stats']:
                print("\n" + "\n".join(_stats_lines(side['stats'])))

        if buy['avg'] and sell['avg']:
            print("\n关键时间点对比:")
            for line in key_lines:
                print(line)

    print(f"\n{'='*60}")
    print("所有分析完成!")
    print(f"{'='*60}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    analyze_large_orders_extended()