huice/analyze_large_orders.py
Your Name e5dd5b5593 feat: 期货数据分析工具集 v2.0
## 核心功能
### 1. 成交量序列分析 (volume_price_sequence.py)
- 按累计成交量排序的价格趋势分析
- 三合一综合图表:价格序列+成交量分布+时间序列
- 关键价格水平自动标注

### 2. 成交量分布深度分析 (volume_distribution_analysis.py)
- 7种专业可视化图表
- 统计特征分析和分布拟合
- 交易模式识别和业务洞察

### 3. 大额订单分析工具集 (large_orders/)
- 买1/卖1量大单分析 (阈值99)
- 买卖挂单合计分析 (阈值200)
- 当前成交量分析 (阈值150)
- 信号抑制优化算法 (38%抑制率)

## 技术特性
- 信号抑制算法:有效减少重复信号干扰
- 多维度分析:支持多种信号类型
- 专业可视化:四宫格综合分析图
- 业务洞察:基于数据的交易建议

## 分析结果
- 卖1量大单:短期下跌,长期大幅上涨反转
- 买挂合计:各时间窗口小幅正收益
- 信号抑制:短期收益从-0.0778提升至+0.1347

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-02 15:15:53 +08:00

1041 lines
49 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AU2512 futures large-order analysis tool (combined 买1量 / 卖1量 analysis).

This script selects the rows whose 买1量 (bid-1 quantity) or 卖1量 (ask-1
quantity) exceeds a threshold and computes the traded-price range over the
100 and 200 trades that follow each of those rows.

A large 买1量 usually means there are many resting buy orders, which may be a
signal that the price will rise. A large 卖1量 usually means there are many
resting sell orders, which may be a signal that the price will fall.

Usage:
    python analyze_large_orders.py [data_file]

If no file is given, data/au2512_20251013.parquet is analysed.

Output directory: large_orders/
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import os
import sys
import argparse
from pathlib import Path
# Suppress every warning category for the whole process (no category or module
# filter is given), so library warnings do not clutter the printed report.
warnings.filterwarnings('ignore')
class LargeOrdersAnalyzer:
    """Analyse large 买1量 (bid-1) / 卖1量 (ask-1) quantities in tick data.

    Workflow (see ``run_analysis``): load a parquet/CSV file, select the rows
    whose 买1量 / 卖1量 exceeds a threshold, measure the traded price (成交价)
    over the next 100 and 200 rows that follow each selected row, print a
    comparison report and write CSV/PNG artefacts into ``output_dir``.

    Result attributes (``buy1_results_100`` etc.) are ``None`` until the
    corresponding calculation has run; every consumer treats ``None`` and an
    empty frame the same way.
    """

    _SEQ_COL = '数列号'
    _PRICE_COL = '成交价'
    _BUY_KEY = '买1量'
    _SELL_KEY = '卖1量'
    _WINDOWS = (100, 200)

    # Thresholds of the most recent analyze_* calls; used to label the summary CSV.
    buy1_threshold = 99
    sell1_threshold = 99

    def __init__(self, data_file=None, output_dir="large_orders"):
        """Create the analyzer and make sure the output directory exists.

        Args:
            data_file (str): Path of the data file; defaults to
                data/au2512_20251013.parquet.
            output_dir (str): Directory for generated files; defaults to
                large_orders.
        """
        self.data_file = data_file or "data/au2512_20251013.parquet"
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.df = None
        self.buy1_large_orders = None
        self.sell1_large_orders = None
        self.buy1_results_100 = None
        self.buy1_results_200 = None
        self.sell1_results_100 = None
        self.sell1_results_200 = None
        self.setup_chinese_font()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _has_rows(frame):
        """Return True when *frame* is a non-empty DataFrame (None counts as empty)."""
        return frame is not None and len(frame) > 0

    def _find_column(self, keyword):
        """Return the first column of ``self.df`` whose name contains *keyword*, else None."""
        return next((col for col in self.df.columns if keyword in str(col)), None)

    def _share_of_total(self, count):
        """Return *count* as a percentage of all loaded rows (0.0 when no rows are loaded)."""
        total = len(self.df) if self.df is not None else 0
        return count / total * 100 if total else 0.0

    def _sides(self):
        """Return (label, large_orders, results_100, results_200) for the buy and sell side."""
        return (
            (self._BUY_KEY, self.buy1_large_orders,
             self.buy1_results_100, self.buy1_results_200),
            (self._SELL_KEY, self.sell1_large_orders,
             self.sell1_results_100, self.sell1_results_200),
        )

    @staticmethod
    def _decline_rate(results, window):
        """Fraction of rows whose window minimum price is below the row's own price."""
        return (results[f'未来{window}笔价格下跌'] > 0).sum() / len(results)

    # ------------------------------------------------------------------
    # Setup / loading
    # ------------------------------------------------------------------
    def setup_chinese_font(self):
        """Select the first installed CJK-capable font for matplotlib.

        matplotlib does not raise when an unknown family is put into
        ``rcParams``, so availability is checked against the font manager's
        list of installed fonts instead of by drawing test text.
        """
        candidates = [
            'Microsoft YaHei', 'SimHei', 'SimSun', 'KaiTi', 'FangSong',
            'PingFang SC', 'Noto Sans CJK SC', 'WenQuanYi Micro Hei',
        ]
        plt.rcParams['axes.unicode_minus'] = False
        try:
            from matplotlib import font_manager
            installed = {entry.name for entry in font_manager.fontManager.ttflist}
        except Exception as e:
            print(f"字体设置警告: {e}")
            installed = set()
        for font in candidates:
            if font in installed:
                plt.rcParams['font.sans-serif'] = [font, 'DejaVu Sans']
                print(f"使用中文字体: {font}")
                return
        plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
        print("警告: 无法加载中文字体,使用默认字体")

    def load_data(self):
        """Load ``self.data_file`` into ``self.df`` ordered by 数列号.

        When the file has no 数列号 column the original row order is kept and
        a 数列号 column holding the row position is added, so the later
        "following trades" lookup still works.

        Returns:
            bool: True on success, False (after printing the reason) on failure.
        """
        try:
            if not os.path.exists(self.data_file):
                raise FileNotFoundError(f"数据文件不存在: {self.data_file}")
            print(f"正在加载数据: {self.data_file}")
            suffix = Path(self.data_file).suffix.lower()
            if suffix == '.parquet':
                self.df = pd.read_parquet(self.data_file)
            elif suffix == '.csv':
                self.df = pd.read_csv(self.data_file)
            else:
                raise ValueError("不支持的文件格式,请使用 .parquet 或 .csv 文件")
            print(f"数据加载成功: {len(self.df):,} 条记录")
            if self._SEQ_COL in self.df.columns:
                # Stable sort keeps the file order of rows sharing a sequence number.
                self.df = self.df.sort_values(self._SEQ_COL, kind='stable').reset_index(drop=True)
                print("数据已按数列号排序")
            else:
                self.df = self.df.reset_index(drop=True)
                self.df[self._SEQ_COL] = np.arange(len(self.df))
                print("警告: 未找到数列号列,使用原始顺序")
            return True
        except Exception as e:
            print(f"数据加载失败: {e}")
            return False

    # ------------------------------------------------------------------
    # Selection of large orders
    # ------------------------------------------------------------------
    def _select_large_orders(self, keyword, threshold):
        """Return the rows whose *keyword* column exceeds *threshold*, printing statistics.

        Raises:
            ValueError: if no column name contains *keyword*.
        """
        print(f"\n=== 分析{keyword} > {threshold} 的数据 ===")
        column = self._find_column(keyword)
        if column is None:
            raise ValueError(f"未找到{keyword}列")
        print(f"使用{keyword}列: {column}")
        selected = self.df[self.df[column] > threshold].copy().reset_index(drop=True)
        print(f"{keyword} > {threshold} 的记录数: {len(selected):,}")
        print(f"占总记录比例: {self._share_of_total(len(selected)):.2f}%")
        if len(selected) == 0:
            print(f"未找到符合条件的{keyword}记录")
            return selected
        volumes = selected[column]
        print(f"\n{keyword}统计:")
        print(f" 最小值: {volumes.min()}")
        print(f" 最大值: {volumes.max()}")
        print(f" 平均值: {volumes.mean():.1f}")
        print(f" 中位数: {volumes.median()}")
        return selected

    def analyze_large_buy1_orders(self, threshold=99):
        """Select rows with 买1量 above *threshold*; return True when any were found."""
        self.buy1_large_orders = self._select_large_orders(self._BUY_KEY, threshold)
        self.buy1_threshold = threshold
        return self._has_rows(self.buy1_large_orders)

    def analyze_large_sell1_orders(self, threshold=99):
        """Select rows with 卖1量 above *threshold*; return True when any were found."""
        self.sell1_large_orders = self._select_large_orders(self._SELL_KEY, threshold)
        self.sell1_threshold = threshold
        return self._has_rows(self.sell1_large_orders)

    # ------------------------------------------------------------------
    # Forward price ranges
    # ------------------------------------------------------------------
    def calculate_future_price_ranges(self):
        """Compute the forward price statistics for every side that has large orders."""
        print("\n=== 计算后续成交价范围 ===")
        if self._has_rows(self.buy1_large_orders):
            self._calculate_buy1_ranges()
        if self._has_rows(self.sell1_large_orders):
            self._calculate_sell1_ranges()
        return True

    def _calculate_ranges(self, orders, keyword):
        """Return one result frame per window (100, 200) for the given large orders.

        For each order the "future" rows are the first ``window`` rows of
        ``self.df`` whose 数列号 is at least the order's 数列号 + 1. Orders
        with no future rows are skipped. Values keep the dtype of their source
        column (no row-wise upcasting).

        Raises:
            ValueError: if no column name contains *keyword*.
            KeyError: if 数列号 or 成交价 is missing from the data.
        """
        volume_col = self._find_column(keyword)
        if volume_col is None:
            raise ValueError(f"未找到{keyword}列")
        seq_values = self.df[self._SEQ_COL].to_numpy()
        price_series = self.df[self._PRICE_COL]
        # On sorted data a binary search finds the first future row; otherwise
        # fall back to a full mask so unsorted input gives the same rows as before.
        is_sorted = len(seq_values) < 2 or bool(np.all(seq_values[:-1] <= seq_values[1:]))

        def future_prices(current_seq, window):
            if is_sorted:
                start = int(np.searchsorted(seq_values, current_seq + 1, side='left'))
                return price_series.iloc[start:start + window]
            return price_series[seq_values >= current_seq + 1].head(window)

        collected = {window: [] for window in self._WINDOWS}
        triples = zip(orders[self._SEQ_COL].to_numpy(),
                      orders[self._PRICE_COL].to_numpy(),
                      orders[volume_col].to_numpy())
        for current_seq, current_price, volume in triples:
            for window in self._WINDOWS:
                future = future_prices(current_seq, window)
                if len(future) == 0:
                    continue
                lowest = future.min()
                highest = future.max()
                final_price = future.iloc[-1]
                net_change = final_price - current_price
                tag = f'未来{window}笔'
                collected[window].append({
                    '数列号': current_seq,
                    '成交价': current_price,
                    keyword: volume,
                    f'{tag}最小价': lowest,
                    f'{tag}最大价': highest,
                    f'{tag}最终价': final_price,
                    f'{tag}价格范围': highest - lowest,
                    # Positive value = price traded below the order's price.
                    f'{tag}价格下跌': current_price - lowest,
                    # Positive value = price traded above the order's price.
                    f'{tag}价格上涨': highest - current_price,
                    f'{tag}净变化': net_change,
                    f'{tag}净变化%': (net_change / current_price) * 100,
                    '实际样本数': len(future),
                })
        return tuple(pd.DataFrame(collected[window]) for window in self._WINDOWS)

    def _calculate_buy1_ranges(self):
        """Fill ``buy1_results_100`` / ``buy1_results_200`` from ``buy1_large_orders``."""
        self.buy1_results_100, self.buy1_results_200 = self._calculate_ranges(
            self.buy1_large_orders, self._BUY_KEY)
        print(f"买1量分析完成: 100笔 {len(self.buy1_results_100)} 条, 200笔 {len(self.buy1_results_200)}")

    def _calculate_sell1_ranges(self):
        """Fill ``sell1_results_100`` / ``sell1_results_200`` from ``sell1_large_orders``."""
        self.sell1_results_100, self.sell1_results_200 = self._calculate_ranges(
            self.sell1_large_orders, self._SELL_KEY)
        print(f"卖1量分析完成: 100笔 {len(self.sell1_results_100)} 条, 200笔 {len(self.sell1_results_200)}")

    # ------------------------------------------------------------------
    # Console report
    # ------------------------------------------------------------------
    def _print_performance_table(self, window):
        """Print the per-side comparison table for one look-ahead window."""
        print(f"\n【{window}笔后续表现对比】")
        print(f"{'类型':>8} {'价格范围':>10} {'趋势变化':>10} {'成功率':>8} {'平均幅度%':>12}")
        print("-" * 60)
        for label, _, results_100, results_200 in self._sides():
            results = results_100 if window == 100 else results_200
            if not self._has_rows(results):
                continue
            avg_range = results[f'未来{window}笔价格范围'].mean()
            avg_pct = results[f'未来{window}笔净变化%'].mean()
            decline_rate = self._decline_rate(results, window) * 100
            trend = "下跌" if avg_pct < 0 else "上涨"
            # '>+11.3f' = right-aligned with an explicit sign ('+>' would pad with '+').
            print(f"{label:>8} {avg_range:>10.2f} {trend:>10} {decline_rate:>7.1f}% {avg_pct:>+11.3f}%")

    def _print_samples(self, label, results, pct_header):
        """Print the first five 100-row results of one side."""
        print(f"\n{label}大单样本:")
        if not self._has_rows(results):
            return
        print(f"{'序号':>4} {'数列号':>8} {'成交价':>8} {label:>8} {'100笔范围':>10} {pct_header:>12}")
        print("-" * 60)
        for i in range(min(5, len(results))):
            row = results.iloc[i]
            print(f"{i+1:>4} {row['数列号']:>8.0f} {row['成交价']:>8.2f} {row[label]:>8.0f} "
                  f"{row['未来100笔价格范围']:>10.2f} {row['未来100笔净变化%']:>+12.3f}%")

    def print_analysis_results(self):
        """Print the full comparison report; sides without data are skipped."""
        print("\n" + "=" * 90)
        print("AU2512期货大单分析结果 (买1量 vs 卖1量)")
        print("=" * 90)

        print("\n【大单数据对比】")
        print(f"{'类型':>8} {'样本数':>8} {'占比':>8} {'成交量范围':>12} {'平均成交量':>12}")
        print("-" * 60)
        for label, orders, _, _ in self._sides():
            if not self._has_rows(orders):
                continue
            volumes = orders[self._find_column(label)]
            vol_range = f"{volumes.min()}-{volumes.max()}"
            avg_vol = f"{volumes.mean():.1f}"
            print(f"{label:>8} {len(orders):>8} {self._share_of_total(len(orders)):>7.2f}% {vol_range:>12} {avg_vol:>12}")

        for window in self._WINDOWS:
            self._print_performance_table(window)

        print("\n【详细分析 - 各类型前5个样本】")
        self._print_samples(self._BUY_KEY, self.buy1_results_100, '100笔涨幅%')
        self._print_samples(self._SELL_KEY, self.sell1_results_100, '100笔变化%')

        print("\n【业务洞察】")
        print("1. 大单样本数量对比:")
        if self._has_rows(self.buy1_large_orders) and self._has_rows(self.sell1_large_orders):
            buy_count = len(self.buy1_large_orders)
            sell_count = len(self.sell1_large_orders)
            print(f" - 买1量大单: {buy_count} 个 ({self._share_of_total(buy_count):.2f}%)")
            print(f" - 卖1量大单: {sell_count} 个 ({self._share_of_total(sell_count):.2f}%)")
            ratio = buy_count / sell_count
            print(f" - 买卖单比例: {ratio:.1f}:1 (买单占{'主导' if ratio > 1 else '劣势'})")
        print("2. 后续价格波动分析:")
        if self._has_rows(self.buy1_results_100) and self._has_rows(self.sell1_results_100):
            buy_volatility = self.buy1_results_100['未来100笔价格范围'].mean()
            sell_volatility = self.sell1_results_100['未来100笔价格范围'].mean()
            print(f" - 买1量大单后平均波动: {buy_volatility:.2f}")
            print(f" - 卖1量大单后平均波动: {sell_volatility:.2f}")
            higher_volatility = "买1量" if buy_volatility > sell_volatility else "卖1量"
            print(f" - {higher_volatility}大单引发更大的价格波动")
        print("3. 趋势预测能力:")
        overall_trend = "强势上涨" if self._is_overall_uptrend() else "震荡整理"
        print(f" - 市场整体趋势: {overall_trend}")
        if self._is_signal_strong():
            strength = '极强'
        elif self._is_signal_moderate():
            strength = '中等'
        else:
            strength = '较弱'
        print(f" - 大单信号强度: {strength}")

    def _is_overall_uptrend(self):
        """True when more than half of both sides' 100-row net changes are positive."""
        shares = []
        for results in (self.buy1_results_100, self.sell1_results_100):
            if self._has_rows(results):
                shares.append((results['未来100笔净变化'] > 0).sum() / len(results))
            else:
                shares.append(0)
        return bool(shares[0] > 0.5 and shares[1] > 0.5)

    def _is_signal_strong(self):
        """True when over 80% of buy-side rows saw a lower price within 100 rows."""
        if not self._has_rows(self.buy1_results_100):
            return False
        return bool(self._decline_rate(self.buy1_results_100, 100) > 0.8)

    def _is_signal_moderate(self):
        """True when that buy-side share lies in (50%, 80%]."""
        if not self._has_rows(self.buy1_results_100):
            return False
        return bool(0.5 < self._decline_rate(self.buy1_results_100, 100) <= 0.8)

    # ------------------------------------------------------------------
    # Chart
    # ------------------------------------------------------------------
    def _plot_sample_counts(self, ax):
        """Bar chart of how many large orders each side has."""
        names, counts, colors = [], [], []
        for label, orders, color in ((self._BUY_KEY, self.buy1_large_orders, 'lightgreen'),
                                     (self._SELL_KEY, self.sell1_large_orders, 'lightcoral')):
            if self._has_rows(orders):
                names.append(f'{label}大单')
                counts.append(len(orders))
                colors.append(color)
        bars = ax.bar(names, counts, color=colors, alpha=0.8, edgecolor='black')
        ax.set_ylabel('样本数量', fontsize=12)
        ax.set_title('大单样本数量对比', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')
        for bar, count in zip(bars, counts):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 1,
                    f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')

    def _plot_range_comparison(self, ax):
        """Grouped bars of the mean price range per window, one group member per side.

        Both sides share the same fixed x positions, so a side with missing
        results cannot cause a shape mismatch; its bars are simply absent.
        """
        categories = [f'{window}笔' for window in self._WINDOWS]
        x = np.arange(len(categories))
        width = 0.35
        layout = (
            (-width / 2, self._BUY_KEY, 'lightgreen',
             (self.buy1_results_100, self.buy1_results_200)),
            (width / 2, self._SELL_KEY, 'lightcoral',
             (self.sell1_results_100, self.sell1_results_200)),
        )
        for offset, label, color, frames in layout:
            means = [
                frame[f'未来{window}笔价格范围'].mean() if self._has_rows(frame) else np.nan
                for window, frame in zip(self._WINDOWS, frames)
            ]
            if all(np.isnan(value) for value in means):
                continue
            ax.bar(x + offset, means, width, label=label, color=color, alpha=0.8)
        ax.set_xlabel('时间窗口', fontsize=12)
        ax.set_ylabel('平均价格范围 (元)', fontsize=12)
        ax.set_title('价格波动范围对比', fontsize=14, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(categories)
        if ax.get_legend_handles_labels()[0]:
            ax.legend()
        ax.grid(True, alpha=0.3, axis='y')

    def _plot_decline_rates(self, ax):
        """Bar chart of each side's 100-row decline share (in percent)."""
        names, rates, colors = [], [], []
        for label, results, color in ((self._BUY_KEY, self.buy1_results_100, 'lightgreen'),
                                      (self._SELL_KEY, self.sell1_results_100, 'lightcoral')):
            if self._has_rows(results):
                names.append(f'{label}\n下跌概率')
                rates.append(self._decline_rate(results, 100) * 100)
                colors.append(color)
        bars = ax.bar(names, rates, color=colors, alpha=0.8, edgecolor='black')
        ax.set_ylabel('下跌概率 (%)', fontsize=12)
        ax.set_title('价格下跌概率对比', fontsize=14, fontweight='bold')
        ax.set_ylim(0, 105)
        ax.grid(True, alpha=0.3, axis='y')
        for bar, rate in zip(bars, rates):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() - 3,
                    f'{rate:.1f}%', ha='center', va='top', fontsize=11, fontweight='bold')

    def _plot_volume_scatter(self, ax):
        """Scatter of order quantity against the following 100-row price range."""
        for label, results, color in ((self._BUY_KEY, self.buy1_results_100, 'green'),
                                      (self._SELL_KEY, self.sell1_results_100, 'red')):
            if self._has_rows(results):
                ax.scatter(results[label], results['未来100笔价格范围'],
                           alpha=0.7, color=color, label=label, s=50)
        ax.set_xlabel('挂单量 (手)', fontsize=12)
        ax.set_ylabel('价格范围 (元)', fontsize=12)
        ax.set_title('挂单量 vs 价格波动', fontsize=14, fontweight='bold')
        if ax.get_legend_handles_labels()[0]:
            ax.legend()
        ax.grid(True, alpha=0.3)

    @staticmethod
    def _plot_change_bars(ax, results, window, label, up_color, down_color):
        """Bar chart of the net price change of the first 25 rows of *results*."""
        shown = min(25, len(results))
        changes = results[f'未来{window}笔净变化'].iloc[:shown].values
        bar_colors = [up_color if change >= 0 else down_color for change in changes]
        ax.bar(range(1, shown + 1), changes, width=0.6, alpha=0.8, color=bar_colors,
               label=f'{label}{window}笔价格变化(元)', edgecolor='black', linewidth=0.5)
        ax.axhline(y=0, color='black', linestyle='-', linewidth=1, alpha=0.7)
        ax.set_xlabel('样本序号', fontsize=12)
        ax.set_ylabel('价格变化幅度 (元)', fontsize=12)
        ax.set_title(f'{label}大单 - {window}笔价格变化', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')
        for position, change in enumerate(changes, start=1):
            rising = change >= 0
            ax.text(position, change + 0.01 if rising else change - 0.01, f'{change:+.2f}',
                    ha='center', va='bottom' if rising else 'top', fontsize=9, rotation=45)

    def _plot_distribution_box(self, ax, results_100, results_200, label, pct_word, colors):
        """Box plots of price range and (net change % × 100) for both windows."""
        frames = tuple(zip(self._WINDOWS, (results_100, results_200)))
        data, tick_labels = [], []
        for window, frame in frames:
            if self._has_rows(frame):
                data.append(frame[f'未来{window}笔价格范围'].to_numpy())
                tick_labels.append(f'{window}笔价格范围')
        for window, frame in frames:
            if self._has_rows(frame):
                # Percent changes are scaled by 100 so they are visible next to the ranges.
                data.append((frame[f'未来{window}笔净变化%'] * 100).to_numpy())
                tick_labels.append(f'{window}笔{pct_word}%(×100)')
        boxes = ax.boxplot(data, patch_artist=True)
        # Set tick labels separately: the boxplot ``labels`` keyword is deprecated
        # in recent matplotlib releases.
        ax.set_xticklabels(tick_labels)
        for patch, color in zip(boxes['boxes'], colors):
            patch.set_facecolor(color)
        ax.set_ylabel('数值', fontsize=12)
        ax.set_title(f'{label}大单 - 分布特征', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)

    def create_comprehensive_chart(self):
        """Render the multi-panel comparison figure and save it as a PNG.

        Returns:
            bool: True when the file was written, False (after printing the
            error) otherwise. The figure is always closed.
        """
        print("\n=== 生成综合分析图表 ===")
        fig = None
        try:
            fig = plt.figure(figsize=(24, 16))
            gs = fig.add_gridspec(4, 4, hspace=0.3, wspace=0.3)

            # Row 1: overview panels.
            self._plot_sample_counts(fig.add_subplot(gs[0, 0]))
            self._plot_range_comparison(fig.add_subplot(gs[0, 1]))
            self._plot_decline_rates(fig.add_subplot(gs[0, 2]))
            self._plot_volume_scatter(fig.add_subplot(gs[0, 3]))

            # Row 2: per-sample net change bars.
            bar_panels = (
                (gs[1, 0], self.buy1_results_100, 100, self._BUY_KEY, 'lightgreen', 'lightcoral'),
                (gs[1, 1], self.sell1_results_100, 100, self._SELL_KEY, 'lightgreen', 'lightcoral'),
                (gs[1, 2], self.buy1_results_200, 200, self._BUY_KEY, 'darkgreen', 'darkred'),
                (gs[1, 3], self.sell1_results_200, 200, self._SELL_KEY, 'darkgreen', 'darkred'),
            )
            for cell, results, window, label, up_color, down_color in bar_panels:
                if self._has_rows(results):
                    self._plot_change_bars(fig.add_subplot(cell), results, window,
                                           label, up_color, down_color)

            # Row 3: distribution box plots.
            if self._has_rows(self.buy1_results_100):
                self._plot_distribution_box(
                    fig.add_subplot(gs[2, :2]), self.buy1_results_100, self.buy1_results_200,
                    self._BUY_KEY, '涨幅', ['lightgreen', 'darkgreen', 'lightblue', 'blue'])
            if self._has_rows(self.sell1_results_100):
                self._plot_distribution_box(
                    fig.add_subplot(gs[2, 2:]), self.sell1_results_100, self.sell1_results_200,
                    self._SELL_KEY, '跌幅', ['lightcoral', 'darkred', 'lightpink', 'red'])

            # Row 4: text summary.
            summary_ax = fig.add_subplot(gs[3, :])
            summary_ax.axis('off')
            summary_ax.text(0.05, 0.95, self._generate_summary_text(),
                            transform=summary_ax.transAxes, fontsize=10, ha='left', va='top',
                            bbox=dict(boxstyle='round,pad=0.5', facecolor='lightyellow', alpha=0.3))

            fig.suptitle('AU2512期货大单综合分析报告 (买1量 vs 卖1量)',
                         fontsize=22, fontweight='bold', y=0.98)
            fig.tight_layout()
            chart_file = os.path.join(self.output_dir, 'large_orders_comprehensive_analysis.png')
            fig.savefig(chart_file, dpi=300, bbox_inches='tight',
                        facecolor='white', edgecolor='none')
            print(f"综合分析图表已保存: {chart_file}")
            return True
        except Exception as e:
            print(f"创建综合图表时发生错误: {e}")
            return False
        finally:
            if fig is not None:
                plt.close(fig)

    def _generate_summary_text(self):
        """Build the multi-line summary shown in the chart's bottom panel.

        Uses the columns actually produced by the range calculation
        (未来100笔净变化 / 未来100笔净变化%): "上涨概率" is the share of
        positive net changes and "下跌概率" the share of negative ones.
        """
        text_parts = ["大单综合分析摘要\n"]
        text_parts.append("=" * 50)
        text_parts.append("样本统计:")
        for label, orders, _, _ in self._sides():
            if not self._has_rows(orders):
                continue
            volumes = orders[self._find_column(label)]
            text_parts.append(f"• {label}大单: {len(orders)} 个 ({self._share_of_total(len(orders)):.2f}%)")
            text_parts.append(f" {label}范围: {volumes.min()}-{volumes.max()}")

        text_parts.append("\n" + "=" * 50)
        text_parts.append("表现对比:")
        buy_results = self.buy1_results_100
        sell_results = self.sell1_results_100
        buy_up_share = None
        sell_down_share = None
        if self._has_rows(buy_results):
            buy_up_share = (buy_results['未来100笔净变化'] > 0).sum() / len(buy_results)
            text_parts.append("• 买1量大单后:")
            text_parts.append(f" - 平均价格波动: {buy_results['未来100笔价格范围'].mean():.3f}")
            text_parts.append(f" - 平均涨幅: {buy_results['未来100笔净变化%'].mean():+.3f}%")
            text_parts.append(f" - 上涨概率: {buy_up_share * 100:.1f}%")
        if self._has_rows(sell_results):
            sell_down_share = (sell_results['未来100笔净变化'] < 0).sum() / len(sell_results)
            text_parts.append("• 卖1量大单后:")
            text_parts.append(f" - 平均价格波动: {sell_results['未来100笔价格范围'].mean():.3f}")
            text_parts.append(f" - 平均变化: {sell_results['未来100笔净变化%'].mean():+.3f}%")
            text_parts.append(f" - 下跌概率: {sell_down_share * 100:.1f}%")

        text_parts.append("\n" + "=" * 50)
        text_parts.append("市场洞察:")
        if buy_up_share is not None and sell_down_share is not None:
            if buy_up_share > 0.9 and sell_down_share < 0.3:
                text_parts.append("• 市场特征: 强势上涨市场,买盘力量占主导")
            elif buy_up_share > 0.7 and sell_down_share > 0.7:
                text_parts.append("• 市场特征: 震荡市场,多空力量均衡")
            else:
                text_parts.append("• 市场特征: 趋势不明,需进一步观察")

        text_parts.append("\n" + "=" * 50)
        text_parts.append("交易建议:")
        text_parts.append("• 大单信号具有较高的预测价值")
        text_parts.append("• 买1量大单可作为价格上涨的参考信号")
        text_parts.append("• 在强势市场中卖1量大单可能被完全吸收")
        text_parts.append("• 建议结合其他技术指标综合判断")
        return "\n".join(text_parts)

    # ------------------------------------------------------------------
    # Persistence / orchestration
    # ------------------------------------------------------------------
    def save_results(self):
        """Write the detail CSVs, the summary CSV and the chart into ``output_dir``.

        Errors are printed rather than raised. Summary labels carry the
        threshold that was actually used for each side.
        """
        try:
            written = []
            sides = (
                ('buy1', self._BUY_KEY, self.buy1_threshold, self.buy1_large_orders,
                 self.buy1_results_100, self.buy1_results_200),
                ('sell1', self._SELL_KEY, self.sell1_threshold, self.sell1_large_orders,
                 self.sell1_results_100, self.sell1_results_200),
            )
            metrics, values = [], []
            for prefix, label, threshold, orders, results_100, results_200 in sides:
                for window, results in ((100, results_100), (200, results_200)):
                    if not self._has_rows(results):
                        continue
                    file_name = f'{prefix}_large_orders_{window}_analysis.csv'
                    results.to_csv(os.path.join(self.output_dir, file_name),
                                   index=False, encoding='utf-8-sig')
                    written.append(f" {file_name} - {label}{window}笔详细分析")

                if not self._has_rows(orders):
                    continue
                metrics.append(f'{label}>{threshold}的样本数')
                values.append(len(orders))
                if self._has_rows(results_100):
                    metrics.append(f'{label}100笔平均价格范围')
                    values.append(f"{results_100['未来100笔价格范围'].mean():.2f}")
                if self._has_rows(results_200):
                    metrics.append(f'{label}200笔平均价格范围')
                    values.append(f"{results_200['未来200笔价格范围'].mean():.2f}")
                if self._has_rows(results_100):
                    metrics.append(f'{label}100笔平均净变化%')
                    values.append(f"{results_100['未来100笔净变化%'].mean():+.2f}%")
                    metrics.append(f'{label}100笔下跌概率%')
                    values.append(f"{self._decline_rate(results_100, 100) * 100:.1f}%")

            summary_df = pd.DataFrame({'指标': metrics, '数值': values})
            summary_df.to_csv(os.path.join(self.output_dir, 'large_orders_summary.csv'),
                              index=False, encoding='utf-8-sig')
            print(f"\n分析结果已保存到 {self.output_dir} 目录:")
            for line in written:
                print(line)
            print(" large_orders_summary.csv - 综合汇总分析")
            self.create_comprehensive_chart()
        except Exception as e:
            print(f"保存结果时发生错误: {e}")

    def run_analysis(self, threshold=99):
        """Run load → select → forward ranges → report → save.

        Returns:
            bool: False when loading fails or neither side has a row above
            *threshold*; True otherwise.
        """
        print(f"开始AU2512期货大单综合分析 (阈值: {threshold})...")
        if not self.load_data():
            return False
        buy1_success = self.analyze_large_buy1_orders(threshold)
        sell1_success = self.analyze_large_sell1_orders(threshold)
        if not buy1_success and not sell1_success:
            print("未找到任何符合条件的记录")
            return False
        self.calculate_future_price_ranges()
        self.print_analysis_results()
        self.save_results()
        return True
def main():
    """Command-line entry point.

    Parses the data file path, the quantity threshold and the output
    directory, verifies that the data file exists, runs the analysis and
    exits with status 1 when the file is missing or the analysis fails.
    """
    cli = argparse.ArgumentParser(description='AU2512期货大单综合分析工具')
    cli.add_argument(
        'data_file',
        nargs='?',
        default='data/au2512_20251013.parquet',
        help='数据文件路径 (默认: data/au2512_20251013.parquet)',
    )
    cli.add_argument(
        '--threshold', '-t',
        type=int,
        default=99,
        help='买卖单量阈值 (默认: 99)',
    )
    cli.add_argument(
        '--output-dir', '-o',
        default='large_orders',
        help='输出目录 (默认: large_orders)',
    )
    options = cli.parse_args()

    # Fail early with a usage hint when the data file is missing.
    if not os.path.exists(options.data_file):
        usage_hint = [
            f"错误: 数据文件不存在 - {options.data_file}",
            "\n使用方法:",
            " python analyze_large_orders.py [数据文件路径] [--threshold 阈值] [--output-dir 输出目录]",
            " python analyze_large_orders.py # 使用默认参数",
        ]
        print("\n".join(usage_hint))
        sys.exit(1)

    # Build the analyzer and run the whole pipeline.
    analyzer = LargeOrdersAnalyzer(options.data_file, options.output_dir)
    if not analyzer.run_analysis(options.threshold):
        print("\n分析失败!")
        sys.exit(1)
    print(f"\n分析完成!结果已保存到: {options.output_dir}")


if __name__ == "__main__":
    main()