#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AU2512 futures large-order analysis tool (combined bid-1 / ask-1 volume).

The script selects ticks whose best-bid volume (买1量) or best-ask volume
(卖1量) exceeds a threshold and, for each such tick, measures the traded-price
range over the following 100 and 200 ticks.

A large bid-1 volume usually means heavy resting buy orders (a possible
signal of rising prices); a large ask-1 volume usually means heavy resting
sell orders (a possible signal of falling prices).

Usage:
    python analyze_large_orders.py [data_file]

If no file is given, data/au2512_20251013.parquet is analysed.

Output directory: large_orders/
"""

import argparse
import os
import sys
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
    from matplotlib import font_manager
except ImportError:
    # Plotting is optional: the numeric analysis and CSV export still work
    # without matplotlib; only the chart is skipped.
    plt = None
    font_manager = None

warnings.filterwarnings('ignore')

# Column names expected in the tick data.
SEQ_COL = '数列号'
PRICE_COL = '成交价'

# Labels double as the keyword used to locate the volume column and as the
# volume key written into the result frames.
BUY_LABEL = '买1量'
SELL_LABEL = '卖1量'

# Look-ahead windows, in number of ticks.
WINDOWS = (100, 200)

# Per-side colours used by the chart panels.
SIDE_COLORS = {BUY_LABEL: 'lightgreen', SELL_LABEL: 'lightcoral'}
SCATTER_COLORS = {BUY_LABEL: 'green', SELL_LABEL: 'red'}


class LargeOrdersAnalyzer:
    """Large-order analyzer for bid-1 and ask-1 volume.

    Typical use is ``LargeOrdersAnalyzer(path).run_analysis(threshold)``.
    The intermediate frames are kept on the instance:

    * ``buy1_large_orders`` / ``sell1_large_orders`` - filtered ticks
    * ``buy1_results_100`` / ``buy1_results_200`` / ``sell1_results_100`` /
      ``sell1_results_200`` - per-tick look-ahead statistics (``None`` until
      computed, and left ``None`` for a side that has no large orders)
    """

    def __init__(self, data_file=None, output_dir="large_orders"):
        """Initialise the analyzer and make sure the output directory exists.

        Args:
            data_file (str): Path of the data file; defaults to
                data/au2512_20251013.parquet.
            output_dir (str): Output directory; defaults to large_orders.
        """
        self.data_file = data_file or "data/au2512_20251013.parquet"
        self.output_dir = output_dir

        os.makedirs(self.output_dir, exist_ok=True)

        self.df = None
        self.buy1_large_orders = None
        self.sell1_large_orders = None
        self.buy1_results_100 = None
        self.buy1_results_200 = None
        self.sell1_results_100 = None
        self.sell1_results_200 = None
        # Thresholds actually used, so that reports do not hard-code 99.
        self._buy1_threshold = 99
        self._sell1_threshold = 99

        self.setup_chinese_font()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _has_rows(frame):
        """Return True when *frame* is a non-empty DataFrame/Series."""
        return frame is not None and len(frame) > 0

    @staticmethod
    def _share(mask):
        """Return the fraction of True values in a boolean Series."""
        return mask.sum() / len(mask)

    def _pct_of_total(self, count):
        """Return *count* as a percentage of all loaded ticks (0 if empty)."""
        total = len(self.df)
        return count / total * 100 if total else 0.0

    def _find_column(self, keyword):
        """Return the first column whose name contains *keyword*.

        Raises:
            ValueError: If no column name contains *keyword*.
        """
        for col in self.df.columns:
            if keyword in str(col):
                return col
        raise ValueError(f"未找到{keyword}列")

    def _sides(self):
        """Return (label, large orders, {window: results}, threshold) per side."""
        return (
            (BUY_LABEL, self.buy1_large_orders,
             {100: self.buy1_results_100, 200: self.buy1_results_200},
             self._buy1_threshold),
            (SELL_LABEL, self.sell1_large_orders,
             {100: self.sell1_results_100, 200: self.sell1_results_200},
             self._sell1_threshold),
        )

    # ------------------------------------------------------------------
    # Setup / loading
    # ------------------------------------------------------------------
    def setup_chinese_font(self):
        """Select a font that can render Chinese labels.

        The candidate list is checked against the fonts registered with
        matplotlib's font manager. Drawing a test string (the previous
        approach) never fails for a missing font - matplotlib silently falls
        back - so it could not detect availability.
        """
        if plt is None:
            return

        candidates = [
            'Microsoft YaHei', 'SimHei', 'SimSun', 'KaiTi', 'FangSong',
            'PingFang SC', 'Noto Sans CJK SC', 'WenQuanYi Micro Hei',
            'Arial Unicode MS',
        ]
        plt.rcParams['axes.unicode_minus'] = False
        try:
            installed = {entry.name for entry in font_manager.fontManager.ttflist}
        except Exception as e:  # font cache problems must not abort analysis
            print(f"字体设置警告: {e}")
            plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
            return

        chosen = next((name for name in candidates if name in installed), None)
        if chosen is None:
            plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
            print("警告: 无法加载中文字体,使用默认字体")
            return

        # Keep DejaVu Sans as a fallback for glyphs the CJK font lacks.
        plt.rcParams['font.sans-serif'] = [chosen, 'DejaVu Sans']
        print(f"使用中文字体: {chosen}")

    def load_data(self):
        """Load the data file into ``self.df``, ordered by sequence number.

        Supports ``.parquet`` and ``.csv`` (case-insensitive suffix). When the
        sequence column is absent the original row order is kept and a
        row-number sequence column is added, because the look-ahead
        calculation needs one.

        Returns:
            bool: True on success, False if loading failed (the error is
            printed, not raised).
        """
        try:
            if not os.path.exists(self.data_file):
                raise FileNotFoundError(f"数据文件不存在: {self.data_file}")

            print(f"正在加载数据: {self.data_file}")

            suffix = Path(self.data_file).suffix.lower()
            if suffix == '.parquet':
                self.df = pd.read_parquet(self.data_file)
            elif suffix == '.csv':
                self.df = pd.read_csv(self.data_file)
            else:
                raise ValueError("不支持的文件格式,请使用 .parquet 或 .csv 文件")

            print(f"数据加载成功: {len(self.df):,} 条记录")

            if SEQ_COL in self.df.columns:
                # Stable sort keeps file order among equal sequence numbers.
                self.df = self.df.sort_values(SEQ_COL, kind='stable').reset_index(drop=True)
                print("数据已按数列号排序")
            else:
                print("警告: 未找到数列号列,使用原始顺序")
                self.df = self.df.reset_index(drop=True)
                self.df[SEQ_COL] = np.arange(len(self.df))

            return True

        except Exception as e:
            print(f"数据加载失败: {e}")
            return False

    # ------------------------------------------------------------------
    # Filtering
    # ------------------------------------------------------------------
    def _analyze_large_orders(self, label, threshold):
        """Filter ticks whose *label* volume exceeds *threshold* and print stats.

        Returns:
            pandas.DataFrame: The matching ticks with a fresh 0..n-1 index
            (possibly empty).

        Raises:
            ValueError: If no column name contains *label*.
        """
        print(f"\n=== 分析{label} > {threshold} 的数据 ===")

        volume_col = self._find_column(label)
        print(f"使用{label}列: {volume_col}")

        selected = self.df[self.df[volume_col] > threshold].copy().reset_index(drop=True)

        print(f"{label} > {threshold} 的记录数: {len(selected):,}")
        print(f"占总记录比例: {self._pct_of_total(len(selected)):.2f}%")

        if len(selected) == 0:
            print(f"未找到符合条件的{label}记录")
            return selected

        volumes = selected[volume_col]
        print(f"\n{label}统计:")
        print(f" 最小值: {volumes.min()}")
        print(f" 最大值: {volumes.max()}")
        print(f" 平均值: {volumes.mean():.1f}")
        print(f" 中位数: {volumes.median()}")
        return selected

    def analyze_large_buy1_orders(self, threshold=99):
        """Select ticks with bid-1 volume above *threshold*.

        Returns:
            bool: True if at least one tick matched.

        Raises:
            ValueError: If the bid-1 volume column cannot be found.
        """
        self._buy1_threshold = threshold
        self.buy1_large_orders = self._analyze_large_orders(BUY_LABEL, threshold)
        return len(self.buy1_large_orders) > 0

    def analyze_large_sell1_orders(self, threshold=99):
        """Select ticks with ask-1 volume above *threshold*.

        Returns:
            bool: True if at least one tick matched.

        Raises:
            ValueError: If the ask-1 volume column cannot be found.
        """
        self._sell1_threshold = threshold
        self.sell1_large_orders = self._analyze_large_orders(SELL_LABEL, threshold)
        return len(self.sell1_large_orders) > 0

    # ------------------------------------------------------------------
    # Look-ahead statistics
    # ------------------------------------------------------------------
    def calculate_future_price_ranges(self):
        """Compute look-ahead price statistics for both sides.

        A side without large orders is skipped and its result attributes are
        left untouched.

        Returns:
            bool: Always True.
        """
        print(f"\n=== 计算后续成交价范围 ===")

        if self._has_rows(self.buy1_large_orders):
            self._calculate_buy1_ranges()

        if self._has_rows(self.sell1_large_orders):
            self._calculate_sell1_ranges()

        return True

    def _calculate_ranges(self, orders, label, window):
        """Build the look-ahead statistics frame for one side and one window.

        For every large-order tick, the "future" is the first *window* rows
        of ``self.df`` whose sequence number is strictly greater than the
        tick's. Ticks with no future rows are dropped.

        Args:
            orders (pandas.DataFrame): Large-order ticks for this side.
            label (str): ``BUY_LABEL`` or ``SELL_LABEL``.
            window (int): Number of future ticks to inspect.

        Returns:
            pandas.DataFrame: One row per tick that has future data.
        """
        volume_col = self._find_column(label)
        prices = self.df[PRICE_COL]
        seq = self.df[SEQ_COL]
        seq_values = seq.to_numpy()
        # Binary search is only valid on sorted data; otherwise fall back to
        # the (slower) boolean mask, which gives the same rows.
        seq_sorted = bool(seq.is_monotonic_increasing)
        prefix = f'未来{window}笔'

        records = []
        ticks = zip(
            orders[SEQ_COL].to_numpy(),
            orders[PRICE_COL].to_numpy(),
            orders[volume_col].to_numpy(),
        )
        for seq_num, price, volume in ticks:
            if seq_sorted:
                start = int(np.searchsorted(seq_values, seq_num + 1, side='left'))
                future = prices.iloc[start:start + window]
            else:
                future = prices[seq_values >= seq_num + 1].head(window)

            if len(future) == 0:
                continue

            low = future.min()
            high = future.max()
            last = future.iloc[-1]
            net_change = last - price

            records.append({
                SEQ_COL: seq_num,
                PRICE_COL: price,
                label: volume,
                f'{prefix}最小价': low,
                f'{prefix}最大价': high,
                f'{prefix}最终价': last,
                f'{prefix}价格范围': high - low,
                # Positive value = price dipped below the trigger price.
                f'{prefix}价格下跌': price - low,
                # Positive value = price rose above the trigger price.
                f'{prefix}价格上涨': high - price,
                f'{prefix}净变化': net_change,
                f'{prefix}净变化%': (net_change / price) * 100,
                '实际样本数': len(future),
            })

        return pd.DataFrame(records)

    def _calculate_buy1_ranges(self):
        """Compute 100- and 200-tick look-ahead statistics for bid-1 orders."""
        self.buy1_results_100 = self._calculate_ranges(self.buy1_large_orders, BUY_LABEL, 100)
        self.buy1_results_200 = self._calculate_ranges(self.buy1_large_orders, BUY_LABEL, 200)
        print(f"买1量分析完成: 100笔 {len(self.buy1_results_100)} 条, 200笔 {len(self.buy1_results_200)} 条")

    def _calculate_sell1_ranges(self):
        """Compute 100- and 200-tick look-ahead statistics for ask-1 orders."""
        self.sell1_results_100 = self._calculate_ranges(self.sell1_large_orders, SELL_LABEL, 100)
        self.sell1_results_200 = self._calculate_ranges(self.sell1_large_orders, SELL_LABEL, 200)
        print(f"卖1量分析完成: 100笔 {len(self.sell1_results_100)} 条, 200笔 {len(self.sell1_results_200)} 条")

    # ------------------------------------------------------------------
    # Console report
    # ------------------------------------------------------------------
    def print_analysis_results(self):
        """Print the comparison tables, sample rows and insights to stdout.

        Sides without data are skipped instead of raising.
        """
        sides = self._sides()

        print(f"\n" + "=" * 90)
        print("AU2512期货大单分析结果 (买1量 vs 卖1量)")
        print("=" * 90)

        # Overview table
        print(f"\n【大单数据对比】")
        print(f"{'类型':>8} {'样本数':>8} {'占比':>8} {'成交量范围':>12} {'平均成交量':>12}")
        print("-" * 60)
        for label, orders, _, _ in sides:
            if not self._has_rows(orders):
                continue
            volumes = orders[self._find_column(label)]
            vol_range = f"{volumes.min()}-{volumes.max()}"
            avg_vol = f"{volumes.mean():.1f}"
            print(f"{label:>8} {len(orders):>8} {self._pct_of_total(len(orders)):>7.2f}% {vol_range:>12} {avg_vol:>12}")

        # Per-window comparison tables
        for window in WINDOWS:
            print(f"\n【{window}笔后续表现对比】")
            print(f"{'类型':>8} {'价格范围':>10} {'趋势变化':>10} {'成功率':>8} {'平均幅度%':>12}")
            print("-" * 60)
            for label, _, results, _ in sides:
                frame = results[window]
                if not self._has_rows(frame):
                    continue
                avg_range = frame[f'未来{window}笔价格范围'].mean()
                avg_net = frame[f'未来{window}笔净变化%'].mean()
                decline_rate = self._share(frame[f'未来{window}笔价格下跌'] > 0) * 100
                trend = "下跌" if avg_net < 0 else "上涨"
                # '>+11.3f' = right-aligned with explicit sign ('+>' would
                # pad with '+' characters).
                print(f"{label:>8} {avg_range:>10.2f} {trend:>10} {decline_rate:>7.1f}% {avg_net:>+11.3f}%")

        # First five samples per side
        print(f"\n【详细分析 - 各类型前5个样本】")
        sample_headers = {BUY_LABEL: '100笔涨幅%', SELL_LABEL: '100笔变化%'}
        for label, _, results, _ in sides:
            print(f"\n{label}大单样本:")
            frame = results[100]
            if not self._has_rows(frame):
                continue
            print(f"{'序号':>4} {'数列号':>8} {'成交价':>8} {label:>8} {'100笔范围':>10} {sample_headers[label]:>12}")
            print("-" * 60)
            for i in range(min(5, len(frame))):
                row = frame.iloc[i]
                print(f"{i+1:>4} {row[SEQ_COL]:>8.0f} {row[PRICE_COL]:>8.2f} {row[label]:>8.0f} "
                      f"{row['未来100笔价格范围']:>10.2f} {row['未来100笔净变化%']:>+12.3f}%")

        # Insights
        buy_orders, sell_orders = self.buy1_large_orders, self.sell1_large_orders
        buy_100, sell_100 = self.buy1_results_100, self.sell1_results_100

        print(f"\n【业务洞察】")
        print(f"1. 大单样本数量对比:")
        if self._has_rows(buy_orders) and self._has_rows(sell_orders):
            print(f" - 买1量大单: {len(buy_orders)} 个 ({self._pct_of_total(len(buy_orders)):.2f}%)")
            print(f" - 卖1量大单: {len(sell_orders)} 个 ({self._pct_of_total(len(sell_orders)):.2f}%)")
            ratio = len(buy_orders) / len(sell_orders)
            print(f" - 买卖单比例: {ratio:.1f}:1 (买单占{'主导' if ratio > 1 else '劣势'})")

        print(f"2. 后续价格波动分析:")
        if self._has_rows(buy_100) and self._has_rows(sell_100):
            buy_volatility = buy_100['未来100笔价格范围'].mean()
            sell_volatility = sell_100['未来100笔价格范围'].mean()
            print(f" - 买1量大单后平均波动: {buy_volatility:.2f}元")
            print(f" - 卖1量大单后平均波动: {sell_volatility:.2f}元")
            higher_volatility = "买1量" if buy_volatility > sell_volatility else "卖1量"
            print(f" - {higher_volatility}大单引发更大的价格波动")

        print(f"3. 趋势预测能力:")
        overall_trend = "强势上涨" if self._is_overall_uptrend() else "震荡整理"
        print(f" - 市场整体趋势: {overall_trend}")
        print(f" - 大单信号强度: {'极强' if self._is_signal_strong() else '中等' if self._is_signal_moderate() else '较弱'}")

    def _positive_share(self, frame):
        """Return the share of rows with a positive 100-tick net change (0 if no data)."""
        if not self._has_rows(frame):
            return 0
        return self._share(frame['未来100笔净变化'] > 0)

    def _buy_decline_share(self):
        """Return the share of bid-1 samples that dipped within 100 ticks, or None."""
        frame = self.buy1_results_100
        if not self._has_rows(frame):
            return None
        return self._share(frame['未来100笔价格下跌'] > 0)

    def _is_overall_uptrend(self):
        """Return True when most samples on BOTH sides end higher after 100 ticks."""
        buy_positive = self._positive_share(self.buy1_results_100)
        sell_positive = self._positive_share(self.sell1_results_100)
        return bool(buy_positive > 0.5 and sell_positive > 0.5)

    def _is_signal_strong(self):
        """Return True when more than 80% of bid-1 samples dipped within 100 ticks."""
        share = self._buy_decline_share()
        return share is not None and bool(share > 0.8)

    def _is_signal_moderate(self):
        """Return True when 50%-80% of bid-1 samples dipped within 100 ticks."""
        share = self._buy_decline_share()
        return share is not None and bool(0.5 < share <= 0.8)

    # ------------------------------------------------------------------
    # Chart
    # ------------------------------------------------------------------
    def _plot_sample_counts(self, ax):
        """Panel 1: bar chart of the number of large orders per side."""
        names, counts, colors = [], [], []
        for label, orders, _, _ in self._sides():
            if self._has_rows(orders):
                names.append(f'{label}大单')
                counts.append(len(orders))
                colors.append(SIDE_COLORS[label])

        bars = ax.bar(names, counts, color=colors, alpha=0.8, edgecolor='black')
        ax.set_ylabel('样本数量', fontsize=12)
        ax.set_title('大单样本数量对比', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')

        for bar, count in zip(bars, counts):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 1,
                    f'{count}', ha='center', va='bottom', fontsize=12, fontweight='bold')

    def _plot_price_ranges(self, ax):
        """Panel 2: mean price range per window, grouped bars per side.

        The x categories are the fixed windows, so a side can be drawn even
        when the other side has no data.
        """
        x = np.arange(len(WINDOWS))
        width = 0.35
        offsets = {BUY_LABEL: -width / 2, SELL_LABEL: width / 2}

        for label, _, results, _ in self._sides():
            if not any(self._has_rows(results[w]) for w in WINDOWS):
                continue
            heights = [
                results[w][f'未来{w}笔价格范围'].mean() if self._has_rows(results[w]) else 0.0
                for w in WINDOWS
            ]
            ax.bar(x + offsets[label], heights, width, label=label,
                   color=SIDE_COLORS[label], alpha=0.8)

        ax.set_xlabel('时间窗口', fontsize=12)
        ax.set_ylabel('平均价格范围 (元)', fontsize=12)
        ax.set_title('价格波动范围对比', fontsize=14, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels([f'{w}笔' for w in WINDOWS])
        ax.legend()
        ax.grid(True, alpha=0.3, axis='y')

    def _plot_decline_rates(self, ax):
        """Panel 3: share of samples that dipped within 100 ticks, per side."""
        names, rates, colors = [], [], []
        for label, _, results, _ in self._sides():
            frame = results[100]
            if self._has_rows(frame):
                names.append(f'{label}\n下跌概率')
                rates.append(self._share(frame['未来100笔价格下跌'] > 0) * 100)
                colors.append(SIDE_COLORS[label])

        bars = ax.bar(names, rates, color=colors, alpha=0.8, edgecolor='black')
        ax.set_ylabel('下跌概率 (%)', fontsize=12)
        ax.set_title('价格下跌概率对比', fontsize=14, fontweight='bold')
        ax.set_ylim(0, 105)
        ax.grid(True, alpha=0.3, axis='y')

        for bar, rate in zip(bars, rates):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() - 3,
                    f'{rate:.1f}%', ha='center', va='top', fontsize=11, fontweight='bold')

    def _plot_volume_scatter(self, ax):
        """Panel 4: resting volume versus 100-tick price range."""
        for label, _, results, _ in self._sides():
            frame = results[100]
            if self._has_rows(frame):
                ax.scatter(frame[label], frame['未来100笔价格范围'],
                           alpha=0.7, color=SCATTER_COLORS[label], label=label, s=50)

        ax.set_xlabel('挂单量 (手)', fontsize=12)
        ax.set_ylabel('价格范围 (元)', fontsize=12)
        ax.set_title('挂单量 vs 价格波动', fontsize=14, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)

    @staticmethod
    def _plot_change_bars(ax, frame, label, window, up_color, down_color):
        """Panels 5-8: net price change of the first 25 samples as signed bars."""
        shown = min(25, len(frame))
        positions = range(1, shown + 1)
        changes = frame[f'未来{window}笔净变化'].iloc[:shown].values
        colors = [up_color if value >= 0 else down_color for value in changes]

        ax.bar(positions, changes, width=0.6, alpha=0.8, color=colors,
               label=f'{label}{window}笔价格变化(元)', edgecolor='black', linewidth=0.5)
        ax.axhline(y=0, color='black', linestyle='-', linewidth=1, alpha=0.7)
        ax.set_xlabel('样本序号', fontsize=12)
        ax.set_ylabel('价格变化幅度 (元)', fontsize=12)
        ax.set_title(f'{label}大单 - {window}笔价格变化', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')

        for position, change in zip(positions, changes):
            rising = change >= 0
            ax.text(position, change + 0.01 if rising else change - 0.01,
                    f'{change:+.2f}', ha='center', va='bottom' if rising else 'top',
                    fontsize=9, rotation=45)

    def _plot_distribution(self, ax, label, results, colors, change_word):
        """Panels 9-10: box plots of price range and scaled net change.

        Net change % is multiplied by 100 so it is visible on the same axis
        as the price range; the tick labels state that factor.
        """
        series, names = [], []
        for window in WINDOWS:
            if self._has_rows(results[window]):
                series.append(results[window][f'未来{window}笔价格范围'])
                names.append(f'{window}笔价格范围')
        for window in WINDOWS:
            if self._has_rows(results[window]):
                series.append(results[window][f'未来{window}笔净变化%'] * 100)
                names.append(f'{window}笔{change_word}%(×100)')

        boxes = ax.boxplot(series, patch_artist=True)
        # Set tick labels separately: the boxplot 'labels' keyword was
        # renamed in newer Matplotlib releases.
        ax.set_xticklabels(names)
        for patch, color in zip(boxes['boxes'], colors):
            patch.set_facecolor(color)

        ax.set_ylabel('数值', fontsize=12)
        ax.set_title(f'{label}大单 - 分布特征', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)

    def create_comprehensive_chart(self):
        """Render the 4x4 summary figure and save it as a PNG.

        Returns:
            bool: True if the chart was written, False if matplotlib is
            unavailable or rendering failed (the error is printed).
        """
        print(f"\n=== 生成综合分析图表 ===")

        if plt is None:
            print("创建综合图表时发生错误: matplotlib 未安装")
            return False

        fig = None
        try:
            fig = plt.figure(figsize=(24, 16))
            gs = fig.add_gridspec(4, 4, hspace=0.3, wspace=0.3)

            # Row 1: overview panels
            self._plot_sample_counts(fig.add_subplot(gs[0, 0]))
            self._plot_price_ranges(fig.add_subplot(gs[0, 1]))
            self._plot_decline_rates(fig.add_subplot(gs[0, 2]))
            self._plot_volume_scatter(fig.add_subplot(gs[0, 3]))

            # Row 2: per-sample net change (100-tick panels first, then 200)
            change_panels = (
                (0, BUY_LABEL, self.buy1_results_100, 100, 'lightgreen', 'lightcoral'),
                (1, SELL_LABEL, self.sell1_results_100, 100, 'lightgreen', 'lightcoral'),
                (2, BUY_LABEL, self.buy1_results_200, 200, 'darkgreen', 'darkred'),
                (3, SELL_LABEL, self.sell1_results_200, 200, 'darkgreen', 'darkred'),
            )
            for column, label, frame, window, up_color, down_color in change_panels:
                if self._has_rows(frame):
                    self._plot_change_bars(fig.add_subplot(gs[1, column]), frame,
                                           label, window, up_color, down_color)

            # Row 3: distributions
            if self._has_rows(self.buy1_results_100):
                self._plot_distribution(
                    fig.add_subplot(gs[2, :2]), BUY_LABEL,
                    {100: self.buy1_results_100, 200: self.buy1_results_200},
                    ['lightgreen', 'darkgreen', 'lightblue', 'blue'], '涨幅')
            if self._has_rows(self.sell1_results_100):
                self._plot_distribution(
                    fig.add_subplot(gs[2, 2:]), SELL_LABEL,
                    {100: self.sell1_results_100, 200: self.sell1_results_200},
                    ['lightcoral', 'darkred', 'lightpink', 'red'], '跌幅')

            # Row 4: text summary
            summary_ax = fig.add_subplot(gs[3, :])
            summary_ax.axis('off')
            summary_ax.text(0.05, 0.95, self._generate_summary_text(),
                            transform=summary_ax.transAxes, fontsize=10,
                            ha='left', va='top',
                            bbox=dict(boxstyle='round,pad=0.5', facecolor='lightyellow', alpha=0.3))

            fig.suptitle('AU2512期货大单综合分析报告 (买1量 vs 卖1量)',
                         fontsize=22, fontweight='bold', y=0.98)
            fig.tight_layout()

            chart_file = os.path.join(self.output_dir, 'large_orders_comprehensive_analysis.png')
            fig.savefig(chart_file, dpi=300, bbox_inches='tight',
                        facecolor='white', edgecolor='none')

            print(f"综合分析图表已保存: {chart_file}")
            return True

        except Exception as e:
            print(f"创建综合图表时发生错误: {e}")
            return False
        finally:
            # Always release the figure, including on the error path.
            if fig is not None:
                plt.close(fig)

    def _generate_summary_text(self):
        """Build the multi-line text shown in the chart's summary panel.

        "Rise"/"decline" probabilities are based on the 100-tick net change
        column produced by the look-ahead calculation.

        Returns:
            str: The summary, one item per line.
        """
        divider = "=" * 50
        text_parts = ["大单综合分析摘要\n"]

        # Sample statistics
        text_parts.append(divider)
        text_parts.append("样本统计:")
        for label, orders, _, _ in self._sides():
            if not self._has_rows(orders):
                continue
            volumes = orders[self._find_column(label)]
            text_parts.append(f"• {label}大单: {len(orders)} 个 ({self._pct_of_total(len(orders)):.2f}%)")
            text_parts.append(f" {label}范围: {volumes.min()}-{volumes.max()} 手")

        # Performance comparison
        text_parts.append("\n" + divider)
        text_parts.append("表现对比:")

        buy_100, sell_100 = self.buy1_results_100, self.sell1_results_100
        buy_success = sell_decline = None

        if self._has_rows(buy_100):
            buy_success = self._share(buy_100['未来100笔净变化'] > 0)
            text_parts.append(f"• 买1量大单后:")
            text_parts.append(f" - 平均价格波动: {buy_100['未来100笔价格范围'].mean():.3f} 元")
            text_parts.append(f" - 平均涨幅: {buy_100['未来100笔净变化%'].mean():+.3f}%")
            text_parts.append(f" - 上涨概率: {buy_success * 100:.1f}%")

        if self._has_rows(sell_100):
            sell_decline = self._share(sell_100['未来100笔净变化'] < 0)
            text_parts.append(f"• 卖1量大单后:")
            text_parts.append(f" - 平均价格波动: {sell_100['未来100笔价格范围'].mean():.3f} 元")
            text_parts.append(f" - 平均变化: {sell_100['未来100笔净变化%'].mean():+.3f}%")
            text_parts.append(f" - 下跌概率: {sell_decline * 100:.1f}%")

        # Market insight
        text_parts.append("\n" + divider)
        text_parts.append("市场洞察:")
        if buy_success is not None and sell_decline is not None:
            if buy_success > 0.9 and sell_decline < 0.3:
                text_parts.append("• 市场特征: 强势上涨市场,买盘力量占主导")
            elif buy_success > 0.7 and sell_decline > 0.7:
                text_parts.append("• 市场特征: 震荡市场,多空力量均衡")
            else:
                text_parts.append("• 市场特征: 趋势不明,需进一步观察")

        # Trading suggestions
        text_parts.append("\n" + divider)
        text_parts.append("交易建议:")
        text_parts.append("• 大单信号具有较高的预测价值")
        text_parts.append("• 买1量大单可作为价格上涨的参考信号")
        text_parts.append("• 在强势市场中,卖1量大单可能被完全吸收")
        text_parts.append("• 建议结合其他技术指标综合判断")

        return "\n".join(text_parts)

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------
    def save_results(self):
        """Write per-side detail CSVs, the summary CSV and the chart.

        A side without results is skipped, so the other side is still saved.
        Errors are printed rather than raised (best-effort export).
        """
        detail_files = (
            ('buy1', BUY_LABEL, self.buy1_results_100, self.buy1_results_200),
            ('sell1', SELL_LABEL, self.sell1_results_100, self.sell1_results_200),
        )
        try:
            saved = []
            for prefix, label, frame_100, frame_200 in detail_files:
                for window, frame in ((100, frame_100), (200, frame_200)):
                    if not self._has_rows(frame):
                        continue
                    file_name = f'{prefix}_large_orders_{window}_analysis.csv'
                    frame.to_csv(os.path.join(self.output_dir, file_name),
                                 index=False, encoding='utf-8-sig')
                    saved.append(f" {file_name} - {label}{window}笔详细分析")

            summary_data = {'指标': [], '数值': []}

            def add(metric, value):
                summary_data['指标'].append(metric)
                summary_data['数值'].append(value)

            for label, orders, results, threshold in self._sides():
                if not self._has_rows(orders):
                    continue
                frame_100, frame_200 = results[100], results[200]
                add(f'{label}>{threshold}的样本数', len(orders))
                if self._has_rows(frame_100):
                    add(f'{label}100笔平均价格范围', f"{frame_100['未来100笔价格范围'].mean():.2f}")
                if self._has_rows(frame_200):
                    add(f'{label}200笔平均价格范围', f"{frame_200['未来200笔价格范围'].mean():.2f}")
                if self._has_rows(frame_100):
                    add(f'{label}100笔平均净变化%', f"{frame_100['未来100笔净变化%'].mean():+.2f}%")
                    add(f'{label}100笔下跌概率%',
                        f"{self._share(frame_100['未来100笔价格下跌'] > 0) * 100:.1f}%")

            pd.DataFrame(summary_data).to_csv(
                os.path.join(self.output_dir, 'large_orders_summary.csv'),
                index=False, encoding='utf-8-sig')

            print(f"\n分析结果已保存到 {self.output_dir} 目录:")
            for line in saved:
                print(line)
            print(f" large_orders_summary.csv - 综合汇总分析")

            self.create_comprehensive_chart()

        except Exception as e:
            print(f"保存结果时发生错误: {e}")

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------
    def run_analysis(self, threshold=99):
        """Run the full pipeline: load, filter, compute, report, save.

        Args:
            threshold (int): Volume threshold applied to both sides.

        Returns:
            bool: False if loading failed or neither side had matches,
            True otherwise.
        """
        print(f"开始AU2512期货大单综合分析 (阈值: {threshold})...")

        if not self.load_data():
            return False

        buy1_success = self.analyze_large_buy1_orders(threshold)
        sell1_success = self.analyze_large_sell1_orders(threshold)

        if not buy1_success and not sell1_success:
            print("未找到任何符合条件的记录")
            return False

        self.calculate_future_price_ranges()
        self.print_analysis_results()
        self.save_results()

        return True


def main():
    """Command-line entry point; exits with status 1 on failure."""
    parser = argparse.ArgumentParser(description='AU2512期货大单综合分析工具')
    parser.add_argument('data_file', nargs='?',
                        default='data/au2512_20251013.parquet',
                        help='数据文件路径 (默认: data/au2512_20251013.parquet)')
    parser.add_argument('--threshold', '-t', type=int, default=99,
                        help='买卖单量阈值 (默认: 99)')
    parser.add_argument('--output-dir', '-o', default='large_orders',
                        help='输出目录 (默认: large_orders)')

    args = parser.parse_args()

    # Fail early, before the output directory is created.
    if not os.path.exists(args.data_file):
        print(f"错误: 数据文件不存在 - {args.data_file}")
        print("\n使用方法:")
        print(" python analyze_large_orders.py [数据文件路径] [--threshold 阈值] [--output-dir 输出目录]")
        print(f" python analyze_large_orders.py # 使用默认参数")
        sys.exit(1)

    analyzer = LargeOrdersAnalyzer(args.data_file, args.output_dir)
    success = analyzer.run_analysis(args.threshold)

    if success:
        print(f"\n分析完成!结果已保存到: {args.output_dir}")
    else:
        print(f"\n分析失败!")
        sys.exit(1)


if __name__ == "__main__":
    main()