import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fonts for the Chinese chart labels; render minus signs as ASCII hyphens.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False


def _find_column(columns, chinese_key, english_key, *, first_match):
    """Return a column whose name contains one of the two keys, else None.

    ``chinese_key`` is searched in ``str(col)`` and ``english_key`` in the
    lower-cased name. With ``first_match=True`` the first matching column is
    returned, otherwise the last one.
    """
    matches = [
        col for col in columns
        if chinese_key in str(col) or english_key in str(col).lower()
    ]
    if not matches:
        return None
    return matches[0] if first_match else matches[-1]


def _large_order_positions(volumes, threshold):
    """Return the 0-based row positions where ``volumes`` exceeds ``threshold``."""
    # Missing volumes count as "not large" so the mask is a plain bool array.
    exceeds = (volumes > threshold).to_numpy(dtype=bool, na_value=False)
    return np.flatnonzero(exceeds)


def _extract_price_sequences(prices, positions, max_points):
    """Return price changes for up to ``max_points`` rows after each position.

    Each returned array is ``prices[pos + 1 : pos + 1 + max_points]`` minus
    ``prices[pos]``. Positions with no following rows produce no sequence.
    Indexing is purely positional, so it does not depend on the DataFrame
    index labels.
    """
    if max_points <= 0:
        return []
    sequences = []
    for pos in positions:
        window = prices[pos + 1:pos + 1 + max_points]
        if window.size:
            sequences.append(window - prices[pos])
    return sequences


def _average_changes(sequences):
    """Return the per-step mean over all sequences long enough to reach that step."""
    if not sequences:
        return []
    longest = max(len(seq) for seq in sequences)
    return [
        np.mean([seq[step] for seq in sequences if len(seq) > step])
        for step in range(longest)
    ]


def _final_change_stats(sequences, name):
    """Summarise the last value of every sequence; return {} when there are none."""
    final_changes = [seq[-1] for seq in sequences if len(seq) > 0]
    if not final_changes:
        return {}
    return {
        'name': name,
        'count': len(sequences),
        'avg_final_change': np.mean(final_changes),
        'std_final_change': np.std(final_changes),
        'max_rise': np.max(final_changes),
        'max_fall': np.min(final_changes),
    }


def _stats_lines(stats):
    """Format a non-empty stats dict as a header line followed by five value lines."""
    return [
        f"【{stats['name']}】",
        f"序列数量: {stats['count']}",
        f"平均最终变化: {stats['avg_final_change']:.4f}",
        f"变化标准差: {stats['std_final_change']:.4f}",
        f"最大上涨: {stats['max_rise']:.4f}",
        f"最大下跌: {stats['max_fall']:.4f}",
    ]


def _key_point_lines(length, buy_avg, sell_avg):
    """Return buy/sell/difference lines for the checkpoints covered by ``length``."""
    if length >= 500:
        checkpoints = [(49, '第50点'), (199, '第200点'), (499, '第500点')]
    elif length >= 200:
        checkpoints = [(49, '第50点'), (199, '第200点')]
    else:
        checkpoints = [(49, '第50点')]

    lines = []
    for point, name in checkpoints:
        # Skip checkpoints that either averaged curve is too short to reach.
        if point < len(buy_avg) and point < len(sell_avg):
            diff = buy_avg[point] - sell_avg[point]
            lines.append(
                f"{name}: 买1={buy_avg[point]:.4f}, "
                f"卖1={sell_avg[point]:.4f}, 差值={diff:.4f}"
            )
    return lines


def _draw_price_paths(ax, sequences, cmap, line_width):
    """Plot every sequence on ``ax`` in graded ``cmap`` shades plus a zero baseline."""
    if sequences:
        shades = cmap(np.linspace(0.3, 0.9, len(sequences)))
        for shade, path in zip(shades, sequences):
            ax.plot(range(len(path)), path, color=shade, alpha=0.6,
                    linewidth=line_width)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
    ax.grid(True, alpha=0.3)


def _save_comparison_figure(length, buy_tag, sell_tag, buy_sequences,
                            sell_sequences, buy_avg, sell_avg, summary_text):
    """Save the 2x2 comparison figure for one window length; return its file name."""
    output_file = f'large_orders_comprehensive_analysis_{length}points.png'
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    try:
        fig.suptitle(f'大额订单对比分析:{buy_tag} vs {sell_tag} (后{length}个数据点)',
                     fontsize=16, fontweight='bold')

        # Top row: one panel per side with every individual price path.
        # Axes are labelled even without sequences so an empty panel is
        # still self-describing.
        panels = (
            (axes[0, 0], buy_tag, buy_sequences, plt.cm.Blues, 'lightblue'),
            (axes[0, 1], sell_tag, sell_sequences, plt.cm.Reds, 'lightcoral'),
        )
        for ax, tag, sequences, cmap, box_color in panels:
            _draw_price_paths(ax, sequences, cmap, line_width=0.8)
            ax.set_xlabel('数据点序号')
            ax.set_ylabel('相对价格变化')
            ax.set_title(f'{tag}的价格变化走势 (后{length}点)\n共{len(sequences)}个序列')
            ax.text(0.02, 0.98, f'序列数: {len(sequences)}',
                    transform=ax.transAxes, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))

        # Bottom-left: averaged curves of both sides on shared axes.
        ax_avg = axes[1, 0]
        if buy_avg:
            ax_avg.plot(range(len(buy_avg)), buy_avg, color='blue', linewidth=2.5,
                        label=f'{buy_tag} (n={len(buy_sequences)})')
        if sell_avg:
            ax_avg.plot(range(len(sell_avg)), sell_avg, color='red', linewidth=2.5,
                        label=f'{sell_tag} (n={len(sell_sequences)})')
        ax_avg.axhline(y=0, color='black', linestyle='--', alpha=0.7, linewidth=1.5)
        ax_avg.set_xlabel('数据点序号')
        ax_avg.set_ylabel('平均相对价格变化')
        ax_avg.set_title(f'平均价格变化对比 (后{length}点)')
        if buy_avg or sell_avg:
            # Only request a legend when at least one labelled curve exists.
            ax_avg.legend(fontsize=12)
        ax_avg.grid(True, alpha=0.3)

        # Bottom-right: text-only statistics panel.
        ax_text = axes[1, 1]
        ax_text.axis('off')
        ax_text.text(0.05, 0.95, summary_text, transform=ax_text.transAxes,
                     fontsize=11, verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)
    return output_file


def _save_side_figure(length, tag, side_label, slug, sequences, cmap):
    """Save the standalone figure for one side; return its file name."""
    output_file = f'large_{slug}_relative_price_changes_{length}points.png'
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        _draw_price_paths(ax, sequences, cmap, line_width=1)
        ax.set_xlabel(f'数据点序号 (相对于大额{side_label}订单, 后{length}点)', fontsize=12)
        ax.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax.set_title(
            f'{tag}的数据点后{length}个相对价格变化走势\n'
            f'所有序列从基准点(0)开始\n共{len(sequences)}个序列',
            fontsize=14, fontweight='bold')
        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)
    return output_file


def _analyze_window(prices, buy_positions, sell_positions, length, volume_threshold):
    """Run the charts and console report for a single window length."""
    buy_tag = f'买1量>{volume_threshold}'
    sell_tag = f'卖1量>{volume_threshold}'

    print(f"\n{'='*60}")
    print(f"分析后{length}个数据点的价格走势")
    print(f"{'='*60}")

    buy_sequences = _extract_price_sequences(prices, buy_positions, length)
    sell_sequences = _extract_price_sequences(prices, sell_positions, length)
    print(f"成功提取 {len(buy_sequences)} 个买1价格序列 (最大长度: {length})")
    print(f"成功提取 {len(sell_sequences)} 个卖1价格序列 (最大长度: {length})")

    buy_avg = _average_changes(buy_sequences)
    sell_avg = _average_changes(sell_sequences)
    all_stats = (
        _final_change_stats(buy_sequences, buy_tag),
        _final_change_stats(sell_sequences, sell_tag),
    )
    # Key points are only compared when both sides have an averaged curve.
    compare_key_points = bool(buy_avg and sell_avg)
    key_lines = _key_point_lines(length, buy_avg, sell_avg) if compare_key_points else []

    # Text shown in the statistics panel of the comparison figure.
    text_parts = [f"=== 统计信息对比 (后{length}点) ===\n\n"]
    for stats in all_stats:
        if stats:
            text_parts.append("\n".join(_stats_lines(stats)) + "\n\n")
    if compare_key_points:
        text_parts.append(f"=== 关键时间点对比 (后{length}点) ===\n")
        text_parts.extend(f"{line}\n" for line in key_lines)
    summary_text = "".join(text_parts)

    output_file = _save_comparison_figure(
        length, buy_tag, sell_tag, buy_sequences, sell_sequences,
        buy_avg, sell_avg, summary_text)
    print(f"\n{length}点综合分析图表已保存为: {output_file}")
    print(f"完整路径: {os.path.abspath(output_file)}")

    buy_output_file = _save_side_figure(
        length, buy_tag, '买1', 'buy1', buy_sequences, plt.cm.Blues)
    print(f"买1量{length}点分析图表已保存为: {buy_output_file}")

    sell_output_file = _save_side_figure(
        length, sell_tag, '卖1', 'sell1', sell_sequences, plt.cm.Reds)
    print(f"卖1量{length}点分析图表已保存为: {sell_output_file}")

    # Console copy of the statistics shown in the figure.
    print(f"\n{'='*50}")
    print(f"详细统计信息 (后{length}点):")
    print(f"{'='*50}")
    for stats in all_stats:
        if stats:
            header, *values = _stats_lines(stats)
            print(f"\n{header}")
            for line in values:
                print(line)
    if compare_key_points:
        print("\n关键时间点对比:")
        for line in key_lines:
            print(line)


def analyze_large_orders_extended(data_path='data/au2512_20251013.parquet',
                                  volume_threshold=99,
                                  analysis_lengths=(100, 200, 500)):
    """Chart how the trade price moves after large best-bid / best-ask volumes.

    Rows whose bid-1 or ask-1 volume exceeds ``volume_threshold`` are taken as
    starting points. For every window length, the price change relative to
    the starting row is collected for the following rows, then plotted and
    summarised (mean, standard deviation, extremes of the final change).
    Three PNG files are written to the current working directory per window
    length, and a report is printed to stdout.

    Args:
        data_path: Parquet file to read; a relative path is resolved against
            the current working directory.
        volume_threshold: A row qualifies when its volume is strictly greater
            than this value.
        analysis_lengths: Window lengths (number of following rows) to analyse.

    Returns:
        None. If a required column cannot be located, a message is printed
        and the function returns early.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    # Volume columns: when several names match, the last one is used.
    buy1_vol_col = _find_column(df.columns, '买1量', 'buy1', first_match=False)
    sell1_vol_col = _find_column(df.columns, '卖1量', 'sell1', first_match=False)
    if buy1_vol_col is None:
        print("未找到买1量列")
        return
    if sell1_vol_col is None:
        print("未找到卖1量列")
        return
    print(f"使用买1量列: {buy1_vol_col}")
    print(f"使用卖1量列: {sell1_vol_col}")

    # Price column: the first matching name is used.
    price_col = _find_column(df.columns, '成交价', 'price', first_match=True)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    # Work with row positions rather than index labels so the windowing is
    # correct for any DataFrame index, not only a default RangeIndex.
    buy_positions = _large_order_positions(df[buy1_vol_col], volume_threshold)
    sell_positions = _large_order_positions(df[sell1_vol_col], volume_threshold)
    print(f"找到买1量大于{volume_threshold}的数据点数量: {len(buy_positions)}")
    print(f"找到卖1量大于{volume_threshold}的数据点数量: {len(sell_positions)}")

    prices = df[price_col].to_numpy(dtype=float, na_value=np.nan)

    for length in analysis_lengths:
        _analyze_window(prices, buy_positions, sell_positions, length,
                        volume_threshold)

    print(f"\n{'='*60}")
    print("所有分析完成!")
    print(f"{'='*60}")


if __name__ == "__main__":
    analyze_large_orders_extended()