huice/large_orders/analyze_current_volume_optimized.py
Your Name e5dd5b5593 feat: 期货数据分析工具集 v2.0
## 核心功能
### 1. 成交量序列分析 (volume_price_sequence.py)
- 按累计成交量排序的价格趋势分析
- 三合一综合图表:价格序列+成交量分布+时间序列
- 关键价格水平自动标注

### 2. 成交量分布深度分析 (volume_distribution_analysis.py)
- 7种专业可视化图表
- 统计特征分析和分布拟合
- 交易模式识别和业务洞察

### 3. 大额订单分析工具集 (large_orders/)
- 买1/卖1量大单分析 (阈值99)
- 买卖挂单合计分析 (阈值200)
- 当前成交量分析 (阈值150)
- 信号抑制优化算法 (38%抑制率)

## 技术特性
- 信号抑制算法:有效减少重复信号干扰
- 多维度分析:支持多种信号类型
- 专业可视化:四宫格综合分析图
- 业务洞察:基于数据的交易建议

## 分析结果
- 卖1量大单:短期下跌,长期大幅上涨反转
- 买挂合计:各时间窗口小幅正收益
- 信号抑制:短期收益从-0.0778提升至+0.1347

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-02 15:15:53 +08:00

391 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
# Configure fonts able to render CJK glyphs (tried in the listed order) and
# keep the minus sign drawable when one of those fonts is active.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
def _find_first_column(columns, matches):
    """Return the first entry of *columns* whose ``str()`` name satisfies *matches*.

    Args:
        columns: Iterable of column labels.
        matches: Callable taking the label converted to ``str`` and returning
            a truthy value when the column is acceptable.

    Returns:
        The first matching label, or ``None`` when nothing matches.
    """
    for col in columns:
        if matches(str(col)):
            return col
    return None


def _is_current_volume_name(name):
    """Tell whether *name* looks like a current (non-cumulative) volume column."""
    if '累积' in name:  # cumulative volume columns are explicitly excluded
        return False
    return '当前成交量' in name or 'cur_volume' in name.lower() or '成交量' in name


def _is_price_name(name):
    """Tell whether *name* looks like a trade-price column."""
    return '成交价' in name or 'price' in name.lower()


def _apply_signal_suppression(indices, suppression_window=20):
    """Drop signals that fall within *suppression_window* of an accepted one.

    Indices are processed in ascending order. Because the accepted list is
    itself ascending, the most recently accepted index is always the closest
    one, so comparing against it alone is equivalent to comparing against
    every accepted index.

    Args:
        indices: Sequence of integer row positions.
        suppression_window: A signal is dropped when its distance to the
            last accepted signal is less than or equal to this value.

    Returns:
        ``(kept, suppressed_count)``; always a 2-tuple, also for empty input.
    """
    kept = []
    for idx in sorted(indices):
        if kept and idx - kept[-1] <= suppression_window:
            continue
        kept.append(idx)
    return kept, len(indices) - len(kept)


def _extract_price_sequences(prices, volumes, positions, max_points):
    """Collect the price path following each signal, relative to its base price.

    Args:
        prices: 1-D array of prices, one entry per row.
        volumes: 1-D array of volumes aligned with *prices*.
        positions: Row positions (0-based) of the signals.
        max_points: Maximum number of rows to take after each signal.

    Returns:
        ``(sequences, sequence_info)``. Each sequence holds the prices of the
        following rows minus the price at the signal row; signals on the last
        row (nothing follows) are skipped. ``sequence_info`` carries, per
        sequence, the row position, volume, base price and sequence length.
    """
    sequences = []
    sequence_info = []
    total_rows = len(prices)
    for pos in positions:
        take_points = min(max_points, total_rows - pos - 1)
        if take_points <= 0:
            continue
        base_price = prices[pos]
        sequences.append(prices[pos + 1: pos + 1 + take_points] - base_price)
        sequence_info.append({
            'start_index': pos,
            'volume': volumes[pos],
            'base_price': base_price,
            'sequence_length': take_points,
        })
    return sequences, sequence_info


def _mean_and_std_by_step(sequences):
    """Return per-step mean and standard deviation across ragged *sequences*.

    Step ``i`` only aggregates the sequences that are long enough to have an
    element at ``i``. Both returned lists are empty when *sequences* is empty.
    """
    if not sequences:
        return [], []
    means = []
    stds = []
    for step in range(max(len(seq) for seq in sequences)):
        values = [seq[step] for seq in sequences if len(seq) > step]
        means.append(np.mean(values))
        stds.append(np.std(values))
    return means, stds


def _final_change_stats(sequences):
    """Summarise the last value and the extremes of every non-empty sequence.

    Returns an empty dict when there is no non-empty sequence.
    """
    non_empty = [seq for seq in sequences if len(seq) > 0]
    if not non_empty:
        return {}
    final_changes = [seq[-1] for seq in non_empty]
    return {
        'count': len(sequences),
        'avg_final_change': np.mean(final_changes),
        'std_final_change': np.std(final_changes),
        'max_rise': np.max(final_changes),
        'max_fall': np.min(final_changes),
        'positive_ratio': sum(1 for change in final_changes if change > 0) / len(final_changes),
        'avg_max_gain': np.mean([np.max(seq) for seq in non_empty]),
        'avg_max_loss': np.mean([np.min(seq) for seq in non_empty]),
    }


def _plot_sequences(ax, sequences, linewidth):
    """Draw every sequence on *ax* with a viridis gradient plus a zero baseline."""
    if sequences:
        colors = plt.cm.viridis(np.linspace(0.3, 0.9, len(sequences)))
        for color, sequence in zip(colors, sequences):
            ax.plot(range(len(sequence)), sequence, color=color, alpha=0.6, linewidth=linewidth)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)


def analyze_current_volume_optimized(
    data_path='../data/au2512_20251013.parquet',
    volume_threshold=150,
    suppression_window=20,
    analysis_lengths=(100, 200, 500),
):
    """Analyse the price path that follows large current-volume rows.

    Rows whose current volume exceeds *volume_threshold* are treated as
    signals. Signals closer than *suppression_window* rows to an already
    accepted signal are dropped. For every horizon in *analysis_lengths* the
    relative price path after each remaining signal is plotted, summarised
    and printed, and two PNG files are written to the working directory.

    Args:
        data_path: Parquet file to read (relative paths resolve against the
            current working directory).
        volume_threshold: Strict lower bound a row's volume must exceed.
        suppression_window: Minimum row distance between accepted signals.
        analysis_lengths: Horizons (number of following rows) to analyse.

    Returns:
        None. The function returns early, after printing a message, when the
        volume or price column cannot be identified or no row qualifies.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    # Locate the current-volume column by name.
    current_volume_col = _find_first_column(df.columns, _is_current_volume_name)
    if current_volume_col is None:
        print("未找到当前成交量列,尝试查找其他可能的成交量列...")
        # NOTE(review): every needle below except '累积' is an empty string, so
        # ``'' not in name`` is always False and this fallback can never match.
        # The intended substrings appear to have been lost; the condition is
        # kept verbatim until the right ones are confirmed.
        current_volume_col = _find_first_column(
            df.columns,
            lambda name: '' in name and '累积' not in name and '' not in name and '' not in name,
        )
        if current_volume_col is not None:
            print(f"使用可能的成交量列: {current_volume_col}")
    if current_volume_col is None:
        print("未找到合适的成交量列")
        return
    print(f"使用当前成交量列: {current_volume_col}")

    # Locate the trade-price column by name.
    price_col = _find_first_column(df.columns, _is_price_name)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    volume_series = df[current_volume_col]
    print("\n当前成交量统计信息:")
    print(f"最小值: {volume_series.min()}")
    print(f"最大值: {volume_series.max()}")
    print(f"平均值: {volume_series.mean():.2f}")
    print(f"中位数: {volume_series.median():.2f}")

    # Work with row positions rather than index labels so that distances,
    # the "rows remaining" arithmetic and the slicing all use the same
    # coordinate system regardless of the DataFrame's index.
    large_mask = (volume_series > volume_threshold).to_numpy(dtype=bool, na_value=False)
    large_volume_positions = np.flatnonzero(large_mask).tolist()
    raw_count = len(large_volume_positions)
    print(f"\n找到当前成交量大于{volume_threshold}的数据点数量: {raw_count}")
    if raw_count == 0:
        # Nothing to analyse; bail out before the suppression-rate division.
        print("没有满足条件的信号,分析结束。")
        return
    large_volumes = volume_series.iloc[large_volume_positions]
    print(f"大成交量统计: 最小={large_volumes.min():.0f}, 最大={large_volumes.max():.0f}, 平均={large_volumes.mean():.0f}")

    print(f"\n应用信号抑制逻辑{suppression_window}个数据点窗口...")
    filtered_positions, suppressed_count = _apply_signal_suppression(
        large_volume_positions, suppression_window)
    kept_count = len(filtered_positions)
    suppression_rate = suppressed_count / raw_count * 100
    print(f"原始信号数量: {raw_count}")
    print(f"抑制后信号数量: {kept_count}")
    print(f"被抑制的信号数量: {suppressed_count}")
    print(f"抑制率: {suppression_rate:.1f}%")
    if kept_count > 0:
        filtered_volumes = volume_series.iloc[filtered_positions]
        print(f"过滤后大成交量统计: 最小={filtered_volumes.min():.0f}, 最大={filtered_volumes.max():.0f}, 平均={filtered_volumes.mean():.0f}")

    prices = df[price_col].to_numpy()
    volumes = volume_series.to_numpy()

    for length in analysis_lengths:
        print(f"\n{'='*60}")
        print(f"分析当前成交量>{volume_threshold}后{length}个数据点的价格走势(优化版)")
        print(f"{'='*60}")

        volume_sequences, volume_info = _extract_price_sequences(
            prices, volumes, filtered_positions, length)
        print(f"成功提取 {len(volume_sequences)} 个过滤后大成交量价格序列 (最大长度: {length})")

        avg_changes, std_changes = _mean_and_std_by_step(volume_sequences)
        volume_stats = _final_change_stats(volume_sequences)

        # Checkpoints reported in both the figure text box and the console:
        # the 10th/50th/200th/500th step, limited to the current horizon and
        # to steps for which data actually exists.
        key_point_lines = [
            f"第{step}点: {avg_changes[step - 1]:.4f} (±{std_changes[step - 1]:.4f})"
            for step in (10, 50, 200, 500)
            if step <= length and step - 1 < len(avg_changes)
        ]
        stat_lines = []
        if volume_stats:
            stat_lines = [
                f"序列数量: {volume_stats['count']}",
                f"平均最终变化: {volume_stats['avg_final_change']:.4f}",
                f"变化标准差: {volume_stats['std_final_change']:.4f}",
                f"最大上涨: {volume_stats['max_rise']:.4f}",
                f"最大下跌: {volume_stats['max_fall']:.4f}",
                f"上涨比例: {volume_stats['positive_ratio']:.1%}",
                f"平均最大获利: {volume_stats['avg_max_gain']:.4f}",
                f"平均最大亏损: {volume_stats['avg_max_loss']:.4f}",
            ]

        # ---- Comprehensive 2x2 figure --------------------------------
        fig, axes = plt.subplots(2, 2, figsize=(20, 16))
        fig.suptitle(
            f'当前成交量>{volume_threshold}的价格走势分析优化版(后{length}个数据点)\n'
            f'原始信号:{raw_count}个 → 过滤后:{kept_count}个 (抑制{suppressed_count}个)',
            fontsize=14, fontweight='bold')

        # Panel 1: every filtered sequence.
        ax1 = axes[0, 0]
        _plot_sequences(ax1, volume_sequences, linewidth=0.8)
        ax1.set_xlabel('数据点序号')
        ax1.set_ylabel('相对价格变化')
        ax1.set_title(f'过滤后价格变化走势 (后{length}点)\n{len(volume_sequences)}个有效序列')
        ax1.grid(True, alpha=0.3)
        suppression_text = f'原始: {raw_count}\n过滤: {kept_count}\n抑制: {suppressed_count}'
        ax1.text(0.02, 0.98, suppression_text, transform=ax1.transAxes,
                 verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

        # Panel 2: top 20% of signals by volume versus the rest.
        ax2 = axes[0, 1]
        top_sequences = []
        bottom_sequences = []
        if volume_info:
            by_volume = sorted(range(len(volume_info)),
                               key=lambda i: volume_info[i]['volume'], reverse=True)
            top_count = max(1, len(by_volume) // 5)  # always at least one
            top_sequences = [volume_sequences[i] for i in by_volume[:top_count]]
            bottom_sequences = [volume_sequences[i] for i in by_volume[top_count:]]
            # Only the first line of each group carries a legend label.
            for i, sequence in enumerate(top_sequences):
                ax2.plot(range(len(sequence)), sequence, color='red', alpha=0.7, linewidth=1.2,
                         label='最大20%成交量' if i == 0 else "")
            for i, sequence in enumerate(bottom_sequences):
                ax2.plot(range(len(sequence)), sequence, color='blue', alpha=0.4, linewidth=0.6,
                         label='其他80%成交量' if i == 0 else "")
        ax2.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
        ax2.set_xlabel('数据点序号')
        ax2.set_ylabel('相对价格变化')
        ax2.set_title(f'按成交量大小分组的价格走势 (后{length}点)\n红色:最大20%({len(top_sequences)}个) 蓝色:其他80%({len(bottom_sequences)}个)')
        ax2.grid(True, alpha=0.3)
        if volume_info:
            ax2.legend(fontsize=10)

        # Panel 3: mean path with a +/- one standard deviation band.
        ax3 = axes[1, 0]
        if avg_changes:
            steps = range(len(avg_changes))
            mean_path = np.asarray(avg_changes)
            std_path = np.asarray(std_changes)
            ax3.plot(steps, mean_path, color='green', linewidth=2.5,
                     label=f'平均变化 (n={len(volume_sequences)})')
            ax3.fill_between(steps, mean_path - std_path, mean_path + std_path,
                             alpha=0.3, color='green', label='±1标准差区间')
        ax3.axhline(y=0, color='black', linestyle='--', alpha=0.7, linewidth=1.5)
        ax3.set_xlabel('数据点序号')
        ax3.set_ylabel('平均相对价格变化')
        ax3.set_title(f'平均价格变化及置信区间 (后{length}点)')
        if avg_changes:
            # Without labelled artists legend() would only emit a warning.
            ax3.legend(fontsize=12)
        ax3.grid(True, alpha=0.3)

        # Panel 4: statistics text box.
        ax4 = axes[1, 1]
        ax4.axis('off')
        text_parts = [
            f"=== 当前成交量>{volume_threshold} 统计信息 (后{length}点) ===\n",
            "信号抑制效果:\n",
            f" 原始信号: {raw_count}\n",
            f" 过滤信号: {kept_count}\n",
            f" 抑制数量: {suppressed_count}\n",
            f" 抑制率: {suppression_rate:.1f}%\n\n",
        ]
        if stat_lines:
            text_parts.append("价格统计:\n")
            text_parts.extend(f" {line}\n" for line in stat_lines)
            text_parts.append("\n")
        if avg_changes:
            text_parts.append("=== 关键时间点分析 ===\n")
            text_parts.extend(f" {line}\n" for line in key_point_lines)
        if volume_info:
            signal_volumes = [info['volume'] for info in volume_info]
            text_parts.append("\n=== 成交量信息 ===\n")
            text_parts.append(f" 成交量范围: {min(signal_volumes):.0f} - {max(signal_volumes):.0f}\n")
            text_parts.append(f" 平均成交量: {np.mean(signal_volumes):.0f}\n")
            text_parts.append(f" 成交量中位数: {np.median(signal_volumes):.0f}")
        ax4.text(0.05, 0.95, "".join(text_parts), transform=ax4.transAxes, fontsize=10,
                 verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

        fig.tight_layout()
        output_file = f'current_volume_optimized_comprehensive_analysis_{length}points.png'
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
        plt.close(fig)  # release the figure as soon as it is on disk
        print(f"\n{length}点优化版综合分析图表已保存为: {output_file}")
        print(f"完整路径: {os.path.abspath(output_file)}")

        # ---- Stand-alone figure with every filtered sequence ---------
        fig_volume, ax_volume = plt.subplots(figsize=(15, 10))
        _plot_sequences(ax_volume, volume_sequences, linewidth=1)
        ax_volume.set_xlabel(f'数据点序号 (相对于过滤后大成交量时刻, 后{length}点)', fontsize=12)
        ax_volume.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax_volume.set_title(
            f'当前成交量>{volume_threshold}过滤后数据点{length}个相对价格变化走势\n'
            f'信号抑制: {raw_count} → {kept_count} (抑制{suppressed_count}个)\n'
            f'{len(volume_sequences)}个有效序列',
            fontsize=14, fontweight='bold')
        ax_volume.grid(True, alpha=0.3)
        fig_volume.tight_layout()
        volume_output_file = f'current_volume_optimized_relative_price_changes_{length}points.png'
        fig_volume.savefig(volume_output_file, dpi=300, bbox_inches='tight')
        print(f"过滤后大成交量{length}点分析图表已保存为: {volume_output_file}")
        plt.close(fig_volume)

        # ---- Console report ------------------------------------------
        print(f"\n{'='*50}")
        print(f"详细统计信息 (后{length}点) - 优化版:")
        print(f"{'='*50}")
        print("\n【信号抑制效果】")
        print(f"原始信号数量: {raw_count}")
        print(f"过滤后信号数量: {kept_count}")
        print(f"被抑制信号数量: {suppressed_count}")
        print(f"信号抑制率: {suppression_rate:.1f}%")
        if stat_lines:
            print("\n【过滤后价格统计】")
            for line in stat_lines:
                print(line)
        if avg_changes:
            print("\n关键时间点分析:")
            for line in key_point_lines:
                print(line)

        plt.close('all')  # safety net: make sure no figure outlives the iteration

    print(f"\n{'='*60}")
    print("优化版分析完成!")
    print("信号抑制逻辑已成功应用,减少了重复信号的影响。")
    print(f"{'='*60}")
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    analyze_current_volume_optimized()