huice/large_orders/analyze_current_volume_extended.py
Your Name e5dd5b5593 feat: 期货数据分析工具集 v2.0
## 核心功能
### 1. 成交量序列分析 (volume_price_sequence.py)
- 按累计成交量排序的价格趋势分析
- 三合一综合图表:价格序列+成交量分布+时间序列
- 关键价格水平自动标注

### 2. 成交量分布深度分析 (volume_distribution_analysis.py)
- 7种专业可视化图表
- 统计特征分析和分布拟合
- 交易模式识别和业务洞察

### 3. 大额订单分析工具集 (large_orders/)
- 买1/卖1量大单分析 (阈值99)
- 买卖挂单合计分析 (阈值200)
- 当前成交量分析 (阈值150)
- 信号抑制优化算法 (38%抑制率)

## 技术特性
- 信号抑制算法:有效减少重复信号干扰
- 多维度分析:支持多种信号类型
- 专业可视化:四宫格综合分析图
- 业务洞察:基于数据的交易建议

## 分析结果
- 卖1量大单:短期下跌,长期大幅上涨反转
- 买挂合计:各时间窗口小幅正收益
- 信号抑制:短期收益从-0.0778提升至+0.1347

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-02 15:15:53 +08:00

337 lines
15 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
# Configure fonts for Chinese labels: candidate sans-serif families are listed
# in order, and 'axes.unicode_minus' is switched off (presumably so minus signs
# still render with these fonts — confirm against the matplotlib rcParams docs).
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
# (0-based step, label) pairs reported in the "key time points" summaries.
_KEY_POINTS = ((9, '第10点'), (49, '第50点'), (199, '第200点'), (499, '第500点'))


def _find_volume_column(columns):
    """Return the first column that looks like per-tick volume, or None.

    A column matches when its name contains '成交量' or, case-insensitively,
    'cur_volume', and does not contain '累积' (cumulative volume).
    """
    for col in columns:
        name = str(col)
        if ('成交量' in name or 'cur_volume' in name.lower()) and '累积' not in name:
            return col
    return None


def _find_fallback_volume_column(columns):
    """Return a looser guess for the volume column, or None.

    NOTE(review): the literals of the previous fallback test were empty
    strings, which made the condition always false, so the fallback could
    never match. The tokens below are a reconstruction (any "quantity"-like
    column that is neither cumulative nor a bid/ask size) — confirm them
    against the real column names of the data file.
    """
    excluded = ('累积', '累计', '买', '卖', 'cum', 'bid', 'ask')
    for col in columns:
        lowered = str(col).lower()
        if ('量' in lowered or 'volume' in lowered) and not any(
            token in lowered for token in excluded
        ):
            return col
    return None


def _find_price_column(columns):
    """Return the first column whose name contains '成交价' or 'price', or None."""
    for col in columns:
        name = str(col)
        if '成交价' in name or 'price' in name.lower():
            return col
    return None


def _extract_price_sequences(prices, volumes, positions, max_points):
    """Collect the price moves that follow each trigger row.

    Args:
        prices: 1-D array of trade prices, one per row.
        volumes: 1-D array of per-row volumes, aligned with ``prices``.
        positions: 0-based row positions of the trigger rows.
        max_points: maximum number of following rows to take per trigger.

    Returns:
        ``(sequences, sequence_info)``. Each sequence holds the following
        prices minus the trigger row's price; triggers with no following
        row are skipped. ``sequence_info`` has one dict per kept sequence.
    """
    total = len(prices)
    sequences = []
    sequence_info = []
    for pos in positions:
        pos = int(pos)
        # Clamp at the end of the data; a non-positive count means there is
        # nothing after the trigger row (or max_points itself is <= 0).
        take_points = min(max_points, total - pos - 1)
        if take_points <= 0:
            continue
        base_price = prices[pos]
        sequences.append(prices[pos + 1:pos + 1 + take_points] - base_price)
        sequence_info.append({
            'start_index': pos,
            'volume': volumes[pos],
            'base_price': base_price,
            'sequence_length': take_points,
        })
    return sequences, sequence_info


def _mean_and_std_by_step(sequences):
    """Return per-step mean and std lists across sequences of unequal length.

    At each step only the sequences long enough to have a value contribute.
    """
    if not sequences:
        return [], []
    longest = max(len(seq) for seq in sequences)
    avg_changes = []
    std_changes = []
    for step in range(longest):
        values = [seq[step] for seq in sequences if len(seq) > step]
        avg_changes.append(np.mean(values))
        std_changes.append(np.std(values))
    return avg_changes, std_changes


def _final_change_stats(sequences):
    """Summarise the last value and the extremes of every sequence.

    Returns an empty dict when there are no sequences.
    """
    if not sequences:
        return {}
    finals = np.array([seq[-1] for seq in sequences])
    return {
        'count': len(sequences),
        'avg_final_change': np.mean(finals),
        'std_final_change': np.std(finals),
        'max_rise': np.max(finals),
        'max_fall': np.min(finals),
        'positive_ratio': np.count_nonzero(finals > 0) / len(finals),
        'avg_max_gain': np.mean([np.max(seq) for seq in sequences]),
        'avg_max_loss': np.mean([np.min(seq) for seq in sequences]),
    }


def _stats_lines(stats):
    """Format the summary statistics, one display line per metric."""
    if not stats:
        return []
    return [
        f"序列数量: {stats['count']}",
        f"平均最终变化: {stats['avg_final_change']:.4f}",
        f"变化标准差: {stats['std_final_change']:.4f}",
        f"最大上涨: {stats['max_rise']:.4f}",
        f"最大下跌: {stats['max_fall']:.4f}",
        f"上涨比例: {stats['positive_ratio']:.1%}",
        f"平均最大获利: {stats['avg_max_gain']:.4f}",
        f"平均最大亏损: {stats['avg_max_loss']:.4f}",
    ]


def _key_point_lines(length, avg_changes, std_changes):
    """Format "label: mean (±std)" lines for the key steps of this window."""
    if length >= 500:
        wanted = _KEY_POINTS
    elif length >= 200:
        wanted = _KEY_POINTS[:3]
    else:
        wanted = _KEY_POINTS[:2]
    lines = []
    for point, name in wanted:
        if point >= len(avg_changes):
            continue
        line = f"{name}: {avg_changes[point]:.4f}"
        if point < len(std_changes):
            line += f" (±{std_changes[point]:.4f})"
        lines.append(line)
    return lines


def _plot_all_sequences(ax, sequences, linewidth):
    """Draw every sequence on ``ax`` with a viridis gradient, one colour each."""
    if not sequences:
        return
    colors = plt.cm.viridis(np.linspace(0.3, 0.9, len(sequences)))
    for color, sequence in zip(colors, sequences):
        ax.plot(range(len(sequence)), sequence, color=color, alpha=0.6,
                linewidth=linewidth)


def analyze_current_volume_extended(
    data_path='../data/au2512_20251013.parquet',
    volume_threshold=150,
    analysis_lengths=(100, 200, 500),
):
    """Plot and summarise price moves after rows with large per-tick volume.

    Reads a parquet file, locates the per-tick volume and trade-price columns
    by name, selects rows whose volume exceeds ``volume_threshold`` and, for
    each window length, writes two PNG files to the current working
    directory and prints summary statistics.

    Args:
        data_path: parquet file to read; a relative path is resolved against
            the current working directory.
        volume_threshold: rows with volume strictly greater than this value
            are treated as triggers.
        analysis_lengths: numbers of following rows to analyse, one pair of
            charts per entry.

    Returns:
        None. Returns early (after printing a message) when no volume or
        price column can be identified.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    # Locate the per-tick volume column, falling back to a looser match.
    current_volume_col = _find_volume_column(df.columns)
    if current_volume_col is None:
        print("未找到当前成交量列,尝试查找其他可能的成交量列...")
        current_volume_col = _find_fallback_volume_column(df.columns)
        if current_volume_col is not None:
            print(f"使用可能的成交量列: {current_volume_col}")
    if current_volume_col is None:
        print("未找到合适的成交量列")
        return
    print(f"使用当前成交量列: {current_volume_col}")

    price_col = _find_price_column(df.columns)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    volume_series = df[current_volume_col]
    print(f"\n当前成交量统计信息:")
    print(f"最小值: {volume_series.min()}")
    print(f"最大值: {volume_series.max()}")
    print(f"平均值: {volume_series.mean():.2f}")
    print(f"中位数: {volume_series.median():.2f}")

    # Work with row positions rather than index labels so the window
    # arithmetic stays correct even if the frame's index is not 0..n-1.
    volumes = volume_series.to_numpy()
    prices = df[price_col].to_numpy()
    large_mask = (volume_series > volume_threshold).to_numpy(dtype=bool, na_value=False)
    large_positions = np.flatnonzero(large_mask)
    print(f"\n找到当前成交量大于{volume_threshold}的数据点数量: {len(large_positions)}")
    if len(large_positions) > 0:
        large_volumes = volumes[large_positions]
        print(f"大成交量统计: 最小={large_volumes.min():.0f}, 最大={large_volumes.max():.0f}, 平均={large_volumes.mean():.0f}")

    for length in analysis_lengths:
        print(f"\n{'='*60}")
        print(f"分析当前成交量>{volume_threshold}后{length}个数据点的价格走势")
        print(f"{'='*60}")

        volume_sequences, volume_info = _extract_price_sequences(
            prices, volumes, large_positions, length)
        print(f"成功提取 {len(volume_sequences)} 个大成交量价格序列 (最大长度: {length})")

        avg_changes, std_changes = _mean_and_std_by_step(volume_sequences)
        volume_stats = _final_change_stats(volume_sequences)
        stat_lines = _stats_lines(volume_stats)
        key_lines = _key_point_lines(length, avg_changes, std_changes)

        # ---- Comprehensive 2x2 figure ----
        fig, axes = plt.subplots(2, 2, figsize=(20, 16))
        fig.suptitle(f'当前成交量>{volume_threshold}的价格走势分析 (后{length}个数据点)',
                     fontsize=16, fontweight='bold')

        # 1. Every sequence, relative to its own base price.
        ax1 = axes[0, 0]
        _plot_all_sequences(ax1, volume_sequences, linewidth=0.8)
        ax1.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
        ax1.set_xlabel('数据点序号')
        ax1.set_ylabel('相对价格变化')
        ax1.set_title(f'当前成交量>{volume_threshold}的价格变化走势 (后{length}点)\n{len(volume_sequences)}个序列')
        ax1.grid(True, alpha=0.3)
        ax1.text(0.02, 0.98, f'序列数: {len(volume_sequences)}', transform=ax1.transAxes,
                 verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

        # 2. Largest 20% of triggers by volume (red) versus the rest (blue).
        ax2 = axes[0, 1]
        top_sequences = []
        bottom_sequences = []
        if volume_info:
            ranked = sorted(range(len(volume_info)),
                            key=lambda i: volume_info[i]['volume'], reverse=True)
            top_count = max(1, len(ranked) // 5)  # always at least one
            top_sequences = [volume_sequences[i] for i in ranked[:top_count]]
            bottom_sequences = [volume_sequences[i] for i in ranked[top_count:]]
            for i, sequence in enumerate(top_sequences):
                ax2.plot(range(len(sequence)), sequence, color='red', alpha=0.7,
                         linewidth=1.2, label='最大20%成交量' if i == 0 else "")
            for i, sequence in enumerate(bottom_sequences):
                ax2.plot(range(len(sequence)), sequence, color='blue', alpha=0.4,
                         linewidth=0.6, label='其他80%成交量' if i == 0 else "")
        ax2.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
        ax2.set_xlabel('数据点序号')
        ax2.set_ylabel('相对价格变化')
        ax2.set_title(f'按成交量大小分组的价格走势 (后{length}点)\n红色:最大20%({len(top_sequences)}个) 蓝色:其他80%({len(bottom_sequences)}个)')
        ax2.grid(True, alpha=0.3)
        if volume_info:
            ax2.legend(fontsize=10)

        # 3. Mean change per step with a +/- one standard deviation band.
        ax3 = axes[1, 0]
        if avg_changes:
            steps = range(len(avg_changes))
            ax3.plot(steps, avg_changes, color='green', linewidth=2.5,
                     label=f'平均变化 (n={len(volume_sequences)})')
            upper_bound = [avg + std for avg, std in zip(avg_changes, std_changes)]
            lower_bound = [avg - std for avg, std in zip(avg_changes, std_changes)]
            ax3.fill_between(steps, lower_bound, upper_bound, alpha=0.3,
                             color='green', label='±1标准差区间')
        ax3.axhline(y=0, color='black', linestyle='--', alpha=0.7, linewidth=1.5)
        ax3.set_xlabel('数据点序号')
        ax3.set_ylabel('平均相对价格变化')
        ax3.set_title(f'平均价格变化及置信区间 (后{length}点)')
        if avg_changes:
            # Only build a legend when something labelled was drawn.
            ax3.legend(fontsize=12)
        ax3.grid(True, alpha=0.3)

        # 4. Statistics text panel.
        ax4 = axes[1, 1]
        ax4.axis('off')
        text_parts = [f"=== 当前成交量>{volume_threshold} 统计信息 (后{length}点) ===\n\n"]
        if stat_lines:
            text_parts.append("\n".join(stat_lines) + "\n\n")
        if avg_changes:
            text_parts.append("=== 关键时间点分析 ===\n")
            text_parts.extend(line + "\n" for line in key_lines)
        if volume_info:
            trigger_volumes = [info['volume'] for info in volume_info]
            text_parts.append("\n=== 成交量信息 ===\n")
            text_parts.append(f"成交量范围: {min(trigger_volumes):.0f} - {max(trigger_volumes):.0f}\n")
            text_parts.append(f"平均成交量: {np.mean(trigger_volumes):.0f}\n")
            text_parts.append(f"成交量中位数: {np.median(trigger_volumes):.0f}")
        stats_text = "".join(text_parts)
        ax4.text(0.05, 0.95, stats_text, transform=ax4.transAxes, fontsize=11,
                 verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

        fig.tight_layout()
        output_file = f'current_volume_comprehensive_analysis_{length}points.png'
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
        plt.close(fig)  # release the figure as soon as it is on disk
        print(f"\n{length}点综合分析图表已保存为: {output_file}")
        print(f"完整路径: {os.path.abspath(output_file)}")

        # ---- Stand-alone figure with every sequence ----
        fig_volume, ax_volume = plt.subplots(figsize=(15, 10))
        _plot_all_sequences(ax_volume, volume_sequences, linewidth=1)
        ax_volume.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
        ax_volume.set_xlabel(f'数据点序号 (相对于大成交量时刻, 后{length}点)', fontsize=12)
        ax_volume.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax_volume.set_title(f'当前成交量>{volume_threshold}的数据点后{length}个相对价格变化走势\n所有序列从基准点(0)开始\n{len(volume_sequences)}个序列',
                            fontsize=14, fontweight='bold')
        ax_volume.grid(True, alpha=0.3)
        fig_volume.tight_layout()
        volume_output_file = f'current_volume_relative_price_changes_{length}points.png'
        fig_volume.savefig(volume_output_file, dpi=300, bbox_inches='tight')
        print(f"大成交量{length}点分析图表已保存为: {volume_output_file}")
        plt.close(fig_volume)

        # ---- Console summary ----
        print(f"\n{'='*50}")
        print(f"详细统计信息 (后{length}点):")
        print(f"{'='*50}")
        if stat_lines:
            print(f"\n【当前成交量>{volume_threshold}】")
            for line in stat_lines:
                print(line)
        if avg_changes:
            print(f"\n关键时间点分析:")
            for line in key_lines:
                print(line)

        plt.close('all')  # make sure no figure from this window stays in memory

    print(f"\n{'='*60}")
    print("所有分析完成!")
    print(f"{'='*60}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    analyze_current_volume_extended()