## 核心功能

### 1. 成交量序列分析 (volume_price_sequence.py)
- 按累计成交量排序的价格趋势分析
- 三合一综合图表:价格序列+成交量分布+时间序列
- 关键价格水平自动标注

### 2. 成交量分布深度分析 (volume_distribution_analysis.py)
- 7种专业可视化图表
- 统计特征分析和分布拟合
- 交易模式识别和业务洞察

### 3. 大额订单分析工具集 (large_orders/)
- 买1/卖1量大单分析 (阈值99)
- 买卖挂单合计分析 (阈值200)
- 当前成交量分析 (阈值150)
- 信号抑制优化算法 (38%抑制率)

## 技术特性
- 信号抑制算法:有效减少重复信号干扰
- 多维度分析:支持多种信号类型
- 专业可视化:四宫格综合分析图
- 业务洞察:基于数据的交易建议

## 分析结果
- 卖1量大单:短期下跌,长期大幅上涨反转
- 买挂合计:各时间窗口小幅正收益
- 信号抑制:短期收益从-0.0778提升至+0.1347

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
391 lines
18 KiB
Python
391 lines
18 KiB
Python
import pandas as pd
|
||
import numpy as np
|
||
import matplotlib.pyplot as plt
|
||
import matplotlib as mpl
|
||
import os
|
||
|
||
# 设置中文字体
|
||
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
|
||
plt.rcParams['axes.unicode_minus'] = False
|
||
|
||
# Zero-based offsets reported as "key points", paired with their display labels.
_KEY_POINTS = ((9, '第10点'), (49, '第50点'), (199, '第200点'), (499, '第500点'))


def _first_matching_column(columns, predicate):
    """Return the first column whose string form satisfies ``predicate``, else None."""
    return next((col for col in columns if predicate(str(col))), None)


def _apply_signal_suppression(positions, suppression_window=20):
    """Drop signals that fall within ``suppression_window`` rows of the last kept one.

    Returns a ``(kept_positions, suppressed_count)`` tuple for every input,
    including an empty one.
    """
    kept = []
    suppressed = 0
    for pos in sorted(positions):
        # ``kept`` is ascending, so its last entry is the closest earlier
        # signal; comparing against it alone is equivalent to scanning all.
        if kept and pos - kept[-1] <= suppression_window:
            suppressed += 1
        else:
            kept.append(pos)
    return kept, suppressed


def _extract_price_sequences(prices, volumes, positions, max_points):
    """Collect price changes (relative to the signal row) for rows after each signal.

    ``prices`` and ``volumes`` are positional arrays; each sequence holds at
    most ``max_points`` following rows and is skipped when no row follows.
    """
    sequences = []
    sequence_info = []
    total_rows = len(prices)
    for pos in positions:
        take_points = min(max_points, total_rows - pos - 1)
        if take_points <= 0:
            continue
        base_price = prices[pos]
        sequences.append(prices[pos + 1: pos + 1 + take_points] - base_price)
        sequence_info.append({
            'start_index': pos,
            'volume': volumes[pos],
            'base_price': base_price,
            'sequence_length': take_points,
        })
    return sequences, sequence_info


def _mean_and_std_by_step(sequences):
    """Return per-step mean and standard deviation across ragged sequences."""
    if not sequences:
        return [], []
    means = []
    stds = []
    for step in range(max(len(seq) for seq in sequences)):
        # Shorter sequences simply stop contributing past their end.
        values = [seq[step] for seq in sequences if len(seq) > step]
        means.append(np.mean(values))
        stds.append(np.std(values))
    return means, stds


def _summarize_sequences(sequences):
    """Return summary statistics of the sequences, or ``{}`` when there are none."""
    non_empty = [seq for seq in sequences if len(seq) > 0]
    if not non_empty:
        return {}
    final_changes = [seq[-1] for seq in non_empty]
    return {
        'count': len(sequences),
        'avg_final_change': np.mean(final_changes),
        'std_final_change': np.std(final_changes),
        'max_rise': np.max(final_changes),
        'max_fall': np.min(final_changes),
        'positive_ratio': sum(1 for change in final_changes if change > 0) / len(final_changes),
        'avg_max_gain': np.mean([np.max(seq) for seq in non_empty]),
        'avg_max_loss': np.mean([np.min(seq) for seq in non_empty]),
    }


def _key_point_rows(avg_changes, std_changes):
    """Return ``(label, mean, std)`` for every key point covered by the data."""
    return [
        (name, avg_changes[point], std_changes[point])
        for point, name in _KEY_POINTS
        if point < len(avg_changes)
    ]


def _build_stats_text(length, threshold, counts, suppression_rate,
                      volume_stats, key_rows, volume_info):
    """Assemble the multi-line statistics text shown in the fourth panel."""
    n_raw, n_kept, n_suppressed = counts
    parts = [
        f"=== 当前成交量>{threshold} 统计信息 (后{length}点) ===\n",
        "信号抑制效果:\n",
        f" 原始信号: {n_raw}个\n",
        f" 过滤信号: {n_kept}个\n",
        f" 抑制数量: {n_suppressed}个\n",
        f" 抑制率: {suppression_rate:.1f}%\n\n",
    ]
    if volume_stats:
        parts += [
            "价格统计:\n",
            f" 序列数量: {volume_stats['count']}\n",
            f" 平均最终变化: {volume_stats['avg_final_change']:.4f}\n",
            f" 变化标准差: {volume_stats['std_final_change']:.4f}\n",
            f" 最大上涨: {volume_stats['max_rise']:.4f}\n",
            f" 最大下跌: {volume_stats['max_fall']:.4f}\n",
            f" 上涨比例: {volume_stats['positive_ratio']:.1%}\n",
            f" 平均最大获利: {volume_stats['avg_max_gain']:.4f}\n",
            f" 平均最大亏损: {volume_stats['avg_max_loss']:.4f}\n\n",
        ]
    if key_rows:
        parts.append("=== 关键时间点分析 ===\n")
        parts += [f" {name}: {avg:.4f} (±{std:.4f})\n" for name, avg, std in key_rows]
    if volume_info:
        volumes = [row['volume'] for row in volume_info]
        parts += [
            "\n=== 成交量信息 ===\n",
            f" 成交量范围: {min(volumes):.0f} - {max(volumes):.0f}\n",
            f" 平均成交量: {np.mean(volumes):.0f}\n",
            f" 成交量中位数: {np.median(volumes):.0f}",
        ]
    return "".join(parts)


def _plot_sequences(ax, sequences, linewidth):
    """Draw every sequence on ``ax`` using a viridis colour gradient."""
    if not sequences:
        return
    colors = plt.cm.viridis(np.linspace(0.3, 0.9, len(sequences)))
    for color, sequence in zip(colors, sequences):
        ax.plot(range(len(sequence)), sequence, color=color, alpha=0.6, linewidth=linewidth)


def _save_comprehensive_figure(length, threshold, counts, sequences, volume_info,
                               avg_changes, std_changes, stats_text):
    """Render the 2x2 overview figure, save it as PNG and return the file name."""
    n_raw, n_kept, n_suppressed = counts
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    fig.suptitle(f'当前成交量>{threshold}的价格走势分析(优化版)(后{length}个数据点)\n原始信号:{n_raw}个 → 过滤后:{n_kept}个 (抑制{n_suppressed}个)',
                 fontsize=14, fontweight='bold')

    # Panel 1: every filtered sequence.
    ax1 = axes[0, 0]
    _plot_sequences(ax1, sequences, linewidth=0.8)
    ax1.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
    ax1.set_xlabel('数据点序号')
    ax1.set_ylabel('相对价格变化')
    ax1.set_title(f'过滤后价格变化走势 (后{length}点)\n共{len(sequences)}个有效序列')
    ax1.grid(True, alpha=0.3)
    suppression_text = f'原始: {n_raw}个\n过滤: {n_kept}个\n抑制: {n_suppressed}个'
    ax1.text(0.02, 0.98, suppression_text, transform=ax1.transAxes,
             verticalalignment='top',
             bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.8))

    # Panel 2: sequences split into the top 20% by volume versus the rest.
    ax2 = axes[0, 1]
    top_sequences = []
    bottom_sequences = []
    if volume_info:
        order = sorted(range(len(volume_info)),
                       key=lambda i: volume_info[i]['volume'], reverse=True)
        top_count = max(1, len(order) // 5)  # always keep at least one
        top_sequences = [sequences[i] for i in order[:top_count]]
        bottom_sequences = [sequences[i] for i in order[top_count:]]
        for i, sequence in enumerate(top_sequences):
            ax2.plot(range(len(sequence)), sequence, color='red', alpha=0.7, linewidth=1.2,
                     label='最大20%成交量' if i == 0 else "")
        for i, sequence in enumerate(bottom_sequences):
            ax2.plot(range(len(sequence)), sequence, color='blue', alpha=0.4, linewidth=0.6,
                     label='其他80%成交量' if i == 0 else "")
    ax2.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
    ax2.set_xlabel('数据点序号')
    ax2.set_ylabel('相对价格变化')
    ax2.set_title(f'按成交量大小分组的价格走势 (后{length}点)\n红色:最大20%({len(top_sequences)}个) 蓝色:其他80%({len(bottom_sequences)}个)')
    ax2.grid(True, alpha=0.3)
    if volume_info:
        ax2.legend(fontsize=10)

    # Panel 3: mean change with a +/- one standard deviation band.
    ax3 = axes[1, 0]
    if avg_changes:
        steps = range(len(avg_changes))
        means = np.asarray(avg_changes)
        spread = np.asarray(std_changes)
        ax3.plot(steps, means, color='green', linewidth=2.5,
                 label=f'平均变化 (n={len(sequences)})')
        ax3.fill_between(steps, means - spread, means + spread, alpha=0.3,
                         color='green', label='±1标准差区间')
        # Only request a legend when labelled artists exist.
        ax3.legend(fontsize=12)
    ax3.axhline(y=0, color='black', linestyle='--', alpha=0.7, linewidth=1.5)
    ax3.set_xlabel('数据点序号')
    ax3.set_ylabel('平均相对价格变化')
    ax3.set_title(f'平均价格变化及置信区间 (后{length}点)')
    ax3.grid(True, alpha=0.3)

    # Panel 4: statistics text box.
    ax4 = axes[1, 1]
    ax4.axis('off')
    ax4.text(0.05, 0.95, stats_text, transform=ax4.transAxes, fontsize=10,
             verticalalignment='top',
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

    fig.tight_layout()
    output_file = f'current_volume_optimized_comprehensive_analysis_{length}points.png'
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure explicitly instead of relying on a later close
    return output_file


def _save_sequence_figure(length, threshold, counts, sequences):
    """Render the stand-alone sequence figure, save it and return the file name."""
    n_raw, n_kept, n_suppressed = counts
    fig, ax = plt.subplots(figsize=(15, 10))
    _plot_sequences(ax, sequences, linewidth=1)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
    ax.set_xlabel(f'数据点序号 (相对于过滤后大成交量时刻, 后{length}点)', fontsize=12)
    ax.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
    ax.set_title(f'当前成交量>{threshold}过滤后数据点{length}个相对价格变化走势\n信号抑制: {n_raw}→{n_kept} (抑制{n_suppressed}个)\n共{len(sequences)}个有效序列',
                 fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    output_file = f'current_volume_optimized_relative_price_changes_{length}points.png'
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    plt.close(fig)
    return output_file


def _print_length_report(length, counts, suppression_rate, volume_stats, key_rows):
    """Print the detailed console report for one analysis length."""
    n_raw, n_kept, n_suppressed = counts
    print(f"\n{'='*50}")
    print(f"详细统计信息 (后{length}点) - 优化版:")
    print(f"{'='*50}")

    print("\n【信号抑制效果】")
    print(f"原始信号数量: {n_raw}")
    print(f"过滤后信号数量: {n_kept}")
    print(f"被抑制信号数量: {n_suppressed}")
    print(f"信号抑制率: {suppression_rate:.1f}%")

    if volume_stats:
        print("\n【过滤后价格统计】")
        print(f"序列数量: {volume_stats['count']}")
        print(f"平均最终变化: {volume_stats['avg_final_change']:.4f}")
        print(f"变化标准差: {volume_stats['std_final_change']:.4f}")
        print(f"最大上涨: {volume_stats['max_rise']:.4f}")
        print(f"最大下跌: {volume_stats['max_fall']:.4f}")
        print(f"上涨比例: {volume_stats['positive_ratio']:.1%}")
        print(f"平均最大获利: {volume_stats['avg_max_gain']:.4f}")
        print(f"平均最大亏损: {volume_stats['avg_max_loss']:.4f}")

    if key_rows:
        print("\n关键时间点分析:")
        for name, avg, std in key_rows:
            print(f"{name}: {avg:.4f} (±{std:.4f})")


def analyze_current_volume_optimized(data_path='../data/au2512_20251013.parquet',
                                     volume_threshold=150,
                                     suppression_window=20,
                                     analysis_lengths=(100, 200, 500)):
    """Analyse price moves after large-volume rows, with signal suppression.

    Reads a parquet file, locates the current-volume and trade-price columns
    by name, selects rows whose volume exceeds ``volume_threshold``, removes
    signals that follow a kept signal within ``suppression_window`` rows, and
    for every entry of ``analysis_lengths`` saves two PNG charts to the
    current working directory and prints a statistics report.

    Rows are addressed by position, so the analysis does not depend on the
    DataFrame carrying a default integer index.

    Args:
        data_path: Parquet file to read.
        volume_threshold: A row is a signal when its volume is strictly greater.
        suppression_window: Minimum row distance between two kept signals.
        analysis_lengths: Numbers of following rows to analyse per signal.

    Returns:
        None. Returns early (after printing a message) when a required column
        is missing or when no row exceeds the threshold.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)

    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    # NOTE(review): only '累积' is excluded as "cumulative"; a column spelled
    # with '累计' would still match here — confirm against the data schema.
    current_volume_col = _first_matching_column(
        df.columns,
        lambda name: ('成交量' in name or 'cur_volume' in name.lower()) and '累积' not in name,
    )
    if current_volume_col is None:
        print("未找到当前成交量列,尝试查找其他可能的成交量列...")
        current_volume_col = _first_matching_column(
            df.columns,
            lambda name: '量' in name and not any(tag in name for tag in ('累积', '买', '卖')),
        )
        if current_volume_col is not None:
            print(f"使用可能的成交量列: {current_volume_col}")
    if current_volume_col is None:
        print("未找到合适的成交量列")
        return
    print(f"使用当前成交量列: {current_volume_col}")

    price_col = _first_matching_column(
        df.columns, lambda name: '成交价' in name or 'price' in name.lower()
    )
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    volume_series = df[current_volume_col]
    print("\n当前成交量统计信息:")
    print(f"最小值: {volume_series.min()}")
    print(f"最大值: {volume_series.max()}")
    print(f"平均值: {volume_series.mean():.2f}")
    print(f"中位数: {volume_series.median():.2f}")

    # Work on positional arrays so "the next N rows" is well defined whatever
    # index labels the DataFrame carries.
    volumes = volume_series.to_numpy()
    prices = df[price_col].to_numpy()
    signal_mask = (volume_series > volume_threshold).fillna(False).to_numpy(dtype=bool)
    signal_positions = np.flatnonzero(signal_mask).tolist()

    print(f"\n找到当前成交量大于{volume_threshold}的数据点数量: {len(signal_positions)}")
    if not signal_positions:
        # Without signals the suppression rate would divide by zero.
        print("没有满足条件的数据点,分析结束。")
        return

    large_volumes = volumes[signal_positions]
    print(f"大成交量统计: 最小={large_volumes.min():.0f}, 最大={large_volumes.max():.0f}, 平均={large_volumes.mean():.0f}")

    print(f"\n应用信号抑制逻辑({suppression_window}个数据点窗口)...")
    filtered_positions, suppressed_count = _apply_signal_suppression(
        signal_positions, suppression_window
    )
    counts = (len(signal_positions), len(filtered_positions), suppressed_count)
    suppression_rate = suppressed_count / len(signal_positions) * 100

    print(f"原始信号数量: {len(signal_positions)}")
    print(f"抑制后信号数量: {len(filtered_positions)}")
    print(f"被抑制的信号数量: {suppressed_count}")
    print(f"抑制率: {suppression_rate:.1f}%")

    filtered_volumes = volumes[filtered_positions]
    print(f"过滤后大成交量统计: 最小={filtered_volumes.min():.0f}, 最大={filtered_volumes.max():.0f}, 平均={filtered_volumes.mean():.0f}")

    for length in analysis_lengths:
        print(f"\n{'='*60}")
        print(f"分析当前成交量>{volume_threshold}后{length}个数据点的价格走势(优化版)")
        print(f"{'='*60}")

        volume_sequences, volume_info = _extract_price_sequences(
            prices, volumes, filtered_positions, length
        )
        print(f"成功提取 {len(volume_sequences)} 个过滤后大成交量价格序列 (最大长度: {length})")

        avg_changes, std_changes = _mean_and_std_by_step(volume_sequences)
        volume_stats = _summarize_sequences(volume_sequences)
        key_rows = _key_point_rows(avg_changes, std_changes)
        stats_text = _build_stats_text(
            length, volume_threshold, counts, suppression_rate,
            volume_stats, key_rows, volume_info,
        )

        output_file = _save_comprehensive_figure(
            length, volume_threshold, counts, volume_sequences, volume_info,
            avg_changes, std_changes, stats_text,
        )
        print(f"\n{length}点优化版综合分析图表已保存为: {output_file}")
        print(f"完整路径: {os.path.abspath(output_file)}")

        volume_output_file = _save_sequence_figure(
            length, volume_threshold, counts, volume_sequences
        )
        print(f"过滤后大成交量{length}点分析图表已保存为: {volume_output_file}")

        _print_length_report(length, counts, suppression_rate, volume_stats, key_rows)

    print(f"\n{'='*60}")
    print("优化版分析完成!")
    print("信号抑制逻辑已成功应用,减少了重复信号的影响。")
    print(f"{'='*60}")
|
||
|
||
# Run the analysis with its default settings when executed as a script.
if __name__ == "__main__":
    analyze_current_volume_optimized()