## 核心功能

### 1. 成交量序列分析 (volume_price_sequence.py)
- 按累计成交量排序的价格趋势分析
- 三合一综合图表:价格序列+成交量分布+时间序列
- 关键价格水平自动标注

### 2. 成交量分布深度分析 (volume_distribution_analysis.py)
- 7种专业可视化图表
- 统计特征分析和分布拟合
- 交易模式识别和业务洞察

### 3. 大额订单分析工具集 (large_orders/)
- 买1/卖1量大单分析 (阈值99)
- 买卖挂单合计分析 (阈值200)
- 当前成交量分析 (阈值150)
- 信号抑制优化算法 (38%抑制率)

## 技术特性
- 信号抑制算法:有效减少重复信号干扰
- 多维度分析:支持多种信号类型
- 专业可视化:四宫格综合分析图
- 业务洞察:基于数据的交易建议

## 分析结果
- 卖1量大单:短期下跌,长期大幅上涨反转
- 买挂合计:各时间窗口小幅正收益
- 信号抑制:短期收益从-0.0778提升至+0.1347

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
347 lines
15 KiB
Python
347 lines
15 KiB
Python
import pandas as pd
|
||
import numpy as np
|
||
import matplotlib.pyplot as plt
|
||
import matplotlib as mpl
|
||
import os
|
||
|
||
# Prefer fonts that contain CJK glyphs so the Chinese titles and labels used
# by the figures in this file can render; DejaVu Sans is the last fallback.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
# Draw negative tick labels with an ASCII hyphen rather than the Unicode
# minus sign (matplotlib's documented meaning of this setting).
plt.rcParams['axes.unicode_minus'] = False
|
||
|
||
# 1-based point numbers reported in the key-point comparison. A point is only
# reported when it lies inside the analysed horizon.
_KEY_POINTS = (50, 200, 500)


def _columns_containing(columns, *keywords):
    """Return the columns whose string form contains every keyword."""
    return [col for col in columns
            if all(word in str(col) for word in keywords)]


def _find_price_column(columns):
    """Return the first column whose name looks like a trade price, or None."""
    for col in columns:
        name = str(col)
        if '成交价' in name or 'price' in name.lower():
            return col
    return None


def _extract_price_sequences(prices, positions, max_points):
    """Collect price changes following each trigger row.

    Args:
        prices: 1-D numpy array of prices in row order.
        positions: Iterable of 0-based row positions of the trigger rows.
        max_points: Maximum number of following rows to take per trigger.

    Returns:
        A list of numpy arrays. Each array holds the following prices minus
        the trigger row's price. Triggers with no following row are skipped.
    """
    if max_points <= 0:
        return []
    sequences = []
    for pos in positions:
        # Positional slicing: truncates at the end of the data and does not
        # depend on what the DataFrame index labels happen to be.
        window = prices[pos + 1: pos + 1 + max_points]
        if len(window) > 0:
            sequences.append(window - prices[pos])
    return sequences


def _average_changes(sequences):
    """Return the per-offset mean over sequences of possibly unequal length.

    Offset ``i`` is averaged over the sequences that are longer than ``i``.
    Returns an empty list when there is nothing to average.
    """
    if not sequences:
        return []
    longest = max(len(seq) for seq in sequences)
    totals = np.zeros(longest)
    counts = np.zeros(longest)
    for seq in sequences:
        totals[:len(seq)] += seq
        counts[:len(seq)] += 1
    return (totals / counts).tolist()


def _final_change_stats(sequences, name):
    """Summarise the last value of every non-empty sequence.

    Returns an empty dict when no sequence has a final value; otherwise a
    dict with the keys ``name``, ``count``, ``avg_final_change``,
    ``std_final_change``, ``max_rise`` and ``max_fall``.
    """
    final_changes = [seq[-1] for seq in sequences if len(seq) > 0]
    if not final_changes:
        return {}
    return {
        'name': name,
        'count': len(sequences),
        'avg_final_change': np.mean(final_changes),
        'std_final_change': np.std(final_changes),
        'max_rise': np.max(final_changes),
        'max_fall': np.min(final_changes),
    }


def _stats_lines(stats):
    """Format one stats dict as the lines shown in the figure and console."""
    return [
        f"【{stats['name']}】",
        f"序列数量: {stats['count']}",
        f"平均最终变化: {stats['avg_final_change']:.4f}",
        f"变化标准差: {stats['std_final_change']:.4f}",
        f"最大上涨: {stats['max_rise']:.4f}",
        f"最大下跌: {stats['max_fall']:.4f}",
    ]


def _key_point_lines(buy_avg, sell_avg, length):
    """Format the buy/sell average comparison at the key points."""
    lines = []
    for point in _KEY_POINTS:
        offset = point - 1
        if point > length or offset >= len(buy_avg) or offset >= len(sell_avg):
            continue
        buy_value = buy_avg[offset]
        sell_value = sell_avg[offset]
        lines.append(
            f"第{point}点: 买挂={buy_value:.4f}, 卖挂={sell_value:.4f}, "
            f"差值={buy_value - sell_value:.4f}"
        )
    return lines


def _plot_sequences(ax, sequences, cmap, linewidth):
    """Draw every sequence on ``ax`` with a colour gradient and a zero line."""
    if sequences:
        colors = cmap(np.linspace(0.3, 0.9, len(sequences)))
        for color, seq in zip(colors, sequences):
            ax.plot(range(len(seq)), seq, color=color, alpha=0.6,
                    linewidth=linewidth)
    ax.axhline(y=0, color='red', linestyle='--', alpha=0.7, linewidth=1.5)
    ax.grid(True, alpha=0.3)


def _save_comparison_figure(buy_sequences, sell_sequences, buy_avg, sell_avg,
                            stats_text, threshold, length, output_file):
    """Render and save the 2x2 buy-versus-sell comparison figure."""
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    try:
        fig.suptitle(
            f'大额订单对比分析:买挂合计>{threshold} vs 卖挂合计>{threshold} '
            f'(后{length}个数据点)',
            fontsize=16, fontweight='bold')

        # Panels 1 and 2: every individual sequence for each side.
        panels = (
            (axes[0, 0], buy_sequences, plt.cm.Blues, '买挂', 'lightblue'),
            (axes[0, 1], sell_sequences, plt.cm.Reds, '卖挂', 'lightcoral'),
        )
        for ax, sequences, cmap, side, box_color in panels:
            _plot_sequences(ax, sequences, cmap, linewidth=0.8)
            ax.set_xlabel('数据点序号')
            ax.set_ylabel('相对价格变化')
            ax.set_title(f'{side}合计>{threshold}的价格变化走势 (后{length}点)\n'
                         f'共{len(sequences)}个序列')
            ax.text(0.02, 0.98, f'序列数: {len(sequences)}',
                    transform=ax.transAxes, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor=box_color, alpha=0.8))

        # Panel 3: average change of each side on a shared axis.
        ax_avg = axes[1, 0]
        if buy_avg:
            ax_avg.plot(range(len(buy_avg)), buy_avg, color='blue',
                        linewidth=2.5,
                        label=f'买挂合计>{threshold} (n={len(buy_sequences)})')
        if sell_avg:
            ax_avg.plot(range(len(sell_avg)), sell_avg, color='red',
                        linewidth=2.5,
                        label=f'卖挂合计>{threshold} (n={len(sell_sequences)})')
        ax_avg.axhline(y=0, color='black', linestyle='--', alpha=0.7,
                       linewidth=1.5)
        ax_avg.set_xlabel('数据点序号')
        ax_avg.set_ylabel('平均相对价格变化')
        ax_avg.set_title(f'平均价格变化对比 (后{length}点)')
        if buy_avg or sell_avg:
            # Skip the legend when nothing was plotted; matplotlib would only
            # emit a "no artists with labels" warning.
            ax_avg.legend(fontsize=12)
        ax_avg.grid(True, alpha=0.3)

        # Panel 4: statistics rendered as a text box.
        ax_text = axes[1, 1]
        ax_text.axis('off')
        ax_text.text(0.05, 0.95, stats_text, transform=ax_text.transAxes,
                     fontsize=11, verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='lightgray',
                               alpha=0.8))

        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)


def _save_single_side_figure(sequences, cmap, side, threshold, length,
                             output_file):
    """Render and save the stand-alone figure for one side (buy or sell)."""
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        _plot_sequences(ax, sequences, cmap, linewidth=1)
        ax.set_xlabel(f'数据点序号 (相对于大额{side}订单, 后{length}点)',
                      fontsize=12)
        ax.set_ylabel('相对价格变化 (相对于基准点)', fontsize=12)
        ax.set_title(
            f'{side}合计>{threshold}的数据点后{length}个相对价格变化走势\n'
            f'所有序列从基准点(0)开始\n共{len(sequences)}个序列',
            fontsize=14, fontweight='bold')
        fig.tight_layout()
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)


def analyze_total_orders_extended(data_path='../data/au2512_20251013.parquet',
                                  threshold=200,
                                  analysis_lengths=(100, 200, 500)):
    """Analyse the trade price after rows with large summed bid/ask volume.

    Reads a parquet file, sums the buy-side and the sell-side volume columns
    per row, selects the rows whose sum is strictly greater than
    ``threshold`` and, for each horizon in ``analysis_lengths``, collects the
    price change over the following rows. For every horizon it saves three
    PNG files in the current working directory (a 2x2 comparison figure and
    one figure per side) and prints summary statistics.

    Args:
        data_path: Parquet file to read. Defaults to the path that was
            originally hard-coded.
        threshold: Summed volume a row must exceed to be selected.
        analysis_lengths: Horizons, in rows, to analyse.

    Returns:
        None. Also returns early, after printing a message, when fewer than
        five buy or sell volume columns or no price column can be found.
    """
    print("正在读取数据文件...")
    df = pd.read_parquet(data_path)

    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {df.columns.tolist()}")

    # Volume columns are located by name: the side character plus '量'.
    buy_vol_cols = _columns_containing(df.columns, '买', '量')
    sell_vol_cols = _columns_containing(df.columns, '卖', '量')
    print(f"找到买量列: {buy_vol_cols}")
    print(f"找到卖量列: {sell_vol_cols}")

    if len(buy_vol_cols) < 5:
        print("警告: 未找到完整的买1-5量列")
        return
    if len(sell_vol_cols) < 5:
        print("警告: 未找到完整的卖1-5量列")
        return

    price_col = _find_price_column(df.columns)
    if price_col is None:
        print("未找到成交价列")
        return
    print(f"使用成交价列: {price_col}")

    print("正在计算买挂合计和卖挂合计...")
    # Work on plain arrays and row positions so the result does not depend
    # on the DataFrame index being a default 0..n-1 range.
    prices = df[price_col].to_numpy()
    buy_totals = df[buy_vol_cols].sum(axis=1).to_numpy()
    sell_totals = df[sell_vol_cols].sum(axis=1).to_numpy()
    buy_positions = np.flatnonzero(buy_totals > threshold)
    sell_positions = np.flatnonzero(sell_totals > threshold)

    print(f"找到买挂合计大于{threshold}的数据点数量: {len(buy_positions)}")
    print(f"找到卖挂合计大于{threshold}的数据点数量: {len(sell_positions)}")

    for label, totals, positions in (('买挂合计', buy_totals, buy_positions),
                                     ('卖挂合计', sell_totals, sell_positions)):
        if len(positions) > 0:
            selected = totals[positions]
            print(f"{label}统计: 最小={selected.min():.0f}, "
                  f"最大={selected.max():.0f}, 平均={selected.mean():.0f}")

    for length in analysis_lengths:
        print(f"\n{'='*60}")
        print(f"分析后{length}个数据点的价格走势")
        print(f"{'='*60}")

        buy_sequences = _extract_price_sequences(prices, buy_positions, length)
        sell_sequences = _extract_price_sequences(prices, sell_positions,
                                                  length)
        print(f"成功提取 {len(buy_sequences)} 个买挂价格序列 (最大长度: {length})")
        print(f"成功提取 {len(sell_sequences)} 个卖挂价格序列 (最大长度: {length})")

        buy_avg = _average_changes(buy_sequences)
        sell_avg = _average_changes(sell_sequences)
        buy_stats = _final_change_stats(buy_sequences, f'买挂合计>{threshold}')
        sell_stats = _final_change_stats(sell_sequences, f'卖挂合计>{threshold}')
        key_lines = _key_point_lines(buy_avg, sell_avg, length)
        have_both_averages = bool(buy_avg) and bool(sell_avg)

        # Text shown in the fourth panel of the comparison figure.
        text_parts = [f"=== 统计信息对比 (后{length}点) ===\n\n"]
        for stats in (buy_stats, sell_stats):
            if stats:
                text_parts.append("\n".join(_stats_lines(stats)) + "\n\n")
        if have_both_averages:
            text_parts.append(f"=== 关键时间点对比 (后{length}点) ===\n")
            text_parts.extend(line + "\n" for line in key_lines)
        stats_text = "".join(text_parts)

        output_file = f'total_orders_comprehensive_analysis_{length}points.png'
        _save_comparison_figure(buy_sequences, sell_sequences, buy_avg,
                                sell_avg, stats_text, threshold, length,
                                output_file)
        print(f"\n{length}点综合分析图表已保存为: {output_file}")
        print(f"完整路径: {os.path.abspath(output_file)}")

        for side, tag, sequences, cmap in (
                ('买挂', 'buy', buy_sequences, plt.cm.Blues),
                ('卖挂', 'sell', sell_sequences, plt.cm.Reds)):
            side_file = f'total_{tag}_relative_price_changes_{length}points.png'
            _save_single_side_figure(sequences, cmap, side, threshold, length,
                                     side_file)
            print(f"{side}{length}点分析图表已保存为: {side_file}")

        print(f"\n{'='*50}")
        print(f"详细统计信息 (后{length}点):")
        print(f"{'='*50}")
        for stats in (buy_stats, sell_stats):
            if stats:
                print("\n" + "\n".join(_stats_lines(stats)))

        if have_both_averages:
            print("\n关键时间点对比:")
            for line in key_lines:
                print(line)

    print(f"\n{'='*60}")
    print("所有分析完成!")
    print(f"{'='*60}")
|
||
# Run the analysis only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    analyze_total_orders_extended()