## 核心功能 ### 1. 成交量序列分析 (volume_price_sequence.py) - 按累计成交量排序的价格趋势分析 - 三合一综合图表:价格序列+成交量分布+时间序列 - 关键价格水平自动标注 ### 2. 成交量分布深度分析 (volume_distribution_analysis.py) - 7种专业可视化图表 - 统计特征分析和分布拟合 - 交易模式识别和业务洞察 ### 3. 大额订单分析工具集 (large_orders/) - 买1/卖1量大单分析 (阈值99) - 买卖挂单合计分析 (阈值200) - 当前成交量分析 (阈值150) - 信号抑制优化算法 (38%抑制率) ## 技术特性 - 信号抑制算法:有效减少重复信号干扰 - 多维度分析:支持多种信号类型 - 专业可视化:四宫格综合分析图 - 业务洞察:基于数据的交易建议 ## 分析结果 - 卖1量大单:短期下跌,长期大幅上涨反转 - 买挂合计:各时间窗口小幅正收益 - 信号抑制:短期收益从-0.0778提升至+0.1347 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
156 lines
5.9 KiB
Python
156 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
为AU2512期货数据添加买挂合计和卖挂合计列
|
||
|
||
这个脚本读取原始数据文件,计算买方和卖方的挂单总量,
|
||
并添加两列:买挂合计(买1量至买5量之和)和卖挂合计(卖1量至卖5量之和)
|
||
|
||
使用方法:
|
||
python add_order_sum_columns.py
|
||
"""
|
||
|
||
import pandas as pd
|
||
import numpy as np
|
||
import os
|
||
from datetime import datetime
|
||
|
||
def add_order_sum_columns():
|
||
"""添加买挂合计和卖挂合计列到数据文件"""
|
||
|
||
# 文件路径
|
||
input_file = 'data/au2512_20251013.parquet'
|
||
backup_file = 'data/au2512_20251013_order_backup.parquet'
|
||
output_file = 'data/au2512_20251013.parquet'
|
||
|
||
print("开始为AU2512期货数据添加买挂合计和卖挂合计列...")
|
||
|
||
# 检查输入文件是否存在
|
||
if not os.path.exists(input_file):
|
||
print(f"错误: 输入文件不存在 - {input_file}")
|
||
return False
|
||
|
||
try:
|
||
# 读取数据
|
||
print(f"正在读取数据文件: {input_file}")
|
||
df = pd.read_parquet(input_file)
|
||
print(f"数据加载成功: {len(df):,} 条记录")
|
||
|
||
# 显示原始列名
|
||
print(f"\n原始列名 ({len(df.columns)} 列):")
|
||
for i, col in enumerate(df.columns):
|
||
print(f" {i:2d}: {col}")
|
||
|
||
# 找到买量和卖量的列
|
||
buy_volume_cols = []
|
||
sell_volume_cols = []
|
||
|
||
for col in df.columns:
|
||
if '买' in col and '量' in col:
|
||
buy_volume_cols.append(col)
|
||
elif '卖' in col and '量' in col:
|
||
sell_volume_cols.append(col)
|
||
|
||
print(f"\n找到的买量列: {buy_volume_cols}")
|
||
print(f"找到的卖量列: {sell_volume_cols}")
|
||
|
||
if len(buy_volume_cols) < 5 or len(sell_volume_cols) < 5:
|
||
print("警告: 找到的买量或卖量列不足5个")
|
||
|
||
# 计算买挂合计(买1量至买5量之和)
|
||
print(f"\n计算买挂合计...")
|
||
df['买挂合计'] = df[buy_volume_cols[:5]].sum(axis=1)
|
||
|
||
# 计算卖挂合计(卖1量至卖5量之和)
|
||
print(f"计算卖挂合计...")
|
||
df['卖挂合计'] = df[sell_volume_cols[:5]].sum(axis=1)
|
||
|
||
# 显示添加的列信息
|
||
print(f"\n添加的列信息:")
|
||
print(f"买挂合计: 最小值 {df['买挂合计'].min()}, 最大值 {df['买挂合计'].max()}, 平均值 {df['买挂合计'].mean():.1f}")
|
||
print(f"卖挂合计: 最小值 {df['卖挂合计'].min()}, 最大值 {df['卖挂合计'].max()}, 平均值 {df['卖挂合计'].mean():.1f}")
|
||
|
||
# 备份原始文件
|
||
if not os.path.exists(backup_file):
|
||
print(f"\n备份原始文件到: {backup_file}")
|
||
df_original = pd.read_parquet(input_file)
|
||
df_original.to_parquet(backup_file, index=False)
|
||
print("原始文件备份完成")
|
||
else:
|
||
print(f"\n备份文件已存在: {backup_file},跳过备份")
|
||
|
||
# 保存增强后的数据
|
||
print(f"\n保存增强后的数据到: {output_file}")
|
||
df.to_parquet(output_file, index=False)
|
||
|
||
# 显示最终列信息
|
||
print(f"\n最终数据列信息 ({len(df.columns)} 列):")
|
||
for i, col in enumerate(df.columns):
|
||
print(f" {i:2d}: {col}")
|
||
|
||
print(f"\n数据增强完成!")
|
||
print(f"新增列:")
|
||
print(f" - 买挂合计: 买1量至买5量之和")
|
||
print(f" - 卖挂合计: 卖1量至卖5量之和")
|
||
|
||
# 显示一些统计信息
|
||
print(f"\n统计信息:")
|
||
print(f"买挂合计 > 0 的记录: {(df['买挂合计'] > 0).sum():,} ({(df['买挂合计'] > 0).mean()*100:.1f}%)")
|
||
print(f"卖挂合计 > 0 的记录: {(df['卖挂合计'] > 0).sum():,} ({(df['卖挂合计'] > 0).mean()*100:.1f}%)")
|
||
print(f"买挂合计最大值: {df['买挂合计'].max():,} 手")
|
||
print(f"卖挂合计最大值: {df['卖挂合计'].max():,} 手")
|
||
print(f"买挂合计平均值: {df['买挂合计'].mean():.1f} 手")
|
||
print(f"卖挂合计平均值: {df['卖挂合计'].mean():.1f} 手")
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"处理过程中发生错误: {e}")
|
||
return False
|
||
|
||
def verify_columns():
|
||
"""验证添加的列是否正确"""
|
||
print("\n" + "="*60)
|
||
print("验证添加的列")
|
||
print("="*60)
|
||
|
||
try:
|
||
# 读取增强后的数据
|
||
df = pd.read_parquet('data/au2512_20251013.parquet')
|
||
|
||
if '买挂合计' in df.columns and '卖挂合计' in df.columns:
|
||
print("[OK] 买挂合计和卖挂合计列已成功添加")
|
||
|
||
# 显示前5行的相关列
|
||
buy_cols = [col for col in df.columns if '买' in col and '量' in col][:5]
|
||
sell_cols = [col for col in df.columns if '卖' in col and '量' in col][:5]
|
||
|
||
print(f"\n前5行数据验证:")
|
||
print("买量列 + 买挂合计:")
|
||
for i in range(min(5, len(df))):
|
||
buy_sum = df[buy_cols].iloc[i].sum()
|
||
buy_total = df['买挂合计'].iloc[i]
|
||
match = "[OK]" if abs(buy_sum - buy_total) < 0.001 else "[ERROR]"
|
||
print(f" 行{i+1}: 买量列求和={buy_sum}, 买挂合计={buy_total} {match}")
|
||
|
||
print("卖量列 + 卖挂合计:")
|
||
for i in range(min(5, len(df))):
|
||
sell_sum = df[sell_cols].iloc[i].sum()
|
||
sell_total = df['卖挂合计'].iloc[i]
|
||
match = "[OK]" if abs(sell_sum - sell_total) < 0.001 else "[ERROR]"
|
||
print(f" 行{i+1}: 卖量列求和={sell_sum}, 卖挂合计={sell_total} {match}")
|
||
|
||
else:
|
||
print("[ERROR] 买挂合计或卖挂合计列未找到")
|
||
|
||
except Exception as e:
|
||
print(f"验证过程中发生错误: {e}")
|
||
|
||
if __name__ == "__main__":
|
||
success = add_order_sum_columns()
|
||
|
||
if success:
|
||
verify_columns()
|
||
print(f"\n[OK] 数据增强完成!")
|
||
else:
|
||
print(f"\n[ERROR] 数据增强失败!") |