self_cv/ai_batch_processor.py
2025-05-11 22:32:52 +08:00

109 lines
4.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import pyperclip
import math
import os
import json
# --- Configuration ---
# Number of characters of source text sent to the model per request.
CHUNK_SIZE = 5000
# Model identifier placed in the request payload.
MODEL_NAME = "grok-3"
# Chat-completions endpoint (confirm this is the correct endpoint for the provider).
API_URL = "https://aizex.top/v1/chat/completions"
# SECURITY: do not hard-code a real API key in this script. The key is read
# from the AIZEX_API_KEY environment variable; the placeholder below is only
# the fallback used when that variable is not set.
API_KEY = os.getenv("AIZEX_API_KEY", "YOUR_API_KEY_HERE")
# --- AI call helper ---
def call_ai_api(prompt, api_key, model, api_url):
    """Send one user message to a chat-completions endpoint.

    Args:
        prompt: Text sent as the single ``user`` message.
        api_key: Bearer token placed in the ``Authorization`` header.
        model: Model identifier placed in the payload.
        api_url: URL the request is POSTed to.

    Returns:
        A ``(text, ok)`` tuple. On success ``text`` is the stripped content of
        the first choice and ``ok`` is ``True``. On any failure (transport or
        HTTP error, non-JSON body, unexpected response layout) ``text`` is an
        error description and ``ok`` is ``False``; no exception is raised for
        these cases.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # Payload follows the common OpenAI-style layout; it may need adjusting
    # to the provider's actual requirements (e.g. temperature, max_tokens).
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
    }

    # Phase 1: transport / HTTP status. Kept separate from JSON parsing so a
    # decoding problem is never reported as a failed call.
    try:
        # Generous timeout: summarising a large chunk can take a while.
        response = requests.post(api_url, headers=headers, json=payload, timeout=180)
        response.raise_for_status()  # non-2xx status -> exception
    except requests.exceptions.RequestException as e:
        print(f"错误: 调用 AI API 时出错: {e}")
        return f"API 调用失败: {e}", False

    # Phase 2: decode the body. ValueError is the common base of every JSON
    # decode error response.json() can raise, regardless of requests version
    # or JSON backend.
    try:
        result = response.json()
    except ValueError:
        print(f"错误: 解析 API 响应 JSON 时出错。响应内容: {response.text}")
        return f"API 响应解析失败: {response.text}", False

    # Phase 3: extract the first choice's text. Missing "message"/"content"
    # keys still yield an empty string; any other shape mismatch (non-dict
    # body, empty choices, null message or content) is a format error.
    try:
        content = result["choices"][0].get("message", {}).get("content", "")
    except (KeyError, IndexError, TypeError, AttributeError):
        content = None

    if isinstance(content, str):
        return content.strip(), True

    print(f"警告: API 响应格式不符合预期: {result}")
    return f"API 响应格式错误: {result}", False
# --- Main batch-processing routine ---
def process_text_in_batches():
    """Summarise the clipboard text piece by piece with a rolling summary.

    Reads the clipboard through ``pyperclip``, cuts the text into pieces of
    ``CHUNK_SIZE`` characters and sends each piece to ``call_ai_api``. The
    first piece is summarised on its own; every later piece is sent together
    with the summary obtained so far. Progress and the final summary are
    printed; nothing is returned. Processing stops at the first failed call.
    """
    # Refuse to run while the placeholder key is still in place.
    if API_KEY == "YOUR_API_KEY_HERE":
        print("错误: 请在脚本中或环境变量 AIZEX_API_KEY 中设置您的 API 密钥。")
        return

    try:
        clipboard_text = pyperclip.paste()
    except Exception as exc:
        print(f"错误: 无法从剪贴板读取文本: {exc}")
        print("请确保已安装 pyperclip 库并且有所需的权限。")
        return
    if not clipboard_text:
        print("错误: 剪贴板中没有文本。")
        return

    divider = "-" * 20
    banner = "=" * 30

    total_chars = len(clipboard_text)
    chunk_total = math.ceil(total_chars / CHUNK_SIZE)
    print(f"总字数: {total_chars}")
    print(f"每块大小: {CHUNK_SIZE}")
    print(f"总块数: {chunk_total}")
    print(divider)

    rolling_summary = ""
    for chunk_no in range(1, chunk_total + 1):
        # Slicing clamps at the end of the string, so the last piece may be short.
        offset = (chunk_no - 1) * CHUNK_SIZE
        piece = clipboard_text[offset:offset + CHUNK_SIZE]
        print(f"正在处理块 {chunk_no}/{chunk_total}...")

        if chunk_no == 1:
            # First request: summarise the piece on its own.
            prompt = f"请对以下内容进行概括总结:\n\n{piece}"
        else:
            # Later requests: merge the previous summary with the new piece.
            prompt = f"这是上一轮的概括内容:\n\n{rolling_summary}\n\n请结合以上概括内容和下面的新材料,进行融合概括总结,生成一个连贯的整体摘要:\n\n{piece}"

        reply, ok = call_ai_api(prompt, API_KEY, MODEL_NAME, API_URL)
        if not ok:
            # A failed piece aborts the whole run.
            print(f"{chunk_no} 处理失败。脚本将中止。")
            print(f"失败时的 AI 输出/错误信息: {reply}")
            return

        rolling_summary = reply  # the newest reply becomes the running summary
        print(f"{chunk_no} 处理成功。")
        print(divider)

    print("所有块处理完成。最终总结:")
    print(banner)
    print(rolling_summary)
    print(banner)
# --- Script entry point ---
# Run the batch summariser only when this file is executed directly.
if __name__ == "__main__":
    process_text_in_batches()