GPT与Claude实战入门指南：从API调用到生产环境最佳实践

2次阅读

共计 3900 个字符，预计需要花费 10 分钟才能阅读完成。

根据官方文档和社区基准测试，GPT-4 与 Claude 2 的主要差异如下：

响应速度：Claude 2 平均响应时间为 450ms，GPT-4 为 600ms（测试环境：us-east-1 区域，128 字符输入）
Token 成本：GPT-4 输入 $0.03/1k tokens，输出 $0.06/1k tokens；Claude 2 统一 $0.02/1k tokens
上下文窗口：GPT-4 最大 32k tokens，Claude 2 支持 100k tokens 长文本处理
代码生成：GPT-4 在 LeetCode 题型上准确率 78%，Claude 2 为 65%（Anthropic 官方报告）

安装必要依赖

pip install openai anthropic python-dotenv

配置环境变量

# .env 文件示例
OPENAI_API_KEY="sk-xxx"
ANTHROPIC_API_KEY="sk-ant-xxx"

import os
from openai import OpenAI
from anthropic import Anthropic
from dotenv import load_dotenv

load_dotenv()

def query_gpt(prompt: str, model: str = "gpt-4") -> str:
    """
    Send a single-turn prompt to the OpenAI chat completions endpoint.

    :param prompt: user prompt, sent as one message with role ``user``
    :param model: model identifier passed to the API
    :return: message content of the first choice in the response
    :raises Exception: any error raised by the client is printed and re-raised
    """
    # The API key is read from the environment (populated by load_dotenv()).
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=1000,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Report the failure, then let the caller decide how to recover.
        print(f"GPT 请求失败: {str(e)}")
        raise

# Claude 调用同理，注意消息格式差异

使用 aiohttp 实现异步请求：

import aiohttp
import asyncio
from typing import List

async def batch_query_claude(prompts: List[str]) -> List[str]:
    """
    Query the Anthropic Messages API concurrently, one request per prompt.

    :param prompts: list of prompt strings
    :return: list of results in the same order as ``prompts``; each entry is
        the text of the first content block of the reply, or a string
        starting with ``"Error: "`` if that individual request failed
    """
    if not prompts:
        # Nothing to send, so do not open an HTTP session at all.
        return []

    headers = {
        "x-api-key": os.getenv("ANTHROPIC_API_KEY"),
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
    }

    async def _fetch(session, prompt: str) -> str:
        """Send one prompt and return the text of the first content block."""
        payload = {
            "model": "claude-2.1",
            "max_tokens": 1000,
            "messages": [{"role": "user", "content": prompt}],
        }
        # ``async with`` releases the connection once the body has been read.
        async with session.post(
            "https://api.anthropic.com/v1/messages",
            json=payload,
            headers=headers,
        ) as resp:
            # Turn HTTP error statuses into exceptions so they are reported
            # per prompt instead of failing later on a missing "content" key.
            resp.raise_for_status()
            body = await resp.json()
        return body["content"][0]["text"]

    async with aiohttp.ClientSession() as session:
        # return_exceptions=True keeps one failed request from cancelling
        # the rest of the batch.
        outcomes = await asyncio.gather(
            *(_fetch(session, prompt) for prompt in prompts),
            return_exceptions=True,
        )

    return [
        f"Error: {str(outcome)}" if isinstance(outcome, Exception) else outcome
        for outcome in outcomes
    ]

from collections import deque

class MemoryChatSession:
    """In-process chat history that keeps only the most recent messages."""

    def __init__(self, max_history=10):
        # A bounded deque drops its oldest entry once max_history is reached.
        self.history = deque([], max_history)

    def add_message(self, role: str, content: str):
        """Append one message, stored as a dict with "role" and "content" keys."""
        entry = dict(role=role, content=content)
        self.history.append(entry)

    def get_context(self) -> list:
        """Return the retained messages, oldest first, as a new list."""
        return [*self.history]

import redis
import json

class RedisChatSession:
    """Chat history stored as a Redis list of JSON-encoded messages."""

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, session_id: str, redis_conn: "redis.Redis"):
        """
        :param session_id: identifier of the conversation; the list is stored
            under the key ``chat:<session_id>``
        :param redis_conn: client object providing ``rpush`` and ``lrange``
        """
        self.redis = redis_conn
        self.session_id = f"chat:{session_id}"

    def add_message(self, role: str, content: str):
        """Append one message to the end of the session's list."""
        message = json.dumps({"role": role, "content": content})
        self.redis.rpush(self.session_id, message)

    def get_context(self, max_history=10) -> list:
        """
        Return up to ``max_history`` of the most recent messages, oldest first.

        :param max_history: maximum number of messages to return; zero or a
            negative value yields an empty list
        :return: list of decoded message dicts
        """
        if max_history <= 0:
            # -0 == 0, so LRANGE would be asked for 0..-1 (the whole list);
            # negative values would produce a meaningless start offset.
            return []
        messages = self.redis.lrange(self.session_id, -max_history, -1)
        return [json.loads(msg) for msg in messages]

import re

# re.ASCII restricts ``\b`` to ASCII word characters. Without it, CJK characters
# count as word characters, so a number written directly next to Chinese text
# has no word boundary and would slip through unredacted.
_CARD_NUMBER_RE = re.compile(
    r"\b[0-9]{4}[-]?[0-9]{4}[-]?[0-9]{4}[-]?[0-9]{4}\b", re.ASCII
)
_MOBILE_NUMBER_RE = re.compile(r"\b1[3-9][0-9]{9}\b", re.ASCII)


def sanitize_input(text: str) -> str:
    """
    Redact sensitive numbers from user input.

    Replaces 16-digit card numbers (optionally hyphen-separated in groups of
    four) and 11-digit mobile numbers starting with 1 followed by 3-9 with
    ``[REDACTED]``.

    :param text: raw user input
    :return: text with matching numbers replaced
    """
    # Card numbers first: they are longer, so redact them before the phone pass.
    text = _CARD_NUMBER_RE.sub("[REDACTED]", text)
    return _MOBILE_NUMBER_RE.sub("[REDACTED]", text)

令牌桶算法示例：

import time
from threading import Lock

class TokenBucket:
    """
    Token-bucket rate limiter.

    The bucket starts full, refills continuously at ``refill_rate`` tokens per
    second up to ``capacity``, and ``consume`` is serialized by a lock.
    """

    def __init__(self, capacity: int, refill_rate: float):
        """
        :param capacity: maximum number of tokens the bucket can hold
        :param refill_rate: tokens added per second
        """
        self.capacity = capacity
        self.tokens = capacity
        self.refill_rate = refill_rate  # tokens/second
        # Monotonic clock: wall-clock adjustments must not produce a negative
        # or inflated elapsed time.
        self.last_refill = time.monotonic()
        self.lock = Lock()

    def consume(self, tokens=1) -> bool:
        """
        Try to take ``tokens`` tokens from the bucket.

        :param tokens: number of tokens requested; must not be negative
        :return: True if the tokens were taken, False if not enough are available
        :raises ValueError: if ``tokens`` is negative
        """
        if tokens < 0:
            # A negative request would add tokens and bypass the limit.
            raise ValueError("tokens must be non-negative")
        with self.lock:
            self._refill()
            if self.tokens >= tokens:
                self.tokens -= tokens
                return True
            return False

    def _refill(self):
        """Credit tokens for the time elapsed since the last refill."""
        now = time.monotonic()
        elapsed = now - self.last_refill
        refill_amount = elapsed * self.refill_rate
        # Never exceed the configured capacity.
        self.tokens = min(self.capacity, self.tokens + refill_amount)
        self.last_refill = now

Prometheus 格式示例：

from prometheus_client import Counter, Histogram

# Metric definitions
# Counter of API calls, labelled by model name and call outcome.
API_CALLS = Counter(
    name='llm_api_calls_total',
    documentation='Total API calls',
    labelnames=('model', 'status'),
)

# Histogram of response times in seconds, labelled by model name.
RESPONSE_TIME = Histogram(
    name='llm_response_time_seconds',
    documentation='Response time histogram',
    labelnames=('model',),
    buckets=(0.1, 0.5, 1, 2, 5),
)

# Instrument the request function: time every call and count its outcome.
@RESPONSE_TIME.labels(model="gpt-4").time()
def query_gpt_with_metrics(prompt: str) -> str:
    """
    Call ``query_gpt`` and record call metrics.

    The decorator observes the call duration in ``RESPONSE_TIME``; the body
    increments ``API_CALLS`` with status ``success`` or ``fail``.

    :param prompt: user prompt forwarded to ``query_gpt``
    :return: the value returned by ``query_gpt``
    :raises Exception: whatever ``query_gpt`` raises, after counting the failure
    """
    try:
        result = query_gpt(prompt)
    except Exception:
        API_CALLS.labels(model="gpt-4", status="fail").inc()
        raise
    # Only the request itself sits in the try body, so an error raised while
    # recording the success cannot also be counted as a failed request.
    API_CALLS.labels(model="gpt-4", status="success").inc()
    return result