【Spring AI实战】第10章 多大模型统一适配与动态切换
·
1. 统一模型调用接口:兼容百度文心、阿里通义、讯飞星火、OpenAI
我来介绍 Spring AI 的统一模型调用接口设计,以及如何兼容多个主流大模型。
Spring AI 核心概念
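Spring AI 提供了统一的抽象接口来调用各种 AI 模型(注意:下面的 ChatClient / StreamingChatClient 是 Spring AI 0.8.x 的接口命名;自 1.0.0-M1 起它们已更名为 ChatModel / StreamingChatModel,而 ChatClient 变成了 fluent 风格的调用 API,使用新版本时请对应替换):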
核心接口
// Core chat interface: blocking, request/response style.
/**
 * Abstraction over a chat model that is called synchronously.
 */
public interface ChatClient {
/**
 * Sends the prompt to the model and returns its response.
 *
 * @param prompt the messages to send
 * @return the model's response
 */
ChatResponse call(Prompt prompt);
}
// Streaming chat interface.
/**
 * Abstraction over a chat model whose response is delivered as a reactive stream.
 */
public interface StreamingChatClient {
/**
 * Sends the prompt to the model and returns the response as a {@code Flux}.
 *
 * @param prompt the messages to send
 * @return a stream of response objects
 */
Flux<ChatResponse> stream(Prompt prompt);
}
通用消息结构
/**
 * Provider-independent request object: a list of messages sent to a model.
 */
public class Prompt {
private List<Message> messages;
// Holds system instructions, user messages, AI replies, etc.
}
/**
 * A single entry of a {@code Prompt}.
 */
public interface Message {
/** @return the content of the message */
String getContent();
/** @return the type of this message */
MessageType getMessageType();
/** @return metadata attached to the message, keyed by name */
Map<String, Object> getMetadata();
}
多模型兼容实现
2.1 配置文件示例
# Provider credentials and default chat options.
# Nesting restored: the flattened form repeated keys such as `api-key` and `chat`
# at the same level, which is not valid YAML.
spring:
  ai:
    # OpenAI
    openai:
      api-key: ${OPENAI_API_KEY}
      chat:
        model: gpt-4
        temperature: 0.7
    # Baidu ERNIE (Wenxin)
    baidu-qianfan:
      api-key: ${BAIDU_API_KEY}
      secret-key: ${BAIDU_SECRET_KEY}
      chat:
        model: ERNIE-Bot-4
        temperature: 0.7
    # Alibaba Tongyi (Qwen)
    alibaba-dashscope:
      api-key: ${ALIBABA_API_KEY}
      chat:
        model: qwen-max
        temperature: 0.7
    # iFlytek Spark
    iflytek-spark:
      app-id: ${IFLYTEK_APP_ID}
      api-key: ${IFLYTEK_API_KEY}
      api-secret: ${IFLYTEK_API_SECRET}
      chat:
        model: spark-3.0
        temperature: 0.7
2.2 统一调用服务
/**
 * Facade that sends a chat message to one of several registered providers.
 *
 * <p>Clients are looked up by the provider keys {@code "openai"}, {@code "baidu"},
 * {@code "alibaba"} and {@code "iflytek"}. An unknown or {@code null} key is rejected
 * with an {@link IllegalArgumentException} instead of failing later with a
 * {@link NullPointerException}.
 */
@Service
public class UnifiedAIService {

    /** Provider key used by {@link #chat} and {@link #streamChat}; {@code openai} when the property is absent. */
    @Value("${spring.ai.provider:openai}")
    private String aiProvider;

    private final Map<String, ChatClient> chatClients;
    private final Map<String, StreamingChatClient> streamingClients;

    /**
     * Registers one blocking and one streaming client per provider.
     *
     * <p>NOTE(review): all eight parameters are mandatory and share two types, so the
     * container has to tell them apart (presumably by bean name). Confirm every bean is
     * registered unconditionally under exactly these names.
     */
    public UnifiedAIService(
            ChatClient openAiChatClient,
            StreamingChatClient openAiStreamingChatClient,
            ChatClient baiduChatClient,
            StreamingChatClient baiduStreamingChatClient,
            ChatClient alibabaChatClient,
            StreamingChatClient alibabaStreamingChatClient,
            ChatClient iflytekChatClient,
            StreamingChatClient iflytekStreamingChatClient) {
        chatClients = Map.of(
                "openai", openAiChatClient,
                "baidu", baiduChatClient,
                "alibaba", alibabaChatClient,
                "iflytek", iflytekChatClient
        );
        streamingClients = Map.of(
                "openai", openAiStreamingChatClient,
                "baidu", baiduStreamingChatClient,
                "alibaba", alibabaStreamingChatClient,
                "iflytek", iflytekStreamingChatClient
        );
    }

    /**
     * Sends the message to the configured default provider.
     *
     * @param message user message text
     * @return content of the model's reply
     * @throws IllegalArgumentException if the configured provider key is not registered
     */
    public String chat(String message) {
        return chatWithProvider(aiProvider, message);
    }

    /**
     * Streams the reply of the configured default provider.
     *
     * @param message user message text
     * @return the content of each streamed chunk; chunks without content are skipped
     * @throws IllegalArgumentException if the configured provider key is not registered
     */
    public Flux<String> streamChat(String message) {
        StreamingChatClient client = requireClient(streamingClients, aiProvider);
        Prompt prompt = new Prompt(new UserMessage(message));
        // A plain map() must never return null; mapNotNull drops content-less chunks instead of erroring.
        return client.stream(prompt).mapNotNull(UnifiedAIService::contentOrNull);
    }

    /**
     * Sends the message to an explicitly chosen provider (dynamic switching).
     *
     * @param provider provider key, e.g. {@code "openai"}
     * @param message  user message text
     * @return content of the model's reply
     * @throws IllegalArgumentException if {@code provider} is {@code null} or not registered
     */
    public String chatWithProvider(String provider, String message) {
        ChatClient client = requireClient(chatClients, provider);
        Prompt prompt = new Prompt(new UserMessage(message));
        ChatResponse response = client.call(prompt);
        return response.getResult().getOutput().getContent();
    }

    /** Looks up a client by provider key and fails with a descriptive error when it is missing. */
    private static <T> T requireClient(Map<String, T> registry, String provider) {
        // Map.of(...) maps reject null keys, so guard before the lookup.
        T client = (provider == null) ? null : registry.get(provider);
        if (client == null) {
            throw new IllegalArgumentException(
                    "Unsupported AI provider: " + provider + ", supported: " + registry.keySet());
        }
        return client;
    }

    /** Returns the text content of a streamed chunk, or {@code null} when the chunk carries none. */
    private static String contentOrNull(ChatResponse response) {
        if (response == null
                || response.getResult() == null
                || response.getResult().getOutput() == null) {
            return null;
        }
        return response.getResult().getOutput().getContent();
    }
}
2.3 工厂模式实现
/**
 * Factory that hands out the injected {@code ChatClient} matching an {@link AIProvider}.
 */
@Component
public class AIClientFactory {

    /** Providers this factory can resolve. */
    public enum AIProvider {
        OPENAI, BAIDU, ALIBABA, IFLYTEK
    }

    @Autowired
    private ChatClient openAiChatClient;
    @Autowired
    private ChatClient baiduChatClient;
    @Autowired
    private ChatClient alibabaChatClient;
    @Autowired
    private ChatClient iflytekChatClient;

    /**
     * Resolves the client for the given provider.
     *
     * @param provider the provider to resolve
     * @return the client injected for that provider
     * @throws IllegalArgumentException for a provider that has no branch below
     */
    public ChatClient getChatClient(AIProvider provider) {
        switch (provider) {
            case OPENAI:
                return openAiChatClient;
            case BAIDU:
                return baiduChatClient;
            case ALIBABA:
                return alibabaChatClient;
            case IFLYTEK:
                return iflytekChatClient;
            default:
                throw new IllegalArgumentException("Unsupported provider");
        }
    }
}
高级功能封装
3.1 统一配置类
/**
 * Declares one {@code ChatClient} bean per provider.
 *
 * <p>Every bean is conditional on the same property, {@code spring.ai.provider}, with a
 * different required value, so at most one of the four methods below is active for a
 * given configuration.
 *
 * <p>NOTE(review): UnifiedAIService and AIClientFactory in this document inject all four
 * clients at once; confirm how that is meant to work with these mutually exclusive
 * conditions.
 */
@Configuration
public class AIConfig {
// Active only when spring.ai.provider=openai.
@Bean
@ConditionalOnProperty(name = "spring.ai.provider", havingValue = "openai")
public ChatClient openAiChatClient(OpenAiChatOptions options) {
return new OpenAiChatClient(options);
}
// Active only when spring.ai.provider=baidu.
@Bean
@ConditionalOnProperty(name = "spring.ai.provider", havingValue = "baidu")
public ChatClient baiduChatClient(BaiduQianfanChatOptions options) {
return new BaiduQianfanChatClient(options);
}
// Active only when spring.ai.provider=alibaba.
@Bean
@ConditionalOnProperty(name = "spring.ai.provider", havingValue = "alibaba")
public ChatClient alibabaChatClient(AlibabaDashScopeChatOptions options) {
return new AlibabaDashScopeChatClient(options);
}
// Active only when spring.ai.provider=iflytek.
@Bean
@ConditionalOnProperty(name = "spring.ai.provider", havingValue = "iflytek")
public ChatClient iflytekChatClient(IflytekSparkChatOptions options) {
return new IflytekSparkChatClient(options);
}
}
3.2 负载均衡和降级
/**
 * Distributes chat calls over the available clients, either by trying them in order
 * until one succeeds or by round-robin selection.
 */
@Service
public class LoadBalancedAIService {

    private final List<ChatClient> clients;
    private final AtomicInteger counter = new AtomicInteger(0);

    /**
     * Collects the non-null clients in fixed order: OpenAI, Baidu, Alibaba, iFlytek.
     *
     * <p>NOTE(review): Spring's documentation says {@code @Autowired} on individual
     * constructor parameters is ignored by most of the framework. Confirm that a missing
     * bean really arrives here as {@code null}; otherwise use {@code @Nullable},
     * {@code Optional} or {@code ObjectProvider} parameters.
     */
    public LoadBalancedAIService(
            @Autowired(required = false) ChatClient openAiChatClient,
            @Autowired(required = false) ChatClient baiduChatClient,
            @Autowired(required = false) ChatClient alibabaChatClient,
            @Autowired(required = false) ChatClient iflytekChatClient) {
        clients = new ArrayList<>();
        if (openAiChatClient != null) clients.add(openAiChatClient);
        if (baiduChatClient != null) clients.add(baiduChatClient);
        if (alibabaChatClient != null) clients.add(alibabaChatClient);
        if (iflytekChatClient != null) clients.add(iflytekChatClient);
    }

    /**
     * Tries each client in registration order and returns the first successful reply.
     *
     * @param message user message text
     * @return content of the first reply obtained
     * @throws RuntimeException if every client fails (or none is registered); each
     *         client's failure is attached as a suppressed exception
     */
    public String chatWithFallback(String message) {
        RuntimeException allFailed = new RuntimeException("All AI services are unavailable");
        for (ChatClient client : clients) {
            try {
                return call(client, message);
            } catch (Exception e) {
                // Keep the cause so the final error explains why each provider was skipped.
                allFailed.addSuppressed(e);
            }
        }
        throw allFailed;
    }

    /**
     * Sends the message to the next client in round-robin order.
     *
     * @param message user message text
     * @return content of the reply
     * @throws RuntimeException if no client is registered
     */
    public String roundRobinChat(String message) {
        if (clients.isEmpty()) {
            throw new RuntimeException("No AI clients available");
        }
        // floorMod keeps the index non-negative after the counter overflows past Integer.MAX_VALUE.
        int index = Math.floorMod(counter.getAndIncrement(), clients.size());
        return call(clients.get(index), message);
    }

    /** Performs one blocking call and extracts the reply text. */
    private static String call(ChatClient client, String message) {
        Prompt prompt = new Prompt(new UserMessage(message));
        ChatResponse response = client.call(prompt);
        return response.getResult().getOutput().getContent();
    }
}
3.3 统一异常处理
/**
 * Translates AI-related exceptions into HTTP error responses.
 */
@RestControllerAdvice
public class AIExceptionHandler {

    /**
     * Maps an {@code ApiException} to HTTP 500 with the exception message as detail.
     */
    @ExceptionHandler(ApiException.class)
    public ResponseEntity<ErrorResponse> handleAIException(ApiException e) {
        return ResponseEntity.status(500).body(
                new ErrorResponse("AI_SERVICE_ERROR", e.getMessage(), LocalDateTime.now()));
    }

    /**
     * Maps a {@code RateLimitException} to HTTP 429 with a fixed message.
     */
    @ExceptionHandler(RateLimitException.class)
    public ResponseEntity<ErrorResponse> handleRateLimit(RateLimitException e) {
        return ResponseEntity.status(429).body(
                new ErrorResponse(
                        "RATE_LIMIT_EXCEEDED",
                        "API rate limit exceeded, please try again later",
                        LocalDateTime.now()));
    }
}
使用示例
4.1 REST API 接口
/**
 * REST endpoints under {@code /api/ai} that delegate to {@code UnifiedAIService}.
 */
@RestController
@RequestMapping("/api/ai")
public class AIController {

    @Autowired
    private UnifiedAIService aiService;

    /** Blocking chat with the default provider. */
    @PostMapping("/chat")
    public ResponseEntity<ChatResponse> chat(@RequestBody ChatRequest request) {
        return ResponseEntity.ok(new ChatResponse(aiService.chat(request.getMessage())));
    }

    /** Streaming chat: each content chunk is wrapped in a server-sent event. */
    @PostMapping("/chat/stream")
    public Flux<ServerSentEvent<String>> streamChat(@RequestBody ChatRequest request) {
        Flux<String> chunks = aiService.streamChat(request.getMessage());
        return chunks.map(chunk -> ServerSentEvent.builder(chunk).build());
    }

    /** Blocking chat with the provider named in the path. */
    @PostMapping("/chat/{provider}")
    public ResponseEntity<ChatResponse> chatWithProvider(
            @PathVariable String provider,
            @RequestBody ChatRequest request) {
        return ResponseEntity.ok(
                new ChatResponse(aiService.chatWithProvider(provider, request.getMessage())));
    }
}
4.2 DTO 对象
/**
 * Request body of the chat endpoints.
 */
@Data
public class ChatRequest {
private String message;
private String model; // optional: model to use
private Double temperature; // optional
private Integer maxTokens; // optional
}
/**
 * Response body of the chat endpoints.
 *
 * <p>NOTE(review): this DTO has the same simple name as the {@code ChatResponse} returned
 * by {@code ChatClient.call} elsewhere in this document; confirm the two live in
 * different packages or rename one.
 */
@Data
@AllArgsConstructor
public class ChatResponse {
private String content;
private String model;
private LocalDateTime timestamp;
/**
 * Creates a response for the given content, with model "default" and the current time.
 *
 * @param content reply text
 */
public ChatResponse(String content) {
this.content = content;
this.model = "default";
this.timestamp = LocalDateTime.now();
}
}
最佳实践建议
- 配置管理 : 使用 Spring Cloud Config 或 Apollo 管理不同环境的配置
- 监控指标 : 集成 Micrometer 收集调用延迟、成功率等指标
- 缓存策略 : 对常见问题结果进行缓存
- 限流熔断 : 使用 Resilience4j 实现限流和熔断
- 异步处理 : 对耗时请求使用异步处理
- 成本控制 : 记录各模型使用量,控制成本
这样设计的好处是:
- 统一接口 : 业务代码无需关心底层模型实现
- 灵活切换 : 可动态切换不同 AI 提供商
- 易于扩展 : 新增模型只需添加配置和客户端
- 容错能力强 : 支持降级和负载均衡
- 维护方便 : 集中管理所有 AI 相关配置
2. 动态模型路由:根据业务场景自动切换模型
我来详细介绍一下 Spring AI 中的动态模型路由方案,以及如何根据业务场景自动切换模型。
Spring AI 模型路由架构
核心组件
// 1. Model provider interface
/**
 * A model backend that can generate text for a prompt.
 */
public interface ModelProvider {

    /**
     * Generates a completion for the prompt.
     *
     * @param prompt input text
     * @return generated text
     */
    String generate(String prompt);

    /**
     * Reports whether this provider can handle the given kind of model work.
     *
     * @param type the model type being asked for
     * @return {@code true} if supported
     */
    boolean supports(ModelType type);

    /**
     * Name used to identify this provider, e.g. when recording metrics and cost.
     *
     * <p>Other snippets in this document call {@code provider.getName()}, so the method is
     * declared here. It is a default method so existing implementations keep compiling.
     *
     * @return the simple class name unless an implementation overrides it
     */
    default String getName() {
        return getClass().getSimpleName();
    }
}
// 2. Routing strategy interface
/**
 * Chooses the {@code ModelProvider} that should serve a request.
 */
public interface ModelRouter {
/**
 * Selects a provider.
 *
 * @param scenario name of the business scenario
 * @param context  additional routing inputs, keyed by name
 * @return the selected provider
 */
ModelProvider route(String scenario, Map<String, Object> context);
}
基于场景的路由实现
2.1 配置类定义
/**
 * Wires the routing components and enables binding of {@code ModelRoutingProperties}.
 */
@Configuration
@EnableConfigurationProperties(ModelRoutingProperties.class)
public class ModelRoutingConfiguration {
// Builds the scenario-based router from all ModelProvider beans and the bound properties.
// NOTE(review): ScenarioBasedModelRouter is also annotated @Component in this document,
// which would register a second ModelRouter bean; confirm only one registration is kept.
@Bean
public ModelRouter modelRouter(
List<ModelProvider> providers,
ModelRoutingProperties properties
) {
return new ScenarioBasedModelRouter(providers, properties);
}
// Registered only when no other AiClient bean exists.
// NOTE(review): the RoutingAiClient shown later in this document declares no constructor
// taking a ModelRouter and does not visibly implement AiClient; confirm against the real class.
@Bean
@ConditionalOnMissingBean
public AiClient aiClient(ModelRouter router) {
return new RoutingAiClient(router);
}
}
2.2 路由属性配置
# Scenario-to-model routing table (nesting restored from the flattened listing).
spring:
  ai:
    routing:
      enabled: true
      default-model: gpt-3.5-turbo
      scenarios:
        creative-writing:
          model: gpt-4
          temperature: 0.9
          max-tokens: 2000
        code-generation:
          model: claude-3-opus
          temperature: 0.2
          max-tokens: 4000
        data-analysis:
          model: gemini-pro
          temperature: 0.1
        cost-sensitive:
          model: gpt-3.5-turbo
          max-cost: 0.01
2.3 场景路由实现
/**
 * Router that first consults the per-scenario configuration and otherwise falls back to
 * content-based selection.
 */
@Component
public class ScenarioBasedModelRouter implements ModelRouter {

    private final List<ModelProvider> providers;
    private final Map<String, ModelProvider> providerMap;
    private final ModelRoutingProperties properties;
    private final ModelProvider defaultProvider;

    /**
     * Builds the router from the available providers and the routing properties.
     *
     * <p>NOTE(review): the original listing omitted this constructor although another
     * snippet calls it with {@code (providers, properties)}. Providers are indexed by
     * {@code getName()} and the default is looked up via {@code getDefaultModel()}; both
     * accessors are assumptions based on their use and the {@code default-model} key
     * elsewhere in this document. Verify against the real types.
     *
     * @param providers  candidate providers, in priority order
     * @param properties bound routing configuration
     */
    public ScenarioBasedModelRouter(List<ModelProvider> providers, ModelRoutingProperties properties) {
        this.providers = new ArrayList<>(providers);
        this.properties = properties;
        Map<String, ModelProvider> byName = new HashMap<>();
        for (ModelProvider provider : providers) {
            // On duplicate names the first (highest-priority) provider wins.
            byName.putIfAbsent(provider.getName(), provider);
        }
        this.providerMap = byName;
        this.defaultProvider = byName.get(properties.getDefaultModel());
    }

    /**
     * Routes by scenario configuration when one exists, otherwise by content analysis.
     *
     * @param scenario scenario name used as key into the configured scenarios
     * @param context  routing inputs; the keys read here are "budget", "timeout" and "content"
     * @return the chosen provider, or the default provider when nothing more specific matches
     */
    @Override
    public ModelProvider route(String scenario, Map<String, Object> context) {
        // 1. Check the scenario configuration
        ModelRoutingProperties.ScenarioConfig config =
                properties.getScenarios().get(scenario);
        if (config != null) {
            return selectByScenario(config, context);
        }
        // 2. Content-based routing
        return intelligentRoute(context);
    }

    /** Applies the scenario rules: priority first, then budget, then latency, then the configured model. */
    private ModelProvider selectByScenario(
            ModelRoutingProperties.ScenarioConfig config,
            Map<String, Object> context
    ) {
        if ("high-accuracy".equals(config.getPriority())) {
            return configuredProvider(config);
        }
        // Cost: context values may arrive as any Number (e.g. Integer), so never cast blindly.
        Double budget = toDouble(context.get("budget"));
        Double maxCost = config.getMaxCost(); // may be unset for a scenario
        if (budget != null && maxCost != null && budget < maxCost) {
            return getCostEffectiveProvider();
        }
        // Response time
        Integer timeout = toInteger(context.get("timeout"));
        if (timeout != null && timeout < 5000) {
            return getFastProvider();
        }
        return configuredProvider(config);
    }

    /** Picks the first provider that supports the analysed content type, else the default provider. */
    private ModelProvider intelligentRoute(Map<String, Object> context) {
        String content = (String) context.get("content");
        ModelType type = analyzeContentType(content);
        return providers.stream()
                .filter(p -> p.supports(type))
                .findFirst()
                .orElse(defaultProvider);
    }

    /** Returns the provider registered for the scenario's model, else the default provider. */
    private ModelProvider configuredProvider(ModelRoutingProperties.ScenarioConfig config) {
        ModelProvider provider = providerMap.get(config.getModel());
        return provider != null ? provider : defaultProvider;
    }

    /** Converts a numeric context value to {@code Double}; {@code null} for anything else. */
    private static Double toDouble(Object value) {
        return (value instanceof Number) ? Double.valueOf(((Number) value).doubleValue()) : null;
    }

    /** Converts a numeric context value to {@code Integer}; {@code null} for anything else. */
    private static Integer toInteger(Object value) {
        return (value instanceof Number) ? Integer.valueOf(((Number) value).intValue()) : null;
    }
}
动态路由策略
3.1 基于内容类型的路由
/**
 * Picks a provider from the kind of content found in the request text.
 */
@Component
public class ContentBasedRouter {

    /**
     * Classifies the content and returns the provider dedicated to that class.
     *
     * @param content request text
     * @return the provider for the detected content type, or the default provider
     */
    public ModelProvider routeByContent(String content) {
        ContentType detected = analyzeContent(content);
        return switch (detected) {
            case CODE -> codeModelProvider();
            case CREATIVE -> creativeModelProvider();
            case TECHNICAL -> technicalModelProvider();
            case MULTIMODAL -> multimodalModelProvider();
            default -> defaultModelProvider();
        };
    }

    /**
     * Classifies content with rule checks (or an ML model), evaluated in the order
     * code, creative, technical, multimedia; the first match wins.
     */
    private ContentType analyzeContent(String content) {
        if (containsCode(content)) {
            return ContentType.CODE;
        } else if (containsCreativeMarkers(content)) {
            return ContentType.CREATIVE;
        } else if (containsTechnicalTerms(content)) {
            return ContentType.TECHNICAL;
        } else if (containsMultimediaRefs(content)) {
            return ContentType.MULTIMODAL;
        }
        return ContentType.GENERAL;
    }
}
3.2 基于性能指标的路由
/**
 * Picks the provider with the lowest weighted score among recent measurements.
 */
@Component
@RefreshScope
public class PerformanceBasedRouter {

    @Autowired
    private ModelMetricsCollector metricsCollector;

    /**
     * Returns the provider whose recent performance has the smallest score.
     *
     * @param scenario scenario whose recent measurements are evaluated
     * @return best-scoring provider, or the default provider when there are no measurements
     */
    public ModelProvider routeByPerformance(String scenario) {
        List<ModelPerformance> recent = metricsCollector.getRecentPerformance(scenario);
        Comparator<ModelPerformance> byScore = Comparator.comparingDouble(
                perf -> calculateScore(perf.getLatency(), perf.getCost(), perf.getAccuracy()));
        return recent.stream()
                .min(byScore)
                .map(ModelPerformance::getProvider)
                .orElse(defaultProvider);
    }

    /**
     * Weighted score (lower is better): 30% latency, 40% cost, 30% inaccuracy.
     */
    private double calculateScore(double latency, double cost, double accuracy) {
        double latencyPart = latency * 0.3;
        double costPart = cost * 0.4;
        double inaccuracyPart = (1 - accuracy) * 0.3;
        return latencyPart + costPart + inaccuracyPart;
    }
}
注解驱动的路由
4.1 定义路由注解
/**
 * Declares routing hints for a method or type; retained at runtime.
 */
@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface ModelRoute {
/** Scenario name; empty by default. */
String scenario() default "";
/** Requested model type; {@code GENERAL} by default. */
ModelType type() default ModelType.GENERAL;
/** Maximum cost; 0.1 by default. NOTE(review): unit/currency is not defined here. */
double maxCost() default 0.1;
/** Timeout; 30000 by default. NOTE(review): presumably milliseconds; confirm. */
int timeout() default 30000;
}
4.2 AOP 路由拦截器
/**
 * Around-advice for methods annotated with {@code @ModelRoute}: selects a provider via
 * the router and executes the call with it.
 */
@Aspect
@Component
public class ModelRoutingAspect {

    @Autowired
    private ModelRouter modelRouter;

    /**
     * Builds the routing context from the annotation and arguments, asks the router for
     * a provider and runs the intercepted call with that provider.
     */
    @Around("@annotation(modelRoute)")
    public Object routeModel(ProceedingJoinPoint joinPoint, ModelRoute modelRoute) {
        Map<String, Object> routingContext = buildContext(joinPoint, modelRoute);
        ModelProvider selected = modelRouter.route(modelRoute.scenario(), routingContext);
        return executeWithProvider(joinPoint, selected);
    }

    /**
     * Collects the annotation attributes under the keys "scenario", "type", "maxCost"
     * and "timeout", plus "content" when the first argument is a {@code String}.
     */
    private Map<String, Object> buildContext(
            ProceedingJoinPoint joinPoint,
            ModelRoute modelRoute
    ) {
        Map<String, Object> routingContext = new HashMap<>();
        routingContext.put("scenario", modelRoute.scenario());
        routingContext.put("type", modelRoute.type());
        routingContext.put("maxCost", modelRoute.maxCost());
        routingContext.put("timeout", modelRoute.timeout());

        // Only the first method argument is considered as content.
        Object[] methodArgs = joinPoint.getArgs();
        boolean firstArgIsText = methodArgs.length > 0 && methodArgs[0] instanceof String;
        if (firstArgIsText) {
            routingContext.put("content", methodArgs[0]);
        }
        return routingContext;
    }
}
4.3 使用示例
/**
 * Usage example for {@code @ModelRoute}.
 *
 * <p>NOTE(review): {@code aiClient} is not declared in this snippet; presumably an
 * injected field was omitted.
 */
@Service
public class ContentService {
@ModelRoute(scenario = "creative-writing", type = ModelType.CREATIVE)
public String generateStory(String prompt) {
// Routed automatically to the creative-writing model
return aiClient.generate(prompt);
}
@ModelRoute(scenario = "code-generation", maxCost = 0.05)
public String generateCode(String requirement) {
// Routed automatically to the code-generation model
return aiClient.generate(requirement);
}
}
动态配置更新
5.1 配置热更新
/**
 * Refreshes routing rules when routing configuration changes and periodically updates
 * performance-based routing.
 */
@Component
@RefreshScope
public class DynamicRoutingManager {

    @Autowired
    private ModelRouter modelRouter;

    /**
     * Reloads the routing rules when any changed key starts with {@code spring.ai.routing}.
     */
    @EventListener
    public void onConfigUpdate(EnvironmentChangeEvent event) {
        boolean routingKeyChanged = event.getKeys().stream()
                .anyMatch(key -> key.startsWith("spring.ai.routing"));
        if (!routingKeyChanged) {
            return;
        }
        refreshRoutingRules();
    }

    /**
     * Updates the routing strategy from live metrics; scheduled with a fixed delay of
     * 60000 ms between runs.
     */
    @Scheduled(fixedDelay = 60000)
    public void updateRoutingBasedOnMetrics() {
        updatePerformanceBasedRouting();
    }
}
5.2 路由规则配置
/**
 * Properties holder bound to the prefix {@code spring.ai.routing.rules}.
 */
@ConfigurationProperties("spring.ai.routing.rules")
@Data
public class RoutingRules {
// Starts as an empty, mutable list.
private List<RoutingRule> rules = new ArrayList<>();
/**
 * A single routing rule.
 * NOTE(review): the syntax of {@code condition} is not defined in this snippet.
 */
@Data
public static class RoutingRule {
private String scenario;
private String condition;
private String model;
private Map<String, Object> parameters;
}
}
完整的服务示例
6.1 路由服务
/**
 * Routes a prompt to a provider asynchronously and records latency, output size and cost.
 */
@Service
public class SmartModelRoutingService {

    @Autowired
    private ModelRouter modelRouter;
    @Autowired
    private ModelMetricsService metricsService;
    @Autowired
    private CostCalculator costCalculator;

    /**
     * Generates a completion on the provider chosen by the router.
     *
     * <p>The work runs via {@code CompletableFuture.supplyAsync} without an explicit
     * executor. NOTE(review): {@code provider.generate} is presumably a blocking remote
     * call; consider passing a dedicated executor.
     *
     * @param prompt   input text
     * @param scenario scenario name passed to the router and the metrics
     * @param context  routing context, converted with {@code toMap()}
     * @return future completing with the generated text; it completes exceptionally with
     *         an {@link IllegalStateException} when the router yields no provider
     */
    public CompletableFuture<String> generate(
            String prompt,
            String scenario,
            RoutingContext context
    ) {
        return CompletableFuture.supplyAsync(() -> {
            // 1. Route to a suitable model
            ModelProvider provider = modelRouter.route(scenario, context.toMap());
            if (provider == null) {
                throw new IllegalStateException(
                        "No model provider available for scenario: " + scenario);
            }

            // 2. Generate; nanoTime is monotonic, so wall-clock adjustments cannot skew the latency
            long startNanos = System.nanoTime();
            String result = provider.generate(prompt);
            long latency = (System.nanoTime() - startNanos) / 1_000_000L; // milliseconds

            // A provider may return null; treat that as empty output for bookkeeping.
            int promptLength = (prompt == null) ? 0 : prompt.length();
            int resultLength = (result == null) ? 0 : result.length();

            // 3. Record metrics
            metricsService.recordCall(
                    scenario,
                    provider.getName(),
                    latency,
                    resultLength
            );

            // 4. Compute and record cost
            double cost = costCalculator.calculate(
                    provider.getName(),
                    promptLength,
                    resultLength
            );
            metricsService.recordCost(scenario, cost);
            return result;
        });
    }
}
6.2 客户端封装
/**
 * Synchronous convenience client on top of {@code SmartModelRoutingService}.
 */
@Component
public class RoutingAiClient {

    @Autowired
    private SmartModelRoutingService routingService;

    /** Generates with scenario "default" and an empty routing context. */
    public String generate(String prompt) {
        return generate(prompt, "default", new RoutingContext());
    }

    /** Generates for the given scenario with an empty routing context. */
    public String generate(String prompt, String scenario) {
        return generate(prompt, scenario, new RoutingContext());
    }

    /**
     * Generates for the given scenario and context, blocking until the result is ready.
     * The scenario "auto" is replaced by one detected from the prompt text.
     */
    public String generate(String prompt, String scenario, RoutingContext context) {
        String effectiveScenario = "auto".equals(scenario) ? detectScenario(prompt) : scenario;
        return routingService.generate(prompt, effectiveScenario, context).join();
    }

    /**
     * Keyword-based scenario detection; checks creative, then code, then analysis
     * keywords and falls back to "general".
     */
    private String detectScenario(String prompt) {
        if (prompt.contains("写一个") || prompt.contains("创作")) {
            return "creative-writing";
        }
        if (prompt.contains("代码") || prompt.contains("编程")) {
            return "code-generation";
        }
        if (prompt.contains("分析") || prompt.contains("总结")) {
            return "data-analysis";
        }
        return "general";
    }
}
监控和熔断
7.1 熔断器配置
/**
 * Registers the circuit-breaker factory and a fallback provider.
 *
 * <p>NOTE(review): another class named {@code CircuitBreakerConfig} appears later in this
 * document, and Resilience4j ships a class with the same simple name; confirm the
 * packages differ or rename.
 */
@Configuration
public class CircuitBreakerConfig {
// Factory created with its no-argument constructor.
@Bean
public CircuitBreakerFactory circuitBreakerFactory() {
return new Resilience4JCircuitBreakerFactory();
}
// Provider registered under the bean name "fallbackProvider".
@Bean
public ModelProvider fallbackProvider() {
return new FallbackModelProvider();
}
}
7.2 带熔断的路由
/**
 * Model calls guarded by the "modelService" circuit breaker, with {@code fallback} as
 * the fallback method.
 */
@Service
@CircuitBreaker(name = "modelService", fallbackMethod = "fallback")
public class ResilientModelService {

    @Autowired
    private ModelRouter modelRouter;

    /**
     * Routes with an empty context and generates on the selected provider.
     */
    public String generateWithCircuitBreaker(String prompt, String scenario) {
        return modelRouter.route(scenario, Map.of()).generate(prompt);
    }

    /**
     * Fallback: logs the failure and returns a fixed "temporarily unavailable" message.
     */
    public String fallback(String prompt, String scenario, Throwable t) {
        log.warn("Fallback triggered for scenario: {}", scenario, t);
        return "服务暂时不可用,请稍后重试";
    }
}
总结
Spring AI 动态模型路由的关键点:
- 多层路由策略 :基于场景、内容、性能的多维度路由
- 注解驱动 :使用注解声明路由规则,减少侵入性
- 动态配置 :支持热更新路由配置
- 智能决策 :基于历史数据和实时指标优化路由
- 容错机制 :熔断、降级、重试等保障可用性
- 成本控制 :根据预算自动选择性价比最优的模型
这种架构可以实现:
- 根据业务场景自动选择最合适的模型
- 动态调整路由策略
- 成本控制和性能优化
- 高可用性和容错能力
3. 模型降级、熔断、容错机制(企业高可用方案)
Spring AI 的高可用性方案需要结合降级、熔断、容错机制来确保企业级应用的稳定性。以下是完整的实现方案:
一、核心架构设计
多模型降级策略
/**
 * Executes a prompt against the providers in order, degrading to the next one on failure.
 */
@Component
public class ModelDegradationManager {

    @Autowired
    private List<AiModelProvider> modelProviders; // sorted by priority

    /**
     * Returns the result of the first provider that succeeds.
     *
     * @param prompt input text
     * @return generated text
     * @throws AllModelsFailedException if every provider fails (or none is configured);
     *         each provider's failure is attached as a suppressed exception
     */
    public String executeWithDegradation(String prompt) {
        List<Exception> failures = new ArrayList<>();
        for (AiModelProvider provider : modelProviders) {
            try {
                return provider.generate(prompt);
            } catch (Exception e) {
                // Passing the exception as the last argument keeps the stack trace in the log
                // (assumes an SLF4J-style logger; verify).
                log.warn("Provider {} failed, trying next", provider.getName(), e);
                // Record the failure metric
                Metrics.recordFailure(provider.getName());
                failures.add(e);
            }
        }
        AllModelsFailedException allFailed =
                new AllModelsFailedException("All AI models unavailable");
        for (Exception failure : failures) {
            allFailed.addSuppressed(failure);
        }
        throw allFailed;
    }
}
二、熔断器实现(Resilience4j 集成)
# application.yml
# Circuit-breaker instances (nesting restored from the flattened listing).
resilience4j:
  circuitbreaker:
    instances:
      openai-primary:
        failure-rate-threshold: 50
        wait-duration-in-open-state: 10s
        permitted-number-of-calls-in-half-open-state: 3
        sliding-window-size: 10
        minimum-number-of-calls: 5
      openai-fallback:
        failure-rate-threshold: 30
        wait-duration-in-open-state: 5s
/**
 * Circuit-breaker setup for the primary model, with a staged fallback.
 *
 * <p>NOTE(review): {@code openAiClient}, {@code cacheService} and {@code fallbackModel}
 * are not declared in this snippet; presumably injected fields were omitted.
 * NOTE(review): a registry created with {@code ofDefaults()} may not carry the instance
 * settings from application.yml; confirm whether this bean should exist alongside the
 * Resilience4j Spring Boot auto-configuration.
 */
@Configuration
public class CircuitBreakerConfig {

    @Bean
    public CircuitBreakerRegistry circuitBreakerRegistry() {
        return CircuitBreakerRegistry.ofDefaults();
    }

    /**
     * Calls the primary model under the "openai-primary" circuit breaker.
     *
     * <p>This is a business method, not a bean factory, so it carries no {@code @Bean}:
     * with {@code @Bean} the container would invoke it at startup and try to inject a
     * {@code String} argument.
     *
     * @param prompt input text
     * @return generated text, or the fallback result when the call is rejected or fails
     */
    @CircuitBreaker(name = "openai-primary", fallbackMethod = "fallbackResponse")
    public String callPrimaryModel(String prompt) {
        return openAiClient.generate(prompt);
    }

    /**
     * Staged fallback: cached answer, then the degraded model, then a fixed message.
     *
     * @param prompt input text of the failed call
     * @param e      failure that triggered the fallback
     * @return the best available substitute response
     */
    public String fallbackResponse(String prompt, Exception e) {
        // 1. Return a cached result when available
        String cached = cacheService.get(prompt);
        if (cached != null) {
            return cached;
        }
        // 2. Try the degraded model
        try {
            return fallbackModel.generate(prompt);
        } catch (Exception ignored) {
            // 3. Last resort: the degraded model failed too, so answer with the default
            //    message rather than let the fallback itself throw.
            return "系统繁忙,请稍后重试";
        }
    }
}
三、智能路由与负载均衡
/**
 * Chooses a model among the healthy ones, considering performance, cost sensitivity
 * and weighted round-robin balancing.
 */
@Component
public class SmartModelRouter {

    @Autowired
    private ModelHealthChecker healthChecker;

    private final WeightedRoundRobinSelector selector =
            new WeightedRoundRobinSelector();

    /**
     * Selects the model for a request.
     *
     * @param request the incoming request
     * @return the cost-effective choice for cost-sensitive requests, otherwise the
     *         selector's pick from the healthy models ordered by descending performance score
     */
    public AiModel selectOptimalModel(ModelRequest request) {
        // 1. Health check: sort a private copy so the checker's own list is never
        //    reordered (and an unmodifiable list does not throw).
        List<AiModel> healthyModels = new ArrayList<>(healthChecker.getHealthyModels());

        // 2. Highest performance score first
        healthyModels.sort(Comparator
                .comparingDouble(AiModel::getPerformanceScore)
                .reversed());

        // 3. Cost control
        if (isCostSensitive(request)) {
            return selectCostEffectiveModel(healthyModels);
        }

        // 4. Load balancing
        return selector.select(healthyModels);
    }
}
四、完整的容错处理链
/**
 * Fault-tolerant execution chain: cache, rate limit, bulkhead, routing, timeout, retry
 * and final degradation.
 *
 * <p>NOTE(review): {@code modelRouter}, {@code ultimateFallback}, {@code queueRequest}
 * and {@code switchModelInstance} are not declared in this snippet; presumably omitted.
 */
@Service
public class RobustAIService {

    @Autowired
    private RetryTemplate retryTemplate;
    @Autowired
    private Bulkhead bulkhead;
    @Autowired
    private RateLimiter rateLimiter;
    @Autowired
    private CacheService cacheService;

    /**
     * Executes the request with retries; once retries are exhausted the recovery
     * callback returns the ultimate fallback.
     *
     * <p>The attempt itself must let exceptions escape. A catch-all inside the retry
     * callback would hide every failure from the {@code RetryTemplate}, so no retry
     * would ever happen.
     *
     * @param request the request to execute
     * @return the model response, a cached response, or the ultimate fallback
     */
    public AiResponse executeWithFaultTolerance(AiRequest request) {
        return retryTemplate.execute(
                retryContext -> attempt(request),
                // 7. Final degradation, reached only after the retry policy gives up
                retryContext -> ultimateFallback(request));
    }

    /** One attempt: cache lookup, rate limiting, bulkhead isolation, routing and a timed model call. */
    private AiResponse attempt(AiRequest request) {
        // 1. Cache
        AiResponse cached = cacheService.get(request);
        if (cached != null) {
            return cached;
        }
        // 2. Rate limiting: waitForPermission throws when no permit is granted, whereas the
        //    boolean from acquirePermission() was previously ignored.
        //    NOTE(review): assumes the Resilience4j RateLimiter; confirm.
        RateLimiter.waitForPermission(rateLimiter);
        // 3. Bulkhead isolation
        return bulkhead.executeSupplier(() -> {
            // 4. Choose a model
            AiModel model = modelRouter.select(request);
            // 5. Execute with a 30-second timeout
            return CompletableFuture.supplyAsync(() -> model.generate(request))
                    .orTimeout(30, TimeUnit.SECONDS)
                    // 6. Map known failures to a recovery action
                    .exceptionally(ex -> handleException(ex, request))
                    .join();
        });
    }

    /**
     * Maps a failure of the asynchronous call to a recovery action.
     *
     * @param ex      failure as delivered by the future; may be a {@code CompletionException} wrapper
     * @param request the failed request
     * @return a substitute response for recoverable failures
     * @throws RuntimeException for timeouts ({@code FastFailException}) and unknown failures
     */
    private AiResponse handleException(Throwable ex, AiRequest request) {
        // Failures raised inside supplyAsync arrive wrapped; inspect the real cause.
        Throwable cause =
                (ex instanceof java.util.concurrent.CompletionException && ex.getCause() != null)
                        ? ex.getCause()
                        : ex;
        if (cause instanceof RateLimitException) {
            // Rate limited: queue or reject
            return queueRequest(request);
        } else if (cause instanceof TimeoutException) {
            // Timeout: fail fast
            Metrics.recordTimeout(request.getModel());
            throw new FastFailException("Request timeout");
        } else if (cause instanceof ModelOverloadedException) {
            // Model overloaded: switch instance
            return switchModelInstance(request);
        }
        throw new RuntimeException(cause);
    }
}
五、监控与告警配置
/**
 * Periodic health monitoring of model providers, and notification on circuit-breaker
 * state transitions.
 *
 * <p>NOTE(review): {@code modelProviders}, {@code alertService}, {@code loadBalancer},
 * {@code notificationService}, {@code checkHealth} and {@code calculateWeight} are not
 * declared in this snippet; presumably omitted.
 */
@Configuration
@EnableScheduling
public class MonitoringConfig {
// Scheduled with a fixed delay of 60000 ms between runs.
@Scheduled(fixedDelay = 60000)
public void monitorModelHealth() {
modelProviders.forEach(provider -> {
HealthStatus status = checkHealth(provider);
// 1. Record metrics
Metrics.recordLatency(provider.getName(), status.getLatency());
Metrics.recordSuccessRate(provider.getName(), status.getSuccessRate());
// 2. Raise an alert
// NOTE(review): the threshold 95 presumes the success rate is a percentage (0-100), not a fraction; confirm.
if (status.getSuccessRate() < 95) {
alertService.sendAlert(
"AI_MODEL_DEGRADED",
provider.getName(),
status
);
}
// 3. Adjust the load-balancer weight automatically
loadBalancer.adjustWeight(
provider.getName(),
calculateWeight(status)
);
});
}
// NOTE(review): this only fires if circuit-breaker transitions are published as Spring
// application events; confirm, or register a listener on the breaker's own event publisher.
@EventListener
public void onCircuitBreakerEvent(CircuitBreakerOnStateTransitionEvent event) {
// Notify about the circuit-breaker state change
notificationService.notify(
"CircuitBreaker " + event.getCircuitBreakerName()
+ " changed to " + event.getStateTransition()
);
}
}
六、配置管理
# application-high-availability.yml
# Nesting restored from the flattened listing.
# NOTE(review): `monitoring` is placed under `high-availability` and `alert` under
# `monitoring`; the flattened original does not show where they belong, so confirm.
spring:
  ai:
    high-availability:
      enabled: true
      strategies:
        degradation:
          enabled: true
          chain: [gpt-4, gpt-3.5, claude-2, ernie-bot]
          fallback-response: "服务暂时不可用"
        circuit-breaker:
          enabled: true
          failure-threshold: 50%
          timeout-ms: 30000
        retry:
          max-attempts: 3
          backoff-delay: 1000
        bulkhead:
          max-concurrent-calls: 50
          max-wait-duration: 1000
      monitoring:
        metrics:
          enabled: true
          export:
            prometheus:
              enabled: true
        alert:
          webhooks:
            - url: ${ALERT_WEBHOOK_URL}
          thresholds:
            error-rate: 5%
            latency-p99: 5000ms
七、最佳实践建议
1. 分级降级策略
- 一级降级 :主模型 → 备用模型
- 二级降级 :复杂模型 → 简化模型
- 三级降级 :AI服务 → 规则引擎
- 四级降级 :返回缓存或默认响应
2. 熔断恢复策略
// Gradual recovery
// NOTE(review): Resilience4j's @CircuitBreaker annotation is normally used with only `name`
// and `fallbackMethod`; confirm that `successThreshold` and `recoveryStrategy` exist on the
// annotation actually used here. Half-open behavior is otherwise configured per instance.
@CircuitBreaker(name = "model-service",
fallbackMethod = "fallback",
successThreshold = 3, // close the breaker only after 3 consecutive successes
recoveryStrategy = "gradual" // ramp traffic up step by step
)
3. 多区域容灾
// Primary model bean "usEastModel", preferred when several AiModel beans are candidates.
// NOTE(review): confirm the host name passed here is a real endpoint for your deployment.
@Primary
@Bean(name = "usEastModel")
public AiModel usEastModel() {
return new OpenAiModel("us-east-api.openai.com");
}
// Model bean "euWestModel", registered only when the property `region` equals "eu".
// NOTE(review): confirm the host name passed here is a real endpoint for your deployment.
@Bean(name = "euWestModel")
@ConditionalOnProperty(name = "region", havingValue = "eu")
public AiModel euWestModel() {
return new OpenAiModel("eu-west-api.openai.com");
}
4. 测试策略
/**
 * Integration test for the degradation path.
 *
 * <p>NOTE(review): {@code mockServer}, {@code service}, {@code request} and
 * {@code fallbackResponse} are not declared in this snippet; presumably omitted.
 * NOTE(review): the key {@code spring.ai.circuit-breaker.force-open} does not sit under
 * the {@code spring.ai.high-availability} prefix used by the configuration shown in this
 * document; confirm the intended property path.
 */
@SpringBootTest
@TestPropertySource(properties = {
"spring.ai.high-availability.test-mode=true",
"spring.ai.circuit-breaker.force-open=true"
})
public class FaultToleranceTest {
@Test
public void testCircuitBreaker() {
// Simulate failures so the circuit breaker trips
mockServer.setFailureRate(100);
// Verify the degradation logic returns the fallback response
assertThat(service.execute(request))
.isEqualTo(fallbackResponse);
}
}
这个方案提供了从客户端到服务端的完整高可用保障,可以根据具体业务需求进行调整和扩展。
AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。
更多推荐



所有评论(0)