第二节:OpenAI SDK与Azure AI Foundry的集成

概述
在您掌握Foundry Local基础知识的基础上,本节课将重点介绍高级OpenAI SDK集成模式,这些模式能够无缝支持Microsoft Foundry Local和Azure OpenAI。您将学习如何构建灵活的AI应用程序,这些应用程序既可以在本地运行以确保隐私和开发需求,又可以通过Azure OpenAI实现云端扩展。
完成本节课程后,您将能够:
安装所需的依赖包(`pip install openai foundry-local-sdk`)。

构建同时支持Foundry Local和Azure OpenAI的应用程序需要灵活的客户端创建模式:
# sdk_integration.py - Sample 02 pattern
import os
from typing import Tuple

from openai import OpenAI

try:
    from foundry_local import FoundryLocalManager
    FOUNDRY_SDK_AVAILABLE = True
except ImportError:
    # The Foundry Local SDK is optional; without it the manual
    # BASE_URL / API_KEY configuration path is used instead.
    FOUNDRY_SDK_AVAILABLE = False


def create_azure_client() -> Tuple[OpenAI, str]:
    """Create an OpenAI client configured for Azure OpenAI.

    Reads AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY,
    AZURE_OPENAI_API_VERSION (default "2024-08-01-preview") and MODEL
    (default "your-deployment-name") from the environment.

    Returns:
        A ``(client, model)`` tuple.

    Raises:
        ValueError: If the endpoint or the API key is not set.
    """
    azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
    azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
    azure_api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-08-01-preview")

    if not azure_endpoint or not azure_api_key:
        raise ValueError("Azure OpenAI endpoint and API key are required")

    model = os.environ.get("MODEL", "your-deployment-name")

    # Strip trailing slashes so an endpoint copied as "https://x/" does not
    # produce "https://x//openai".
    endpoint_root = azure_endpoint.rstrip("/")
    client = OpenAI(
        base_url=f"{endpoint_root}/openai",
        api_key=azure_api_key,
        default_query={"api-version": azure_api_version},
    )

    print(f" Azure OpenAI client created with model: {model}")
    return client, model


def create_foundry_client() -> Tuple[OpenAI, str]:
    """Create an OpenAI client that talks to Foundry Local.

    When the Foundry Local SDK is importable, ``FoundryLocalManager`` supplies
    the endpoint, API key and resolved model id. If the SDK is missing or
    raises, the client is built from BASE_URL (default
    "http://localhost:8000") and API_KEY (default "") instead.

    Returns:
        A ``(client, model)`` tuple; ``model`` is the SDK-resolved model id on
        the SDK path and the MODEL alias (default "phi-4-mini") on the
        fallback path.
    """
    alias = os.environ.get("MODEL", "phi-4-mini")

    if FOUNDRY_SDK_AVAILABLE:
        try:
            # Use FoundryLocalManager for proper service management
            manager = FoundryLocalManager(alias)
            model_info = manager.get_model_info(alias)

            # Configure OpenAI client to use local Foundry service
            client = OpenAI(
                base_url=manager.endpoint,
                api_key=manager.api_key,
            )

            print(f" Foundry Local SDK initialized with model: {model_info.id}")
            return client, model_info.id
        except Exception as e:
            # Best-effort: any SDK failure drops through to manual config.
            print(f"⚠️ Could not use Foundry SDK ({e}), falling back to manual configuration")

    # Fallback to manual configuration
    base_url = os.environ.get("BASE_URL", "http://localhost:8000").rstrip("/")
    api_key = os.environ.get("API_KEY", "")

    client = OpenAI(
        base_url=f"{base_url}/v1",
        api_key=api_key,
    )

    print(f" Manual configuration with model: {alias}")
    return client, alias


def initialize_client() -> Tuple[OpenAI, str, str]:
    """Pick a provider and return ``(client, model, provider)``.

    Azure OpenAI is tried first when both AZURE_OPENAI_ENDPOINT and
    AZURE_OPENAI_API_KEY are set; if it is not configured or its creation
    fails, Foundry Local is used. ``provider`` is "azure" or "foundry".
    """
    # Check for Azure OpenAI configuration
    azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
    azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")

    if azure_endpoint and azure_api_key:
        try:
            client, model = create_azure_client()
            return client, model, "azure"
        except Exception as e:
            print(f"❌ Azure OpenAI initialization failed: {e}")
            print(" Falling back to Foundry Local...")

    # Use Foundry Local
    client, model = create_foundry_client()
    return client, model, "foundry"
流式响应通过实时显示生成的内容提供更好的用户体验:
# streaming_chat.py - Following Sample 02 patterns
def streaming_chat_completion(client: OpenAI, model: str, prompt: str, max_tokens: int = 300):
    """Stream a single-turn chat completion, echoing tokens as they arrive.

    Args:
        client: OpenAI-compatible client.
        model: Model or deployment name to query.
        prompt: User message to send.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The full concatenated response text, or an ``"Error: ..."`` string if
        the request or the stream raised.
    """
    try:
        print(" Assistant (streaming):")

        # Create streaming completion
        stream = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            stream=True,
        )

        pieces = []
        for chunk in stream:
            # A chunk may carry an empty `choices` list (e.g. usage-only or
            # content-filter chunks); indexing [0] on it would raise IndexError.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                pieces.append(content)

        print("\n")  # New line after streaming
        return "".join(pieces)

    except Exception as e:
        error_msg = f"Error: {e}"
        print(error_msg)
        return error_msg


# Usage example
client, model, provider = initialize_client()
prompt = "Explain the key benefits of using Microsoft Foundry Local for AI development."
response = streaming_chat_completion(client, model, prompt)
# conversation_manager.py
class ConversationManager:
    """Manages multi-turn conversations with context preservation.

    The full message history (optional system prompt, then user and assistant
    turns) is resent on every request so the model sees prior context.
    """

    def __init__(self, client: OpenAI, model: str, system_prompt: str = None):
        self.client = client
        self.model = model
        self.messages = []

        if system_prompt:
            self.messages.append({"role": "system", "content": system_prompt})

    def send_message(self, user_message: str, max_tokens: int = 200, stream: bool = False):
        """Send a user message and return the assistant reply.

        Args:
            user_message: Text of the user turn.
            max_tokens: Upper bound on generated tokens.
            stream: When True, print tokens as they arrive.

        Returns:
            The assistant's reply text, or an ``"Error: ..."`` string if the
            request failed. On failure the user turn is removed from the
            history again.
        """
        # Add user message to conversation
        user_turn = {"role": "user", "content": user_message}
        self.messages.append(user_turn)

        try:
            if stream:
                return self._stream_response(max_tokens)
            return self._regular_response(max_tokens)
        except Exception as e:
            # Roll back the unanswered user turn; otherwise the next request
            # would send two consecutive user messages.
            if self.messages and self.messages[-1] is user_turn:
                self.messages.pop()
            return f"Error: {e}"

    def _regular_response(self, max_tokens: int):
        """Get regular (non-streaming) response and record it in the history."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self.messages,
            max_tokens=max_tokens,
        )

        assistant_message = response.choices[0].message.content
        self.messages.append({"role": "assistant", "content": assistant_message})
        return assistant_message

    def _stream_response(self, max_tokens: int):
        """Get streaming response, printing tokens and recording the full text."""
        stream = self.client.chat.completions.create(
            model=self.model,
            messages=self.messages,
            max_tokens=max_tokens,
            stream=True,
        )

        pieces = []
        for chunk in stream:
            # Skip chunks with an empty `choices` list instead of raising
            # IndexError on [0].
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                print(content, end="", flush=True)
                pieces.append(content)

        print()  # New line
        full_response = "".join(pieces)
        self.messages.append({"role": "assistant", "content": full_response})
        return full_response

    def get_conversation_length(self) -> int:
        """Get the number of messages in the conversation."""
        return len(self.messages)

    def clear_conversation(self, keep_system: bool = True):
        """Clear conversation history, optionally keeping a leading system prompt."""
        if keep_system and self.messages and self.messages[0]["role"] == "system":
            self.messages = [self.messages[0]]
        else:
            self.messages = []


# Example usage
client, model, provider = initialize_client()
system_prompt = "You are a helpful AI assistant specialized in explaining AI and machine learning concepts."
conversation = ConversationManager(client, model, system_prompt)

# Multi-turn conversation
conversation_turns = [
    "What is the difference between AI inference on-device vs in the cloud?",
    "Which approach is better for privacy?",
    "What about performance and latency considerations?",
]

for i, turn in enumerate(conversation_turns, 1):
    print(f"\nTurn {i}: {turn}")
    response = conversation.send_message(turn, stream=True)
测量并比较不同配置下的性能:
# performance_benchmark.py - Sample 02 patterns
import time
from typing import Dict, List

from openai import OpenAI


def benchmark_response_time(client: OpenAI, model: str, prompt: str,
                            iterations: int = 3, max_tokens: int = 50) -> Dict:
    """Benchmark response time for a given prompt.

    Args:
        client: OpenAI-compatible client.
        model: Model or deployment name to query.
        prompt: User message sent on every iteration.
        iterations: Number of requests to issue.
        max_tokens: Token cap per request; kept small so timing is dominated
            by latency rather than generation length.

    Returns:
        A dict with ``average_time``, ``min_time``, ``max_time``,
        ``all_times``, ``sample_response`` and ``success_rate`` (percent), or
        ``{"error": "No successful responses"}`` if every request failed.
    """
    times = []
    responses = []

    for i in range(iterations):
        # perf_counter is monotonic and high-resolution; wall-clock time.time()
        # can jump if the system clock is adjusted mid-measurement.
        start_time = time.perf_counter()
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,  # Keep responses short for timing
            )
            times.append(time.perf_counter() - start_time)
            responses.append(response.choices[0].message.content)
        except Exception as e:
            print(f"Error in iteration {i+1}: {e}")

    if times:
        return {
            "average_time": sum(times) / len(times),
            "min_time": min(times),
            "max_time": max(times),
            "all_times": times,
            "sample_response": responses[0] if responses else None,
            "success_rate": len(times) / iterations * 100,
        }
    return {"error": "No successful responses"}


def compare_providers(foundry_client: OpenAI, foundry_model: str,
                      azure_client: OpenAI, azure_model: str,
                      test_prompts: List[str]):
    """Compare performance between Foundry Local and Azure OpenAI.

    Runs ``benchmark_response_time`` for each prompt against both providers and
    prints the faster one whenever both benchmarks succeeded.

    Returns:
        ``{"foundry_local": [...], "azure_openai": [...]}`` where each list
        holds one ``{"prompt", "benchmark"}`` entry per prompt.
    """
    results = {
        "foundry_local": [],
        "azure_openai": [],
    }

    for prompt in test_prompts:
        print(f"\nTesting prompt: '{prompt}'")

        # Test Foundry Local
        foundry_result = benchmark_response_time(foundry_client, foundry_model, prompt)
        results["foundry_local"].append({
            "prompt": prompt,
            "benchmark": foundry_result,
        })

        # Test Azure OpenAI
        azure_result = benchmark_response_time(azure_client, azure_model, prompt)
        results["azure_openai"].append({
            "prompt": prompt,
            "benchmark": azure_result,
        })

        # Compare results
        if "error" not in foundry_result and "error" not in azure_result:
            foundry_time = foundry_result["average_time"]
            azure_time = azure_result["average_time"]

            print(f" Foundry Local: {foundry_time:.2f}s")
            print(f" Azure OpenAI: {azure_time:.2f}s")
            print(f" Winner: {'Foundry Local' if foundry_time < azure_time else 'Azure OpenAI'}")

    return results


# Example usage
benchmark_prompts = [
    "What is AI?",
    "Explain machine learning in simple terms.",
    "List 3 benefits of edge computing.",
]

# Initialize clients
foundry_client, foundry_model, _ = initialize_client()
# azure_client, azure_model = create_azure_client()  # Uncomment if Azure is configured

for prompt in benchmark_prompts:
    print(f"\n Benchmarking: '{prompt}'")
    result = benchmark_response_time(foundry_client, foundry_model, prompt)

    if "error" not in result:
        print(f" ⏰ Average time: {result['average_time']:.2f}s")
        print(f" ⚡ Fastest: {result['min_time']:.2f}s")
        print(f" Slowest: {result['max_time']:.2f}s")
        print(f" ✅ Success rate: {result['success_rate']:.1f}%")
# parameter_testing.py
def test_temperature_effects(client: OpenAI, model: str, prompt: str):
    """Print the model's answer to ``prompt`` at several temperatures.

    Issues one request per temperature in ``[0.1, 0.5, 0.9]`` and prints the
    first 150 characters of each reply; a failing request is reported and the
    loop continues with the next temperature.
    """
    temperatures = [0.1, 0.5, 0.9]

    print(f"Testing prompt: '{prompt}'")
    print("=" * 60)

    for temp in temperatures:
        print(f"\n\ufe0f Temperature: {temp}")
        print("-" * 30)

        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=100,
                temperature=temp,
            )
            # `content` can be None; slicing None would raise TypeError and be
            # misreported below as a request failure.
            text = response.choices[0].message.content or ""
            print(f"Response: {text[:150]}...")

        except Exception as e:
            print(f"Error with temperature {temp}: {e}")


# Test creative vs analytical prompts
creative_prompt = "Write a creative short story about AI."
analytical_prompt = "Explain the technical differences between GPT and BERT models."

test_temperature_effects(foundry_client, foundry_model, creative_prompt)
test_temperature_effects(foundry_client, foundry_model, analytical_prompt)
# health_monitoring.py - Sample 02 patterns
def comprehensive_health_check(client: OpenAI, model: str, provider: str) -> Dict:
    """Perform comprehensive health check of the AI service.

    Runs three independent probes (model listing, a basic completion, and a
    streaming completion). Each probe records "success" or "failed" under
    ``tests``; a failing probe does not stop the others.

    Args:
        client: OpenAI-compatible client to probe.
        model: Model or deployment name used for the completion probes.
        provider: Label stored in the result (e.g. "azure" or "foundry").

    Returns:
        A dict with ``provider``, ``model``, ``timestamp``, per-probe ``tests``
        and ``overall_health`` (score in percent plus a status of "healthy"
        at >= 70, "degraded" at >= 30, otherwise "unhealthy").
    """
    print(" Comprehensive Health Check")
    print("=" * 50)

    health_results = {
        "provider": provider,
        "model": model,
        "timestamp": time.time(),  # wall-clock stamp of when the check ran
        "tests": {},
    }

    # Test 1: Model listing
    try:
        models_response = client.models.list()
        available_models = [m.id for m in models_response.data]
        health_results["tests"]["model_listing"] = {
            "status": "success",
            "available_models": available_models,
            "current_model_available": model in available_models,
        }
        print(f"✅ Model listing: SUCCESS ({len(available_models)} models)")
    except Exception as e:
        health_results["tests"]["model_listing"] = {
            "status": "failed",
            "error": str(e),
        }
        print(f"❌ Model listing: FAILED - {e}")

    # Test 2: Basic completion
    try:
        # Durations use the monotonic perf_counter rather than wall-clock time.
        start_time = time.perf_counter()
        test_response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Say 'Health check successful'"}],
            max_tokens=10,
        )
        response_time = time.perf_counter() - start_time

        health_results["tests"]["basic_completion"] = {
            "status": "success",
            "response_time": response_time,
            "response": test_response.choices[0].message.content,
        }
        print(f"✅ Basic completion: SUCCESS ({response_time:.2f}s)")
    except Exception as e:
        health_results["tests"]["basic_completion"] = {
            "status": "failed",
            "error": str(e),
        }
        print(f"❌ Basic completion: FAILED - {e}")

    # Test 3: Streaming
    try:
        start_time = time.perf_counter()
        stream = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Count to 3"}],
            max_tokens=20,
            stream=True,
        )

        pieces = []
        for chunk in stream:
            # Skip chunks with an empty `choices` list; indexing [0] on them
            # would fail the probe with IndexError on a healthy service.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                pieces.append(content)

        streaming_time = time.perf_counter() - start_time
        chunk_count = len(pieces)
        stream_content = "".join(pieces)

        health_results["tests"]["streaming"] = {
            "status": "success",
            "response_time": streaming_time,
            "chunks_received": chunk_count,
            "content": stream_content.strip(),
        }
        print(f"✅ Streaming: SUCCESS ({streaming_time:.2f}s, {chunk_count} chunks)")
    except Exception as e:
        health_results["tests"]["streaming"] = {
            "status": "failed",
            "error": str(e),
        }
        print(f"❌ Streaming: FAILED - {e}")

    # Overall health score
    successful_tests = sum(
        1 for test in health_results["tests"].values() if test["status"] == "success"
    )
    total_tests = len(health_results["tests"])
    health_score = (successful_tests / total_tests) * 100

    if health_score >= 70:
        overall_status = "healthy"
    elif health_score >= 30:
        overall_status = "degraded"
    else:
        overall_status = "unhealthy"

    health_results["overall_health"] = {
        "score": health_score,
        "successful_tests": successful_tests,
        "total_tests": total_tests,
        "status": overall_status,
    }

    print(f"\n Overall Health: {health_score:.1f}% ({health_results['overall_health']['status'].upper()})")

    return health_results


# Usage example
client, model, provider = initialize_client()
health_status = comprehensive_health_check(client, model, provider)
# config_validator.py
def validate_environment_configuration() -> Dict:
    """Validate environment configuration for both providers.

    Inspects the Foundry Local settings (SDK availability, BASE_URL, MODEL,
    API_KEY) and the Azure OpenAI settings (endpoint, key, API version,
    MODEL) without contacting either service.

    Returns:
        A dict with ``foundry_local`` and ``azure_openai`` detail sections, a
        ``recommendations`` list of human-readable hints, and a ``summary``
        holding boolean ``foundry_ready`` / ``azure_ready`` flags and the
        integer ``total_options``.
    """
    validation_results = {
        "foundry_local": {},
        "azure_openai": {},
        "recommendations": [],
    }

    # Check Foundry Local configuration
    foundry_sdk_available = FOUNDRY_SDK_AVAILABLE
    base_url = os.environ.get("BASE_URL", "http://localhost:8000")

    validation_results["foundry_local"] = {
        "sdk_available": foundry_sdk_available,
        "base_url": base_url,
        "model": os.environ.get("MODEL", "phi-4-mini"),
        "api_key": bool(os.environ.get("API_KEY")),
    }

    if not foundry_sdk_available:
        validation_results["recommendations"].append(
            "Install Foundry Local SDK: pip install foundry-local-sdk"
        )

    # Check Azure OpenAI configuration
    azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
    azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
    azure_api_version = os.environ.get("AZURE_OPENAI_API_VERSION")

    validation_results["azure_openai"] = {
        "endpoint_configured": bool(azure_endpoint),
        "api_key_configured": bool(azure_api_key),
        "api_version": azure_api_version or "2024-08-01-preview",
        "model": os.environ.get("MODEL", "your-deployment-name"),
    }

    if azure_endpoint and not azure_api_key:
        validation_results["recommendations"].append(
            "Azure endpoint is set but API key is missing"
        )

    # Overall assessment
    # Coerce to bool: `a or b` / `a and b` return the operand itself (a str or
    # None here), and summing None or a str below would raise TypeError.
    can_use_foundry = bool(foundry_sdk_available or base_url)
    can_use_azure = bool(azure_endpoint and azure_api_key)

    if not can_use_foundry and not can_use_azure:
        validation_results["recommendations"].append(
            "No valid configuration found. Set up either Foundry Local or Azure OpenAI."
        )

    validation_results["summary"] = {
        "foundry_ready": can_use_foundry,
        "azure_ready": can_use_azure,
        "total_options": int(can_use_foundry) + int(can_use_azure),
    }

    return validation_results


# Display configuration status
config_status = validate_environment_configuration()
print("⚙️ Environment Configuration Status")
print("=" * 40)
print(f" Foundry Local Ready: {'✅' if config_status['summary']['foundry_ready'] else '❌'}")
print(f" Azure OpenAI Ready: {'✅' if config_status['summary']['azure_ready'] else '❌'}")
print(f" Available Options: {config_status['summary']['total_options']}")

if config_status["recommendations"]:
    print("\n Recommendations:")
    for rec in config_status["recommendations"]:
        print(f" • {rec}")
完整的配置参考,用于设置两个提供商:
# config_reference.py - Sample 02 patterns
import os
from typing import Dict, Optional


class ConfigurationManager:
    """Reads and prints the environment settings for both providers."""

    @staticmethod
    def get_foundry_config() -> Dict[str, Optional[str]]:
        """Return the Foundry Local settings, with defaults applied."""
        env = os.environ
        settings = {}
        settings["MODEL"] = env.get("MODEL", "phi-4-mini")
        settings["BASE_URL"] = env.get("BASE_URL", "http://localhost:8000")
        settings["API_KEY"] = env.get("API_KEY", "")
        return settings

    @staticmethod
    def get_azure_config() -> Dict[str, Optional[str]]:
        """Return the Azure OpenAI settings; endpoint and key may be None."""
        env = os.environ
        settings = {}
        settings["AZURE_OPENAI_ENDPOINT"] = env.get("AZURE_OPENAI_ENDPOINT")
        settings["AZURE_OPENAI_API_KEY"] = env.get("AZURE_OPENAI_API_KEY")
        settings["AZURE_OPENAI_API_VERSION"] = env.get(
            "AZURE_OPENAI_API_VERSION", "2024-08-01-preview"
        )
        settings["MODEL"] = env.get("MODEL", "your-deployment-name")
        return settings

    @staticmethod
    def display_current_config():
        """Print both configurations (secrets masked) and the active provider."""

        def show(settings, is_secret):
            # One line per setting; secrets show at most their last 4 chars.
            for name, raw in settings.items():
                if not raw:
                    shown = "(not set)"
                elif is_secret(name):
                    shown = "***" + raw[-4:] if len(raw) > 4 else "***"
                else:
                    shown = raw
                print(f" {name}: {shown}")

        print("⚙️ Current Configuration")
        print("=" * 40)

        foundry_settings = ConfigurationManager.get_foundry_config()
        azure_settings = ConfigurationManager.get_azure_config()

        print(" Foundry Local:")
        show(foundry_settings, lambda name: name == "API_KEY")

        print("\n Azure OpenAI:")
        show(azure_settings, lambda name: "KEY" in name)

        # Determine active provider
        azure_ready = (
            azure_settings["AZURE_OPENAI_ENDPOINT"]
            and azure_settings["AZURE_OPENAI_API_KEY"]
        )
        foundry_ready = True  # Foundry can always fallback to defaults

        azure_label = "✅ Ready" if azure_ready else "❌ Not configured"
        foundry_label = "✅ Ready" if foundry_ready else "❌ Not available"
        active_label = "Azure OpenAI" if azure_ready else "Foundry Local"

        print("\n Provider Status:")
        print(f" Azure OpenAI: {azure_label}")
        print(f" Foundry Local: {foundry_label}")
        print(f" Active: {active_label}")


# Display current configuration
config_manager = ConfigurationManager()
config_manager.display_current_config()
Windows命令提示符设置:
REM ---- Option A: Foundry Local ----
set MODEL=phi-4-mini
set BASE_URL=http://localhost:8000
set API_KEY=

REM ---- Option B: Azure OpenAI (use instead of Option A) ----
set AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
set AZURE_OPENAI_API_KEY=your-api-key-here
set AZURE_OPENAI_API_VERSION=2024-08-01-preview
set MODEL=your-deployment-name

REM ---- Launch the sample ----
python samples\02\sdk_quickstart.py
PowerShell设置:
# ---- Option A: Foundry Local ----
$env:MODEL = "phi-4-mini"
$env:BASE_URL = "http://localhost:8000"
$env:API_KEY = ""

# ---- Option B: Azure OpenAI (use instead of Option A) ----
$env:AZURE_OPENAI_ENDPOINT = "https://your-resource.openai.azure.com"
$env:AZURE_OPENAI_API_KEY = "your-api-key-here"
$env:AZURE_OPENAI_API_VERSION = "2024-08-01-preview"
$env:MODEL = "your-deployment-name"

# ---- Launch the sample ----
python samples/02/sdk_quickstart.py
构建一个能够无缝切换提供商的完整应用程序:
# exercise_1_multi_provider.py
import os
import time
from typing import Tuple, Dict, Any

from openai import OpenAI


class MultiProviderSDKDemo:
    """Demonstrates seamless switching between Foundry Local and Azure OpenAI.

    Clients are stored in ``self.clients`` and model names in ``self.models``,
    both keyed by provider name ("foundry" / "azure"). A provider that fails to
    initialise is simply absent from both dicts.
    """

    def __init__(self):
        self.clients = {}
        self.models = {}
        self.setup_clients()

    def setup_clients(self):
        """Initialize all available clients."""
        # Try to initialize Foundry Local.
        # create_foundry_client() is called directly: initialize_client()
        # prefers Azure whenever Azure credentials are set, which would store
        # an Azure client under the "foundry" key.
        try:
            foundry_client, foundry_model = create_foundry_client()
            self.clients["foundry"] = foundry_client
            self.models["foundry"] = foundry_model
            print("✅ Foundry Local client ready")
        except Exception as e:
            print(f"❌ Foundry Local setup failed: {e}")

        # Try to initialize Azure OpenAI
        try:
            if os.environ.get("AZURE_OPENAI_ENDPOINT") and os.environ.get("AZURE_OPENAI_API_KEY"):
                azure_client, azure_model = create_azure_client()
                self.clients["azure"] = azure_client
                self.models["azure"] = azure_model
                print("✅ Azure OpenAI client ready")
        except Exception as e:
            print(f"❌ Azure OpenAI setup failed: {e}")

    def compare_providers(self, prompt: str, max_tokens: int = 100) -> Dict[str, Any]:
        """Compare responses from all available providers.

        Returns:
            A dict keyed by provider name. Successful entries carry ``model``,
            ``response``, ``response_time`` and ``status == "success"``;
            failed entries carry ``model``, ``error`` and
            ``status == "failed"``.
        """
        results = {}

        for provider_name, client in self.clients.items():
            model = self.models[provider_name]

            print(f"\nTesting {provider_name} ({model})...")

            # Monotonic clock for elapsed-time measurement.
            start_time = time.perf_counter()
            try:
                response = client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                )
                response_time = time.perf_counter() - start_time

                results[provider_name] = {
                    "model": model,
                    "response": response.choices[0].message.content,
                    "response_time": response_time,
                    "status": "success",
                }
                print(f" ✅ Success ({response_time:.2f}s)")

            except Exception as e:
                results[provider_name] = {
                    "model": model,
                    "error": str(e),
                    "status": "failed",
                }
                print(f" ❌ Failed: {e}")

        return results

    def streaming_comparison(self, prompt: str, max_tokens: int = 150):
        """Stream the same prompt from every available provider, printing tokens."""
        for provider_name, client in self.clients.items():
            model = self.models[provider_name]

            print(f"\n Streaming from {provider_name} ({model}):")
            print("-" * 50)

            try:
                stream = client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    stream=True,
                )

                for chunk in stream:
                    # Skip chunks with an empty `choices` list instead of
                    # raising IndexError on [0].
                    if not chunk.choices:
                        continue
                    content = chunk.choices[0].delta.content
                    if content:
                        print(content, end="", flush=True)
                print("\n")

            except Exception as e:
                print(f"Streaming failed: {e}")


# Run Exercise 1
exercise_1 = MultiProviderSDKDemo()
test_prompt = "Explain the benefits of running AI models locally versus in the cloud."

print("\ufe0f Exercise 1: Multi-Provider Comparison")
print(f"Prompt: {test_prompt}")
print("=" * 60)

comparison_results = exercise_1.compare_providers(test_prompt)
exercise_1.streaming_comparison(test_prompt)
# exercise_2_conversation.py
class AdvancedConversationManager:
    """Advanced conversation management with multiple features.

    Keeps several independent conversation sessions, keyed by session id; each
    session stores its message list, creation time and a message counter.
    """

    def __init__(self, client: OpenAI, model: str):
        self.client = client
        self.model = model
        self.conversations = {}  # Multiple conversation sessions

    def create_conversation(self, session_id: str, system_prompt: str = None) -> str:
        """Create a new conversation session (replacing any with the same id).

        Returns:
            A confirmation string naming the session.
        """
        self.conversations[session_id] = {
            "messages": [],
            "created_at": time.time(),
            "message_count": 0,
        }

        if system_prompt:
            self.conversations[session_id]["messages"].append({
                "role": "system",
                "content": system_prompt,
            })

        return f"Conversation {session_id} created"

    def send_message(self, session_id: str, message: str,
                     temperature: float = 0.7, max_tokens: int = 200) -> Dict[str, Any]:
        """Send message in a specific conversation session.

        Returns:
            On success: ``session_id``, ``response``, ``message_count`` and
            ``status == "success"``. On failure: ``error``, ``session_id`` and
            ``status == "failed"``. For an unknown session only ``error`` is
            returned.
        """
        if session_id not in self.conversations:
            return {"error": f"Conversation {session_id} not found"}

        conversation = self.conversations[session_id]
        user_turn = {"role": "user", "content": message}
        conversation["messages"].append(user_turn)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=conversation["messages"],
                temperature=temperature,
                max_tokens=max_tokens,
            )

            assistant_message = response.choices[0].message.content
            conversation["messages"].append({
                "role": "assistant",
                "content": assistant_message,
            })

            conversation["message_count"] += 2  # User + assistant

            return {
                "session_id": session_id,
                "response": assistant_message,
                "message_count": conversation["message_count"],
                "status": "success",
            }

        except Exception as e:
            # Roll back the unanswered user turn so the history stays
            # consistent with message_count and the next request does not send
            # two consecutive user messages.
            if conversation["messages"] and conversation["messages"][-1] is user_turn:
                conversation["messages"].pop()
            return {"error": str(e), "session_id": session_id, "status": "failed"}

    def get_conversation_summary(self, session_id: str) -> Dict[str, Any]:
        """Get summary of conversation session, or an ``error`` dict if unknown."""
        if session_id not in self.conversations:
            return {"error": f"Conversation {session_id} not found"}

        conversation = self.conversations[session_id]
        messages = conversation["messages"]

        return {
            "session_id": session_id,
            "message_count": conversation["message_count"],
            "created_at": conversation["created_at"],
            "duration": time.time() - conversation["created_at"],
            "has_system_prompt": len(messages) > 0 and messages[0]["role"] == "system",
        }

    def export_conversation(self, session_id: str) -> str:
        """Export conversation as formatted text (one "Role: content" block per message)."""
        if session_id not in self.conversations:
            return f"Conversation {session_id} not found"

        conversation = self.conversations[session_id]
        parts = [f"Conversation Export: {session_id}\n", "=" * 50 + "\n\n"]

        for msg in conversation["messages"]:
            role = msg["role"].title()
            content = msg["content"]
            parts.append(f"{role}: {content}\n\n")

        return "".join(parts)


# Run Exercise 2
client, model, provider = initialize_client()
conv_manager = AdvancedConversationManager(client, model)

# Create multiple conversation sessions
print(" Exercise 2: Advanced Conversation Management")
print("=" * 60)

# Technical discussion
conv_manager.create_conversation("tech_discussion",
                                 "You are a technical expert explaining AI concepts clearly.")

# Creative session
conv_manager.create_conversation("creative_session",
                                 "You are a creative writing assistant helping with storytelling.")

# Test conversations
tech_questions = [
    "What is the difference between inference and training?",
    "How does quantization improve model performance?",
]

creative_prompts = [
    "Start a story about an AI that lives on an edge device.",
    "Continue the story with a plot twist.",
]

# Technical conversation
print("\n Technical Discussion:")
for question in tech_questions:
    result = conv_manager.send_message("tech_discussion", question)
    print(f"Q: {question}")
    # Error results have no 'response' key; indexing it would raise KeyError.
    if "error" in result:
        print(f"A: [error] {result['error']}\n")
    else:
        print(f"A: {(result['response'] or '')[:100]}...\n")

# Creative conversation
print(" Creative Session:")
for prompt in creative_prompts:
    result = conv_manager.send_message("creative_session", prompt, temperature=0.9)
    print(f"Prompt: {prompt}")
    if "error" in result:
        print(f"Response: [error] {result['error']}\n")
    else:
        print(f"Response: {(result['response'] or '')[:100]}...\n")

# Show conversation summaries
print(" Conversation Summaries:")
for session_id in conv_manager.conversations:
    summary = conv_manager.get_conversation_summary(session_id)
    print(f" {session_id}: {summary['message_count']} messages, {summary['duration']:.1f}s")
# exercise_3_monitoring.py
class ProductionHealthMonitor:
    """Production-ready health monitoring for AI services.

    Each call to ``run_comprehensive_check`` runs four probes (connectivity,
    model availability, performance, stress) and appends the result to
    ``self.health_history``.
    """

    def __init__(self):
        self.health_history = []
        self.alert_thresholds = {
            "response_time": 5.0,
            "error_rate": 10.0,
            "availability": 95.0,
        }

    def run_comprehensive_check(self, client: OpenAI, model: str, provider: str) -> Dict[str, Any]:
        """Run comprehensive health check with detailed reporting.

        Returns:
            A dict with ``timestamp``, ``provider``, ``model``, per-probe
            ``tests`` and an ``overall_health`` label.
        """
        check_results = {
            "timestamp": time.time(),
            "provider": provider,
            "model": model,
            "tests": {},
            "overall_health": "unknown",
        }

        # Test 1: Basic connectivity
        check_results["tests"]["connectivity"] = self._test_connectivity(client)

        # Test 2: Model availability
        check_results["tests"]["model_availability"] = self._test_model_availability(client, model)

        # Test 3: Response time benchmark
        check_results["tests"]["performance"] = self._test_performance(client, model)

        # Test 4: Stress test
        check_results["tests"]["stress_test"] = self._test_stress(client, model)

        # Calculate overall health
        check_results["overall_health"] = self._calculate_health_score(check_results["tests"])

        # Store for trending
        self.health_history.append(check_results)

        return check_results

    def _test_connectivity(self, client: OpenAI) -> Dict[str, Any]:
        """Test basic service connectivity by listing models."""
        try:
            start_time = time.perf_counter()
            models = client.models.list()
            response_time = time.perf_counter() - start_time

            return {
                "status": "success",
                "response_time": response_time,
                "models_count": len(models.data),
            }
        except Exception as e:
            return {"status": "failed", "error": str(e)}

    def _test_model_availability(self, client: OpenAI, model: str) -> Dict[str, Any]:
        """Test specific model availability with a minimal completion."""
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "Health check"}],
                max_tokens=5,
            )

            return {
                "status": "success",
                "model": model,
                "response_received": bool(response.choices[0].message.content),
            }
        except Exception as e:
            return {"status": "failed", "error": str(e)}

    def _test_performance(self, client: OpenAI, model: str) -> Dict[str, Any]:
        """Test response time performance over three sequential requests.

        Failed requests are skipped for the timing statistics; the most recent
        failure message is reported as ``last_error`` instead of being
        silently discarded.
        """
        attempts = 3
        response_times = []
        last_error = None

        for i in range(attempts):
            try:
                start_time = time.perf_counter()
                client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": f"Test {i+1}"}],
                    max_tokens=10,
                )
                response_times.append(time.perf_counter() - start_time)
            except Exception as e:
                last_error = str(e)

        if not response_times:
            return {
                "status": "failed",
                "error": "No successful responses",
                "last_error": last_error,
            }

        avg_time = sum(response_times) / len(response_times)
        return {
            "status": "success",
            "average_response_time": avg_time,
            "min_time": min(response_times),
            "max_time": max(response_times),
            "within_threshold": avg_time < self.alert_thresholds["response_time"],
            "failed_requests": attempts - len(response_times),
            "last_error": last_error,
        }

    def _test_stress(self, client: OpenAI, model: str) -> Dict[str, Any]:
        """Test service under five concurrent requests."""
        import concurrent.futures

        def single_request():
            # Each request reports only pass/fail; the aggregate rate is what
            # this probe measures.
            try:
                client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": "Stress test"}],
                    max_tokens=5,
                )
                return True
            except Exception:
                return False

        # Run 5 concurrent requests
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(single_request) for _ in range(5)]
            results = [future.result() for future in concurrent.futures.as_completed(futures)]

        success_rate = (sum(results) / len(results)) * 100

        return {
            "status": "success" if success_rate > 80 else "degraded",
            "concurrent_requests": len(results),
            "success_rate": success_rate,
            "within_threshold": success_rate >= self.alert_thresholds["availability"],
        }

    def _calculate_health_score(self, tests: Dict[str, Any]) -> str:
        """Map the share of successful tests to a health label.

        Returns "healthy" at >= 90 %, "degraded" at >= 70 %, otherwise
        "unhealthy"; "unknown" when ``tests`` is empty.
        """
        total_tests = len(tests)
        if total_tests == 0:
            # Avoid ZeroDivisionError when no tests were recorded.
            return "unknown"

        successful_tests = sum(1 for test in tests.values() if test["status"] == "success")
        health_percentage = (successful_tests / total_tests) * 100

        if health_percentage >= 90:
            return "healthy"
        elif health_percentage >= 70:
            return "degraded"
        else:
            return "unhealthy"

    def generate_health_report(self) -> str:
        """Generate formatted health report for the most recent check."""
        if not self.health_history:
            return "No health data available"

        latest = self.health_history[-1]

        lines = [
            f"Health Report - {time.ctime(latest['timestamp'])}\n",
            "=" * 60 + "\n",
            f"Provider: {latest['provider']}\n",
            f"Model: {latest['model']}\n",
            f"Overall Health: {latest['overall_health'].upper()}\n\n",
        ]

        for test_name, test_result in latest["tests"].items():
            status_icon = "✅" if test_result["status"] == "success" else "❌"
            lines.append(
                f"{status_icon} {test_name.replace('_', ' ').title()}: {test_result['status']}\n"
            )

        return "".join(lines)


# Run Exercise 3
client, model, provider = initialize_client()
health_monitor = ProductionHealthMonitor()

print(" Exercise 3: Production Health Monitoring")
print("=" * 60)

health_results = health_monitor.run_comprehensive_check(client, model, provider)
print(health_monitor.generate_health_report())
在本节课程中,您已经掌握了:
客户端工厂模式:
Environment Detection → Provider Selection → Client Creation → Model Configuration
         ↓                      ↓                    ↓                  ↓
    Azure/Local           Azure OpenAI/         OpenAI Client     Model Selection
    Credentials           Foundry Local         Initialization    and Validation
流式响应流程:
User Input → Chat Completion → Stream Processing → Real-time Display
    ↓              ↓                   ↓                   ↓
  Prompt      Stream=True        Token Chunks        Progressive UI
| 场景 | 推荐提供商 | 原因 |
|---|---|---|
| 开发 | Foundry Local | 快速迭代,无API成本 |
| 隐私敏感 | Foundry Local | 数据不离开设备 |
| 高容量生产 | Azure OpenAI | 更好的扩展性,企业级SLA |
| 最新模型 | Azure OpenAI | 获取最新模型版本 |
| 离线需求 | Foundry Local | 无需互联网依赖 |
| 成本敏感 | Foundry Local | 无按令牌收费 |
REM ---- Local provider: Foundry Local (the default) ----
set MODEL=phi-4-mini
set BASE_URL=http://localhost:8000
set API_KEY=

REM ---- Cloud provider: Azure OpenAI ----
set AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
set AZURE_OPENAI_API_KEY=your-api-key
set AZURE_OPENAI_API_VERSION=2024-08-01-preview
set MODEL=your-deployment-name
常见问题:
REM Issue: Could not use Foundry SDK
pip install foundry-local-sdk

REM Issue: Connection refused
foundry service status
foundry model run phi-4-mini

REM Issue: Azure authentication failed
REM Show the endpoint, but only report whether the key is set so the secret
REM is never printed to the terminal or captured in logs.
echo %AZURE_OPENAI_ENDPOINT%
if defined AZURE_OPENAI_API_KEY (echo AZURE_OPENAI_API_KEY is set) else (echo AZURE_OPENAI_API_KEY is NOT set)

REM Issue: Model not found
foundry model list
curl http://localhost:8000/v1/models
现在,您已经具备构建复杂AI应用程序的能力,这些应用程序能够无缝集成本地和云端AI功能,为每个具体用例选择合适的提供商,同时保持一致的开发模式。