diff --git a/config/profiles/lmdeploy.yaml b/config/profiles/lmdeploy.yaml new file mode 100644 index 0000000..78d3356 --- /dev/null +++ b/config/profiles/lmdeploy.yaml @@ -0,0 +1,173 @@ +# LMDeploy inference platform profile +name: lmdeploy +home: "https://github.com/InternLM/lmdeploy" +version: "1.0" +display_name: "LMDeploy" +description: "LMDeploy efficient inference and serving for LLMs and VLMs" + +# Routing configuration +routing: + prefixes: + - lmdeploy + +# API compatibility +api: + openai_compatible: true + paths: + # Health and system endpoints + - /health # 0: health check (HTTP 200, empty body) + + # Model management + - /v1/models # 1: list models (OpenAI-compatible ModelList) + + # Text generation endpoints (OpenAI-compatible) + - /v1/chat/completions # 2: chat completions with SSE streaming + - /v1/completions # 3: text completions + + # LMDeploy-specific endpoints + - /v1/encode # 4: token encoding (LMDeploy-specific) + - /generate # 5: native generation endpoint + - /pooling # 6: reward/score pooling (the real embeddings path) + # NOTE: /v1/embeddings is intentionally excluded — LMDeploy returns HTTP 400 + # unconditionally for all backends on that path. + + # Sleep state probe (LMDeploy-specific, used for auto-detection) + - /is_sleeping # 7: sleep state probe + + model_discovery_path: /v1/models + health_check_path: /health + +# Platform characteristics +characteristics: + timeout: 2m + max_concurrent_requests: 100 + default_priority: 82 # between SGLang (85) and vLLM (80) + streaming_support: true + +# Detection hints for auto-discovery +detection: + path_indicators: + - "/v1/encode" # LMDeploy-specific token encoding + - "/generate" # LMDeploy native generation + - "/pooling" # LMDeploy reward/score path + - "/is_sleeping" # distinct from vLLM and SGLang + default_ports: + - 23333 # api_server default (not 8000 which is proxy_server) + +# Request/response handling +request: + model_field_paths: + - "model" + response_format: "lmdeploy" + parsing_rules: + chat_completions_path: "/v1/chat/completions" + completions_path: "/v1/completions" + model_field_name: "model" + supports_streaming: true + +# Path indices for specific functions +path_indices: + health: 0 + models: 1 + chat_completions: 2 + completions: 3 + +# Model handling +models: + name_format: "{{.Name}}" + capability_patterns: + chat: + - "*-Chat-*" + - "*-Instruct*" + - "*-chat-*" + vision: + - "*vision*" + - "*llava*" + - "*VL*" + code: + - "*code*" + - "*Code*" + # Context window patterns for common LMDeploy models + context_patterns: + - pattern: "*llama-3.1*" + context: 131072 + - pattern: "*llama-3*" + context: 8192 + - pattern: "*internlm2_5*" + context: 32768 + - pattern: "*internlm2*" + context: 32768 + - pattern: "*mistral*" + context: 32768 + - pattern: "*qwen2*" + context: 32768 + +# Resource management +resources: + model_sizes: + - patterns: ["*70b*", "*72b*"] + min_memory_gb: 140 + recommended_memory_gb: 160 + min_gpu_memory_gb: 140 + estimated_load_time_ms: 60000 + - patterns: ["*34b*", "*33b*", "*30b*"] + min_memory_gb: 70 + recommended_memory_gb: 80 + min_gpu_memory_gb: 70 + estimated_load_time_ms: 45000 + - patterns: ["*13b*", "*14b*"] + min_memory_gb: 30 + recommended_memory_gb: 40 + min_gpu_memory_gb: 30 + estimated_load_time_ms: 30000 + - patterns: ["*7b*", "*8b*"] + min_memory_gb: 16 + recommended_memory_gb: 24 + min_gpu_memory_gb: 16 + estimated_load_time_ms: 20000 + - patterns: ["*3b*"] + min_memory_gb: 8 + recommended_memory_gb: 12 + min_gpu_memory_gb: 8 + estimated_load_time_ms: 
15000 + - patterns: ["*1b*", "*1.1b*", "*1.5b*"] + min_memory_gb: 4 + recommended_memory_gb: 8 + min_gpu_memory_gb: 4 + estimated_load_time_ms: 10000 + + defaults: + min_memory_gb: 8 + recommended_memory_gb: 16 + min_gpu_memory_gb: 8 + requires_gpu: true + estimated_load_time_ms: 30000 + + concurrency_limits: + - min_memory_gb: 100 + max_concurrent: 10 + - min_memory_gb: 50 + max_concurrent: 20 + - min_memory_gb: 20 + max_concurrent: 50 + - min_memory_gb: 0 + max_concurrent: 100 + + timeout_scaling: + base_timeout_seconds: 120 + load_time_buffer: true + +# Metrics extraction for LMDeploy responses +metrics: + extraction: + enabled: true + source: response_body + format: json + paths: + model: "$.model" + finish_reason: "$.choices[0].finish_reason" + input_tokens: "$.usage.prompt_tokens" + output_tokens: "$.usage.completion_tokens" + total_tokens: "$.usage.total_tokens" + calculations: + is_complete: 'len(finish_reason) > 0' diff --git a/docs/content/api-reference/lmdeploy.md b/docs/content/api-reference/lmdeploy.md new file mode 100644 index 0000000..5c3158d --- /dev/null +++ b/docs/content/api-reference/lmdeploy.md @@ -0,0 +1,313 @@ +# LMDeploy API + +Proxy endpoints for LMDeploy inference servers. Available through the `/olla/lmdeploy/` prefix. + +## Endpoints Overview + +| Method | URI | Description | +|--------|-----|-------------| +| GET | `/olla/lmdeploy/health` | Health check | +| GET | `/olla/lmdeploy/v1/models` | List available models | +| POST | `/olla/lmdeploy/v1/chat/completions` | Chat completion | +| POST | `/olla/lmdeploy/v1/completions` | Text completion | +| POST | `/olla/lmdeploy/v1/encode` | Token encoding | +| POST | `/olla/lmdeploy/generate` | Native generation | +| POST | `/olla/lmdeploy/pooling` | Reward/score pooling | +| GET | `/olla/lmdeploy/is_sleeping` | Sleep state probe | + +!!! warning "/v1/embeddings is not supported" + LMDeploy returns HTTP 400 on `/v1/embeddings` unconditionally across all backends. Use `/pooling` for reward/score tasks instead. + +--- + +## GET /olla/lmdeploy/health + +Check LMDeploy server health. + +### Request + +```bash +curl http://localhost:40114/olla/lmdeploy/health +``` + +### Response + +```json +{} +``` + +LMDeploy returns an empty body with HTTP 200 on a healthy server. + +--- + +## GET /olla/lmdeploy/v1/models + +List models available on the LMDeploy server. + +### Request + +```bash +curl http://localhost:40114/olla/lmdeploy/v1/models +``` + +### Response + +```json +{ + "object": "list", + "data": [ + { + "id": "internlm/internlm2_5-7b-chat", + "object": "model", + "created": 1705334400, + "owned_by": "lmdeploy", + "root": "internlm/internlm2_5-7b-chat", + "permission": [] + } + ] +} +``` + +--- + +## POST /olla/lmdeploy/v1/chat/completions + +OpenAI-compatible chat completion. + +### Request + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "messages": [ + {"role": "user", "content": "Explain the TurboMind engine in one paragraph"} + ], + "temperature": 0.7, + "max_tokens": 200, + "stream": false + }' +``` + +### Response + +```json +{ + "id": "chatcmpl-lmdeploy-abc123", + "object": "chat.completion", + "created": 1705334400, + "model": "internlm/internlm2_5-7b-chat", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "TurboMind is LMDeploy's C++/CUDA inference engine..." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 18, + "completion_tokens": 64, + "total_tokens": 82 + } +} +``` + +### Streaming Response + +When `"stream": true`: + +```text +data: {"id":"chatcmpl-lmdeploy-abc123","object":"chat.completion.chunk","created":1705334400,"model":"internlm/internlm2_5-7b-chat","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]} + +data: {"id":"chatcmpl-lmdeploy-abc123","object":"chat.completion.chunk","created":1705334400,"model":"internlm/internlm2_5-7b-chat","choices":[{"index":0,"delta":{"content":"TurboMind"},"finish_reason":null}]} + +... + +data: [DONE] +``` + +--- + +## POST /olla/lmdeploy/v1/completions + +Text completion (OpenAI-compatible). + +### Request + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "prompt": "LMDeploy uses TurboMind because", + "max_tokens": 100, + "temperature": 0.8, + "stream": false + }' +``` + +### Response + +```json +{ + "id": "cmpl-lmdeploy-xyz789", + "object": "text_completion", + "created": 1705334400, + "model": "internlm/internlm2_5-7b-chat", + "choices": [ + { + "text": " it provides efficient GPU utilisation through continuous batching...", + "index": 0, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 55, + "total_tokens": 64 + } +} +``` + +--- + +## POST /olla/lmdeploy/v1/encode + +Token encoding (LMDeploy-specific). Encodes input text to token IDs without running inference. + +### Request + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/v1/encode \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "input": "Hello, world!" + }' +``` + +### Response + +```json +{ + "input_ids": [1, 22172, 29892, 3186, 29991], + "length": 5 +} +``` + +--- + +## POST /olla/lmdeploy/generate + +LMDeploy's native generation endpoint. + +### Request + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/generate \ + -H "Content-Type: application/json" \ + -d '{ + "inputs": "def fibonacci(n):\n ", + "parameters": { + "temperature": 0.2, + "max_new_tokens": 100 + } + }' +``` + +### Response + +```json +{ + "generated_text": "def fibonacci(n):\n if n <= 1:\n return n\n return fibonacci(n-1) + fibonacci(n-2)", + "finish_reason": "stop" +} +``` + +--- + +## POST /olla/lmdeploy/pooling + +Reward or score pooling for embedding-style tasks. This is the correct path for pooling operations — `/v1/embeddings` is not supported. + +### Request + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/pooling \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "input": "The quick brown fox" + }' +``` + +### Response + +```json +{ + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.0123, -0.0456, 0.0789, ...] + } + ], + "model": "internlm/internlm2_5-7b-chat", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5 + } +} +``` + +--- + +## GET /olla/lmdeploy/is_sleeping + +Probe whether the LMDeploy engine is in sleep mode. Sleeping instances return HTTP 503 on generation endpoints — Olla's health checker treats this as a transient failure rather than a hard outage. 
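+
+As a readiness gate, a client can poll this probe until the engine reports awake. A minimal sketch, assuming `jq` is installed:
+
+```bash
+# Wait until the engine is awake before sending generation traffic
+until [ "$(curl -s http://localhost:40114/olla/lmdeploy/is_sleeping | jq -r '.is_sleeping')" = "false" ]; do
+  sleep 1
+done
+```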
+ +### Request + +```bash +curl http://localhost:40114/olla/lmdeploy/is_sleeping +``` + +### Response + +```json +{"is_sleeping": false} +``` + +--- + +## Configuration Example + +```yaml +discovery: + static: + endpoints: + - url: "http://192.168.0.100:23333" + name: "lmdeploy-server" + type: "lmdeploy" + priority: 82 + model_url: "/v1/models" + health_check_url: "/health" + check_interval: 5s + check_timeout: 2s +``` + +The default port for `lmdeploy serve api_server` is **23333**. The `proxy_server` component runs on 8000 but does not expose `/health` and is not supported by Olla. + +## Response Headers + +All responses include: + +- `X-Olla-Endpoint` - Backend endpoint name (e.g., `lmdeploy-server`) +- `X-Olla-Model` - Model used for the request +- `X-Olla-Backend-Type` - Always `lmdeploy` for these endpoints +- `X-Olla-Response-Time` - Total processing time diff --git a/docs/content/api-reference/overview.md b/docs/content/api-reference/overview.md index 242083d..d9a0db6 100644 --- a/docs/content/api-reference/overview.md +++ b/docs/content/api-reference/overview.md @@ -52,6 +52,12 @@ Proxy endpoints for OpenAI-compatible services. - `/olla/openai/*` - OpenAI API endpoints +### [LMDeploy API](lmdeploy.md) +Proxy endpoints for LMDeploy inference servers. + +- `/olla/lmdeploy/*` - LMDeploy API endpoints +- OpenAI-compatible endpoints plus token encoding and reward pooling + ### [vLLM API](vllm.md) Proxy endpoints for vLLM servers. @@ -147,7 +153,7 @@ All responses include: | `X-Olla-Request-ID` | Unique request identifier | | `X-Olla-Endpoint` | Backend endpoint name | | `X-Olla-Model` | Model used (if applicable) | -| `X-Olla-Backend-Type` | Provider type, examples:
`ollama/lm-studio/llamacpp/openai/openai-compatible/vllm/sglang/lemonade` | +| `X-Olla-Backend-Type` | Provider type, examples:
`ollama/lm-studio/llamacpp/openai/openai-compatible/vllm/sglang/lemonade/lmdeploy` | | `X-Olla-Response-Time` | Total processing time | | `X-Olla-Routing-Strategy` | Routing strategy used (when model routing is active) | | `X-Olla-Routing-Decision` | Routing decision made (routed/fallback/rejected) | diff --git a/docs/content/concepts/overview.md b/docs/content/concepts/overview.md index ed0f8ec..3726a46 100644 --- a/docs/content/concepts/overview.md +++ b/docs/content/concepts/overview.md @@ -66,7 +66,7 @@ Profiles optimise response handling for specific workload patterns. ### [Profile System](profile-system.md) Provider-specific configuration templates: -- Pre-configured profiles for Ollama, LM Studio, vLLM, SGLang, Lemonade SDK, LiteLLM, OpenAI +- Pre-configured profiles for Ollama, LM Studio, LMDeploy, vLLM, SGLang, Lemonade SDK, LiteLLM, OpenAI - Custom header mappings and endpoint patterns - Model format converters for each provider diff --git a/docs/content/faq.md b/docs/content/faq.md index 036f589..ef1d6d5 100644 --- a/docs/content/faq.md +++ b/docs/content/faq.md @@ -10,7 +10,7 @@ keywords: olla faq, troubleshooting, common questions, proxy help ### What is Olla? -Olla is a high-performance proxy and load balancer specifically designed for LLM infrastructure. It intelligently routes requests across multiple LLM backends (Ollama, LM Studio, vLLM, SGLang, Lemonade SDK, LiteLLM, and OpenAI-compatible endpoints) while providing load balancing, health checking, and unified model management. +Olla is a high-performance proxy and load balancer specifically designed for LLM infrastructure. It intelligently routes requests across multiple LLM backends (Ollama, LM Studio, LMDeploy, vLLM, SGLang, Lemonade SDK, LiteLLM, and OpenAI-compatible endpoints) while providing load balancing, health checking, and unified model management. See how Olla compares to [other tools](compare/overview.md) in the ecosystem. diff --git a/docs/content/index.md b/docs/content/index.md index dc9f6bd..7c918c1 100644 --- a/docs/content/index.md +++ b/docs/content/index.md @@ -16,7 +16,7 @@ keywords: llm proxy, ollama proxy, lm studio proxy, vllm proxy, sglang, lemonade vLLM: Native Support SGLang: Native Support LiteLLM: Native Support - LM Deploy: OpenAI Compatible
+ LMDeploy: Native Support
vLLM-MLX: Native Support Docker Model Runner: Native Support
Ollama: Native Support @@ -97,7 +97,7 @@ Olla provides detailed response headers for observability: |--------|-------------| | `X-Olla-Endpoint` | Backend endpoint name | | `X-Olla-Model` | Model used for the request | -| `X-Olla-Backend-Type` | Backend type (ollama/openai/openai-compatible/lm-studio/llamacpp/vllm/sglang/lemonade) | +| `X-Olla-Backend-Type` | Backend type (ollama/openai/openai-compatible/lm-studio/llamacpp/vllm/sglang/lemonade/lmdeploy) | | `X-Olla-Request-ID` | Unique request identifier | | `X-Olla-Response-Time` | Total processing time | diff --git a/docs/content/integrations/backend/lmdeploy.md b/docs/content/integrations/backend/lmdeploy.md new file mode 100644 index 0000000..afcbfd6 --- /dev/null +++ b/docs/content/integrations/backend/lmdeploy.md @@ -0,0 +1,377 @@ +--- +title: LMDeploy Integration - High-Performance LLM Inference with Olla +description: Configure LMDeploy with Olla proxy for efficient LLM serving. TurboMind engine, OpenAI-compatible API, VLM support, and GPU-optimised inference from InternLM. +keywords: LMDeploy, Olla proxy, TurboMind, InternLM, LLM inference, GPU optimisation, high throughput +--- + +# LMDeploy Integration + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| | |
+|------|-------------|
+| **Home** | [github.com/InternLM/lmdeploy](https://github.com/InternLM/lmdeploy) |
+| **Since** | Olla v0.0.21 |
+| **Type** | `lmdeploy` (use in endpoint configuration) |
+| **Profile** | `lmdeploy.yaml` (see latest) |
+| **Features** | • Proxy Forwarding<br>• Health Check (native)<br>• Model Unification<br>• Model Detection & Normalisation<br>• OpenAI API Compatibility<br>• Token Encoding API<br>• Reward/Score Pooling<br>• VLM Inference (same api_server) |
+| **Unsupported** | • `/v1/embeddings` (returns HTTP 400 — use `/pooling`)<br>• proxy_server component (no `/health` endpoint)<br>• Model Management (loading/unloading) |
+| **Attributes** | • OpenAI Compatible<br>• GPU Optimised (TurboMind C++/CUDA engine)<br>• Continuous Batching<br>• VLM Support |
+| **Prefixes** | `/olla/lmdeploy/` |
+| **Endpoints** | See below |
+ +## Configuration + +### Basic Setup + +Register an LMDeploy `api_server` instance with Olla: + +```yaml +discovery: + static: + endpoints: + - url: "http://localhost:23333" + name: "local-lmdeploy" + type: "lmdeploy" + priority: 82 + model_url: "/v1/models" + health_check_url: "/health" + check_interval: 5s + check_timeout: 2s +``` + +The default port for `lmdeploy serve api_server` is **23333**. Register individual `api_server` instances directly — do not point Olla at the `proxy_server` component, which lacks a `/health` endpoint and only forwards a subset of routes. + +### Authentication + +LMDeploy supports optional Bearer-token authentication via the `--api-keys` flag. Configure the token in Olla's endpoint headers so it is forwarded on every proxied request: + +```yaml +discovery: + static: + endpoints: + - url: "http://gpu-server:23333" + name: "lmdeploy-prod" + type: "lmdeploy" + priority: 82 + health_check_url: "/health" + check_interval: 10s + check_timeout: 5s + headers: + Authorization: "Bearer ${LMDEPLOY_API_KEY}" +``` + +The `/health` endpoint is auth-exempt on LMDeploy, so health checks will succeed even when a key is required for inference. + +### Multiple Instances + +```yaml +discovery: + static: + endpoints: + - url: "http://gpu1:23333" + name: "lmdeploy-1" + type: "lmdeploy" + priority: 100 + + - url: "http://gpu2:23333" + name: "lmdeploy-2" + type: "lmdeploy" + priority: 100 + +proxy: + engine: "olla" + load_balancer: "least-connections" +``` + +## Endpoints Supported + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Path | Description |
+|------|-------------|
+| `/health` | Health Check |
+| `/v1/models` | List Models (OpenAI format) |
+| `/v1/chat/completions` | Chat Completions (OpenAI format) |
+| `/v1/completions` | Text Completions (OpenAI format) |
+| `/v1/encode` | Token Encoding (LMDeploy-specific) |
+| `/generate` | Native Generation Endpoint |
+| `/pooling` | Reward/Score Pooling (not `/v1/embeddings`) |
+| `/is_sleeping` | Sleep State Probe |
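+
+Every route above is proxied under Olla's `/olla/lmdeploy/` prefix. As a quick sketch (the `inputs`/`parameters` shape mirrors the API reference), the native generation endpoint can be called directly:
+
+```bash
+# Native /generate through Olla; request shape as documented in the API reference
+curl -X POST http://localhost:40114/olla/lmdeploy/generate \
+  -H "Content-Type: application/json" \
+  -d '{
+    "inputs": "The capital of France is",
+    "parameters": {"temperature": 0.2, "max_new_tokens": 32}
+  }'
+```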
+ +## Usage Examples + +### Chat Completion + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is TurboMind?"} + ], + "temperature": 0.7, + "max_tokens": 300 + }' +``` + +### Streaming + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "messages": [{"role": "user", "content": "Write a short story"}], + "stream": true, + "temperature": 0.8 + }' +``` + +### Token Encoding + +```bash +curl -X POST http://localhost:40114/olla/lmdeploy/v1/encode \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "input": "Hello, world!" + }' +``` + +### Pooling (Reward/Score) + +```bash +# Use /pooling — not /v1/embeddings (which returns HTTP 400) +curl -X POST http://localhost:40114/olla/lmdeploy/pooling \ + -H "Content-Type: application/json" \ + -d '{ + "model": "internlm/internlm2_5-7b-chat", + "input": "The quick brown fox" + }' +``` + +## Starting LMDeploy + +### Basic Start + +```bash +pip install lmdeploy + +lmdeploy serve api_server internlm/internlm2_5-7b-chat \ + --server-port 23333 +``` + +### TurboMind Backend (Default, GPU) + +```bash +lmdeploy serve api_server internlm/internlm2_5-7b-chat \ + --backend turbomind \ + --server-port 23333 \ + --tp 1 +``` + +### PyTorch Backend + +Use `pytorch` when a model is not supported by TurboMind, or for CPU inference: + +```bash +lmdeploy serve api_server internlm/internlm2_5-7b-chat \ + --backend pytorch \ + --server-port 23333 +``` + +### With Authentication + +```bash +lmdeploy serve api_server internlm/internlm2_5-7b-chat \ + --server-port 23333 \ + --api-keys my-secret-key +``` + +### VLM Inference + +Vision-language models use the same `api_server` entrypoint — no separate binary: + +```bash +lmdeploy serve api_server InternLM/internlm-xcomposer2-7b \ + --server-port 23333 +``` + +### Docker + +```bash +docker run --gpus all \ + -p 23333:23333 \ + openmmlab/lmdeploy:latest \ + lmdeploy serve api_server internlm/internlm2_5-7b-chat \ + --server-port 23333 +``` + +## LMDeploy Specifics + +### Sleep/Wake + +LMDeploy supports a sleep mode to release GPU memory when idle: + +```bash +# Suspend the engine (GPU memory freed) +curl -X POST http://localhost:23333/sleep + +# Resume the engine +curl -X POST http://localhost:23333/wakeup + +# Check state (proxied via Olla) +curl http://localhost:40114/olla/lmdeploy/is_sleeping +``` + +Olla treats a sleeping engine as transiently unavailable and will route around it if other healthy instances exist. Once the engine wakes, health checks recover it automatically. + +### Embeddings vs Pooling + +LMDeploy does not implement `/v1/embeddings`. The correct path for reward-model scoring and embedding-style pooling is `/pooling`. This is a deliberate upstream design decision — using TurboMind's native pooling path rather than the OpenAI embeddings spec. + +### Model Naming + +LMDeploy serves models by their HuggingFace identifiers: + +- `internlm/internlm2_5-7b-chat` +- `meta-llama/Meta-Llama-3.1-8B-Instruct` +- `mistralai/Mistral-7B-Instruct-v0.2` +- `Qwen/Qwen2.5-7B-Instruct` + +### Proxy Server vs API Server + +LMDeploy ships two server components: + +| Component | Port | Use with Olla? 
| +|-----------|------|----------------| +| `api_server` | 23333 | Yes — has `/health`, full route support | +| `proxy_server` | 8000 | No — no `/health`, limited routes | + +Always register individual `api_server` instances. The `proxy_server` is LMDeploy's own load balancer and is redundant when Olla is in the stack. + +## Profile Customisation + +Create `config/profiles/lmdeploy-custom.yaml` to override defaults. See [Profile Configuration](../../concepts/profile-system.md) for the full schema. + +```yaml +name: lmdeploy +version: "1.0" + +# Add a shorter routing prefix +routing: + prefixes: + - lmdeploy + - turbomind + +# Increase timeout for large 70B models +characteristics: + timeout: 5m +``` + +## OpenAI SDK + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:40114/olla/lmdeploy/v1", + api_key="not-needed" # omit if no --api-keys set on lmdeploy +) + +response = client.chat.completions.create( + model="internlm/internlm2_5-7b-chat", + messages=[{"role": "user", "content": "Hello!"}] +) +``` + +## Next Steps + +- [LMDeploy API Reference](../../api-reference/lmdeploy.md) - Endpoint details and response formats +- [Profile Configuration](../../concepts/profile-system.md) - Customise LMDeploy behaviour +- [Load Balancing](../../concepts/load-balancing.md) - Scale across multiple LMDeploy instances +- [Health Checking](../../concepts/health-checking.md) - Circuit breakers and failover diff --git a/docs/content/integrations/overview.md b/docs/content/integrations/overview.md index 0b9d045..eb3d985 100644 --- a/docs/content/integrations/overview.md +++ b/docs/content/integrations/overview.md @@ -17,6 +17,7 @@ Olla natively supports the following backends: | [Ollama](./backend/ollama.md) | `ollama` | Native support for [Ollama](https://github.com/ollama/ollama), including model unification | | [LM Studio](./backend/lmstudio.md) | `lm-studio` | Native support for [LM Studio](https://lmstudio.ai/), including model unification | | [llama.cpp](./backend/llamacpp.md) | `llamacpp` | Native support for [llama.cpp](https://github.com/ggml-org/llama.cpp) lightweight C++ inference server with GGUF models, including slot management, code infill, and CPU-first design for edge deployment | +| [LMDeploy](./backend/lmdeploy.md) | `lmdeploy` | Native support for [LMDeploy](https://github.com/InternLM/lmdeploy), InternLM's high-performance inference server with TurboMind C++/CUDA engine | | [vLLM](./backend/vllm.md) | `vllm` | Native support for [vLLM](https://github.com/vllm-project/vllm), including model unification | | [vLLM-MLX](./backend/vllm-mlx.md) | `vllm-mlx` | Native support for [vLLM-MLX](https://github.com/waybarrios/vllm-mlx), Apple Silicon inference using the MLX framework with native Anthropic Messages API | | [SGLang](./backend/sglang.md) | `sglang` | Native support for [SGLang](https://github.com/sgl-project/sglang) with RadixAttention and Frontend Language, including model unification and vision support | diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index f8d23f6..306e59c 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -157,6 +157,7 @@ nav: - Backends: - Ollama: integrations/backend/ollama.md - LM Studio: integrations/backend/lmstudio.md + - LMDeploy: integrations/backend/lmdeploy.md - vLLM: integrations/backend/vllm.md - vLLM-MLX: integrations/backend/vllm-mlx.md - SGLang: integrations/backend/sglang.md @@ -190,6 +191,7 @@ nav: - Provider APIs: - Ollama: api-reference/ollama.md - LM Studio: api-reference/lmstudio.md + - LMDeploy: 
api-reference/lmdeploy.md - vLLM: api-reference/vllm.md - vLLM-MLX: api-reference/vllm-mlx.md - SGLang: api-reference/sglang.md diff --git a/internal/adapter/converter/factory.go b/internal/adapter/converter/factory.go index 8c53812..b14def3 100644 --- a/internal/adapter/converter/factory.go +++ b/internal/adapter/converter/factory.go @@ -24,6 +24,7 @@ func NewConverterFactory() *ConverterFactory { factory.RegisterConverter(NewOllamaConverter()) factory.RegisterConverter(NewLemonadeConverter()) factory.RegisterConverter(NewLMStudioConverter()) + factory.RegisterConverter(NewLMDeployConverter()) factory.RegisterConverter(NewSGLangConverter()) factory.RegisterConverter(NewVLLMConverter()) factory.RegisterConverter(NewLlamaCppConverter()) diff --git a/internal/adapter/converter/factory_test.go b/internal/adapter/converter/factory_test.go index f976adb..7a3ccde 100644 --- a/internal/adapter/converter/factory_test.go +++ b/internal/adapter/converter/factory_test.go @@ -23,6 +23,7 @@ func TestConverterFactory(t *testing.T) { {"ollama", "*converter.OllamaConverter"}, {"lemonade", "*converter.LemonadeConverter"}, {"llamacpp", "*converter.LlamaCppConverter"}, + {"lmdeploy", "*converter.LMDeployConverter"}, {"lmstudio", "*converter.LMStudioConverter"}, {"sglang", "*converter.SGLangConverter"}, {"vllm", "*converter.VLLMConverter"}, @@ -56,6 +57,7 @@ func TestConverterFactory(t *testing.T) { assert.Contains(t, qpErr.Reason, "ollama") assert.Contains(t, qpErr.Reason, "lemonade") assert.Contains(t, qpErr.Reason, "llamacpp") + assert.Contains(t, qpErr.Reason, "lmdeploy") assert.Contains(t, qpErr.Reason, "lmstudio") assert.Contains(t, qpErr.Reason, "sglang") assert.Contains(t, qpErr.Reason, "vllm") @@ -65,7 +67,7 @@ func TestConverterFactory(t *testing.T) { t.Run("GetSupportedFormats returns all formats", func(t *testing.T) { formats := factory.GetSupportedFormats() - assert.Len(t, formats, 10) + assert.Len(t, formats, 11) // Check all expected formats are present formatMap := make(map[string]bool) @@ -78,6 +80,7 @@ func TestConverterFactory(t *testing.T) { assert.True(t, formatMap["ollama"]) assert.True(t, formatMap["lemonade"]) assert.True(t, formatMap["llamacpp"]) + assert.True(t, formatMap["lmdeploy"]) assert.True(t, formatMap["sglang"]) assert.True(t, formatMap["vllm"]) assert.True(t, formatMap["vllm-mlx"]) diff --git a/internal/adapter/converter/lmdeploy_converter.go b/internal/adapter/converter/lmdeploy_converter.go new file mode 100644 index 0000000..475f6ac --- /dev/null +++ b/internal/adapter/converter/lmdeploy_converter.go @@ -0,0 +1,108 @@ +package converter + +import ( + "strings" + "time" + + "github.com/thushan/olla/internal/adapter/registry/profile" + "github.com/thushan/olla/internal/core/constants" + "github.com/thushan/olla/internal/core/domain" + "github.com/thushan/olla/internal/core/ports" +) + +// Type aliases for backward compatibility with tests +type LMDeployModelResponse = profile.LMDeployResponse +type LMDeployModelData = profile.LMDeployModel + +// LMDeployConverter converts models to LMDeploy-compatible format. +// LMDeploy's /v1/models shape is OpenAI-compatible but without max_model_len +// and with owned_by defaulting to "lmdeploy". +type LMDeployConverter struct { + *BaseConverter +} + +// NewLMDeployConverter creates a new LMDeploy format converter. 
+func NewLMDeployConverter() ports.ModelResponseConverter { + return &LMDeployConverter{ + BaseConverter: NewBaseConverter(constants.ProviderTypeLMDeploy), + } +} + +func (c *LMDeployConverter) GetFormatName() string { + return constants.ProviderTypeLMDeploy +} + +func (c *LMDeployConverter) ConvertToFormat(models []*domain.UnifiedModel, filters ports.ModelFilters) (interface{}, error) { + filtered := filterModels(models, filters) + + data := make([]profile.LMDeployModel, 0, len(filtered)) + for _, model := range filtered { + m := c.convertModel(model) + if m != nil { + data = append(data, *m) + } + } + + return profile.LMDeployResponse{ + Object: "list", + Data: data, + }, nil +} + +func (c *LMDeployConverter) convertModel(model *domain.UnifiedModel) *profile.LMDeployModel { + now := time.Now().Unix() + + modelID := c.findLMDeployNativeName(model) + if modelID == "" { + if len(model.Aliases) > 0 { + modelID = model.Aliases[0].Name + } else { + modelID = model.ID + } + } + + m := &profile.LMDeployModel{ + ID: modelID, + Object: "model", + Created: now, + OwnedBy: c.determineOwner(modelID), + } + + // LMDeploy does not expose max_model_len on the wire; omit it here too. + + // Generate standard permissions mirroring the LMDeploy default. + m.Permission = []profile.LMDeployModelPermission{ + { + ID: "modelperm-olla-" + strings.ReplaceAll(modelID, "/", "-"), + Object: "model_permission", + Created: now, + AllowCreateEngine: false, + AllowSampling: true, + AllowLogprobs: true, + AllowSearchIndices: false, + AllowView: true, + AllowFineTuning: false, + Organization: "*", + IsBlocking: false, + }, + } + + return m +} + +func (c *LMDeployConverter) findLMDeployNativeName(model *domain.UnifiedModel) string { + alias, found := c.BaseConverter.FindProviderAlias(model) + if found { + return alias + } + return "" +} + +// determineOwner extracts the organisation from org/model-name style IDs, +// defaulting to "lmdeploy" when there is no slash. 
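+// For example, "internlm/internlm2_5-7b-chat" yields owner "internlm",
+// while "simple-model" falls back to the provider default "lmdeploy".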
+func (c *LMDeployConverter) determineOwner(modelID string) string { + if parts := strings.SplitN(modelID, "/", 2); len(parts) == 2 { + return parts[0] + } + return constants.ProviderTypeLMDeploy +} diff --git a/internal/adapter/converter/lmdeploy_converter_test.go b/internal/adapter/converter/lmdeploy_converter_test.go new file mode 100644 index 0000000..8ef5503 --- /dev/null +++ b/internal/adapter/converter/lmdeploy_converter_test.go @@ -0,0 +1,150 @@ +package converter + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/thushan/olla/internal/adapter/registry/profile" + "github.com/thushan/olla/internal/core/constants" + "github.com/thushan/olla/internal/core/domain" + "github.com/thushan/olla/internal/core/ports" +) + +func TestNewLMDeployConverter(t *testing.T) { + t.Parallel() + + c := NewLMDeployConverter() + assert.NotNil(t, c) + assert.Equal(t, constants.ProviderTypeLMDeploy, c.GetFormatName()) +} + +func TestLMDeployConverter_ConvertToFormat_Empty(t *testing.T) { + t.Parallel() + + c := NewLMDeployConverter() + result, err := c.ConvertToFormat([]*domain.UnifiedModel{}, ports.ModelFilters{}) + + require.NoError(t, err) + resp, ok := result.(profile.LMDeployResponse) + require.True(t, ok) + assert.Equal(t, "list", resp.Object) + assert.Empty(t, resp.Data) +} + +func TestLMDeployConverter_ConvertToFormat_SingleModel(t *testing.T) { + t.Parallel() + + c := NewLMDeployConverter() + + model := &domain.UnifiedModel{ + ID: "internlm/internlm2_5-7b-chat", + Aliases: []domain.AliasEntry{ + { + Name: "internlm/internlm2_5-7b-chat", + Source: constants.ProviderTypeLMDeploy, + }, + }, + } + + result, err := c.ConvertToFormat([]*domain.UnifiedModel{model}, ports.ModelFilters{}) + + require.NoError(t, err) + resp, ok := result.(profile.LMDeployResponse) + require.True(t, ok) + require.Len(t, resp.Data, 1) + + m := resp.Data[0] + assert.Equal(t, "internlm/internlm2_5-7b-chat", m.ID) + assert.Equal(t, "model", m.Object) + assert.NotZero(t, m.Created) + // org extracted from ID + assert.Equal(t, "internlm", m.OwnedBy) + // LMDeploy does not expose max_model_len — the field should remain zero-value + // (permissions are always generated) + require.Len(t, m.Permission, 1) + assert.True(t, m.Permission[0].AllowSampling) +} + +func TestLMDeployConverter_ConvertToFormat_NoOrgInID(t *testing.T) { + t.Parallel() + + c := NewLMDeployConverter() + + model := &domain.UnifiedModel{ + ID: "simple-model", + } + + result, err := c.ConvertToFormat([]*domain.UnifiedModel{model}, ports.ModelFilters{}) + + require.NoError(t, err) + resp, ok := result.(profile.LMDeployResponse) + require.True(t, ok) + require.Len(t, resp.Data, 1) + // Default owner when there is no org/model-name slash + assert.Equal(t, constants.ProviderTypeLMDeploy, resp.Data[0].OwnedBy) +} + +func TestLMDeployConverter_ConvertToFormat_MultipleModels(t *testing.T) { + t.Parallel() + + c := NewLMDeployConverter() + + models := []*domain.UnifiedModel{ + { + ID: "internlm/internlm2_5-7b-chat", + Aliases: []domain.AliasEntry{ + {Name: "internlm/internlm2_5-7b-chat", Source: constants.ProviderTypeLMDeploy}, + }, + }, + { + ID: "meta-llama/Meta-Llama-3.1-8B-Instruct", + Aliases: []domain.AliasEntry{ + {Name: "meta-llama/Meta-Llama-3.1-8B-Instruct", Source: constants.ProviderTypeLMDeploy}, + }, + }, + } + + result, err := c.ConvertToFormat(models, ports.ModelFilters{}) + + require.NoError(t, err) + resp, ok := result.(profile.LMDeployResponse) + require.True(t, ok) + assert.Equal(t, 
"list", resp.Object) + require.Len(t, resp.Data, 2) + assert.Equal(t, "internlm/internlm2_5-7b-chat", resp.Data[0].ID) + assert.Equal(t, "meta-llama/Meta-Llama-3.1-8B-Instruct", resp.Data[1].ID) +} + +func TestLMDeployConverter_FallbackToAliasOrID(t *testing.T) { + t.Parallel() + + c := NewLMDeployConverter() + + // No LMDeploy-sourced alias — should fall back to first alias + modelWithOtherAlias := &domain.UnifiedModel{ + ID: "fallback-id", + Aliases: []domain.AliasEntry{ + {Name: "alias-from-ollama", Source: constants.ProviderTypeOllama}, + }, + } + + result, err := c.ConvertToFormat([]*domain.UnifiedModel{modelWithOtherAlias}, ports.ModelFilters{}) + require.NoError(t, err) + resp, ok := result.(profile.LMDeployResponse) + require.True(t, ok) + require.Len(t, resp.Data, 1) + assert.Equal(t, "alias-from-ollama", resp.Data[0].ID) + + // No aliases at all — should use unified ID + modelWithNoAlias := &domain.UnifiedModel{ + ID: "bare-id", + } + + result2, err2 := c.ConvertToFormat([]*domain.UnifiedModel{modelWithNoAlias}, ports.ModelFilters{}) + require.NoError(t, err2) + resp2, ok2 := result2.(profile.LMDeployResponse) + require.True(t, ok2) + require.Len(t, resp2.Data, 1) + assert.Equal(t, "bare-id", resp2.Data[0].ID) +} diff --git a/internal/adapter/registry/profile/lmdeploy.go b/internal/adapter/registry/profile/lmdeploy.go new file mode 100644 index 0000000..63ce734 --- /dev/null +++ b/internal/adapter/registry/profile/lmdeploy.go @@ -0,0 +1,38 @@ +package profile + +// LMDeployResponse is the response structure from LMDeploy /v1/models endpoint. +// The shape follows the OpenAI ModelList format but with LMDeploy-specific field +// values — notably owned_by defaults to "lmdeploy" and there is no max_model_len. +type LMDeployResponse struct { + Object string `json:"object"` + Data []LMDeployModel `json:"data"` +} + +// LMDeployModel represents a single model entry in the LMDeploy /v1/models response. +// Key difference from vLLM: no max_model_len field. Key difference from vLLM/SGLang: +// owned_by defaults to "lmdeploy" (not "vllm" or "sglang"). +type LMDeployModel struct { + Root *string `json:"root,omitempty"` + Parent *string `json:"parent,omitempty"` + ID string `json:"id"` + Object string `json:"object"` + OwnedBy string `json:"owned_by"` + Permission []LMDeployModelPermission `json:"permission,omitempty"` + Created int64 `json:"created"` +} + +// LMDeployModelPermission mirrors the OpenAI permission shape that LMDeploy exposes. 
+type LMDeployModelPermission struct {
+	Group              *string `json:"group"`
+	ID                 string  `json:"id"`
+	Object             string  `json:"object"`
+	Organization       string  `json:"organization"`
+	Created            int64   `json:"created"`
+	AllowCreateEngine  bool    `json:"allow_create_engine"`
+	AllowSampling      bool    `json:"allow_sampling"`
+	AllowLogprobs      bool    `json:"allow_logprobs"`
+	AllowSearchIndices bool    `json:"allow_search_indices"`
+	AllowView          bool    `json:"allow_view"`
+	AllowFineTuning    bool    `json:"allow_fine_tuning"`
+	IsBlocking         bool    `json:"is_blocking"`
+}
diff --git a/internal/adapter/registry/profile/lmdeploy_parser.go b/internal/adapter/registry/profile/lmdeploy_parser.go
new file mode 100644
index 0000000..fce8757
--- /dev/null
+++ b/internal/adapter/registry/profile/lmdeploy_parser.go
@@ -0,0 +1,65 @@
+package profile
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/thushan/olla/internal/core/domain"
+)
+
+type lmdeployParser struct{}
+
+func (p *lmdeployParser) Parse(data []byte) ([]*domain.ModelInfo, error) {
+	if len(data) == 0 {
+		return make([]*domain.ModelInfo, 0), nil
+	}
+
+	var response LMDeployResponse
+	if err := json.Unmarshal(data, &response); err != nil {
+		return nil, fmt.Errorf("failed to parse LMDeploy response: %w", err)
+	}
+
+	models := make([]*domain.ModelInfo, 0, len(response.Data))
+	now := time.Now()
+
+	for _, model := range response.Data {
+		if model.ID == "" {
+			continue
+		}
+
+		modelInfo := &domain.ModelInfo{
+			Name:     model.ID,
+			Type:     "lmdeploy",
+			LastSeen: now,
+		}
+
+		details := &domain.ModelDetails{}
+		hasDetails := false
+
+		if model.Created > 0 {
+			createdTime := time.Unix(model.Created, 0)
+			details.ModifiedAt = &createdTime
+			hasDetails = true
+		}
+
+		// Skip the default owned_by value to avoid storing noise.
+		if model.OwnedBy != "" && model.OwnedBy != "lmdeploy" {
+			details.Publisher = &model.OwnedBy
+			hasDetails = true
+		}
+
+		if model.Parent != nil {
+			details.ParentModel = model.Parent
+			hasDetails = true
+		}
+
+		if hasDetails {
+			modelInfo.Details = details
+		}
+
+		models = append(models, modelInfo)
+	}
+
+	return models, nil
+}
diff --git a/internal/adapter/registry/profile/lmdeploy_parser_test.go b/internal/adapter/registry/profile/lmdeploy_parser_test.go
new file mode 100644
index 0000000..3b7fbb3
--- /dev/null
+++ b/internal/adapter/registry/profile/lmdeploy_parser_test.go
@@ -0,0 +1,220 @@
+package profile
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestLMDeployParser_Parse(t *testing.T) {
+	t.Parallel()
+
+	parser := &lmdeployParser{}
+
+	t.Run("parses valid response with full metadata", func(t *testing.T) {
+		t.Parallel()
+
+		response := `{
+			"object": "list",
+			"data": [
+				{
+					"id": "internlm/internlm2_5-7b-chat",
+					"object": "model",
+					"created": 1754535984,
+					"owned_by": "lmdeploy",
+					"root": "internlm/internlm2_5-7b-chat",
+					"parent": null,
+					"permission": []
+				},
+				{
+					"id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+					"object": "model",
+					"created": 1754535985,
+					"owned_by": "meta-llama",
+					"root": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+					"parent": null,
+					"permission": []
+				}
+			]
+		}`
+
+		models, err := parser.Parse([]byte(response))
+		require.NoError(t, err)
+		require.Len(t, models, 2)
+
+		internlm := models[0]
+		assert.Equal(t, "internlm/internlm2_5-7b-chat", internlm.Name)
+		assert.Equal(t, "lmdeploy", internlm.Type)
+		// owned_by "lmdeploy" is the default — publisher should not be set
+		require.NotNil(t, internlm.Details)
+		assert.Nil(t, internlm.Details.Publisher)
+
require.NotNil(t, internlm.Details.ModifiedAt) + assert.Equal(t, time.Unix(1754535984, 0), *internlm.Details.ModifiedAt) + + llama := models[1] + assert.Equal(t, "meta-llama/Meta-Llama-3.1-8B-Instruct", llama.Name) + assert.Equal(t, "lmdeploy", llama.Type) + require.NotNil(t, llama.Details) + require.NotNil(t, llama.Details.Publisher) + assert.Equal(t, "meta-llama", *llama.Details.Publisher) + }) + + t.Run("handles fine-tuned model with parent", func(t *testing.T) { + t.Parallel() + + response := `{ + "object": "list", + "data": [ + { + "id": "custom/fine-tuned-internlm", + "object": "model", + "created": 1754535986, + "owned_by": "custom-org", + "parent": "internlm/internlm2_5-7b-chat" + } + ] + }` + + models, err := parser.Parse([]byte(response)) + require.NoError(t, err) + require.Len(t, models, 1) + + model := models[0] + require.NotNil(t, model.Details) + require.NotNil(t, model.Details.ParentModel) + assert.Equal(t, "internlm/internlm2_5-7b-chat", *model.Details.ParentModel) + require.NotNil(t, model.Details.Publisher) + assert.Equal(t, "custom-org", *model.Details.Publisher) + }) + + t.Run("skips models without ID", func(t *testing.T) { + t.Parallel() + + response := `{ + "object": "list", + "data": [ + { + "object": "model", + "created": 1754535987, + "owned_by": "lmdeploy" + }, + { + "id": "valid-model", + "object": "model", + "created": 1754535988, + "owned_by": "lmdeploy" + } + ] + }` + + models, err := parser.Parse([]byte(response)) + require.NoError(t, err) + require.Len(t, models, 1) + assert.Equal(t, "valid-model", models[0].Name) + }) + + t.Run("handles empty response bytes", func(t *testing.T) { + t.Parallel() + + models, err := parser.Parse([]byte{}) + require.NoError(t, err) + assert.Empty(t, models) + }) + + t.Run("handles empty data array", func(t *testing.T) { + t.Parallel() + + response := `{"object": "list", "data": []}` + models, err := parser.Parse([]byte(response)) + require.NoError(t, err) + assert.Empty(t, models) + }) + + t.Run("returns error for malformed JSON", func(t *testing.T) { + t.Parallel() + + invalidJSON := `{"object": "list", "data": [{"id": "m", invalid}]}` + models, err := parser.Parse([]byte(invalidJSON)) + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse LMDeploy response") + assert.Nil(t, models) + }) + + t.Run("details nil when no metadata beyond default owned_by", func(t *testing.T) { + t.Parallel() + + response := `{ + "object": "list", + "data": [ + { + "id": "simple-model", + "object": "model", + "created": 0, + "owned_by": "lmdeploy" + } + ] + }` + + models, err := parser.Parse([]byte(response)) + require.NoError(t, err) + require.Len(t, models, 1) + assert.Equal(t, "simple-model", models[0].Name) + // No created timestamp and owned_by is the default — details should be nil + assert.Nil(t, models[0].Details) + }) + + t.Run("no max_model_len field — LMDeploy differs from vLLM here", func(t *testing.T) { + t.Parallel() + + // LMDeploy /v1/models does not include max_model_len; verify we tolerate + // the field being absent (and don't panic or error if it somehow appears). 
+ response := `{ + "object": "list", + "data": [ + { + "id": "qwen/Qwen2-7B-Instruct", + "object": "model", + "created": 1754535990, + "owned_by": "lmdeploy" + } + ] + }` + + models, err := parser.Parse([]byte(response)) + require.NoError(t, err) + require.Len(t, models, 1) + model := models[0] + assert.Equal(t, "qwen/Qwen2-7B-Instruct", model.Name) + // MaxContextLength is never populated from the wire response for LMDeploy + if model.Details != nil { + assert.Nil(t, model.Details.MaxContextLength) + } + }) + + t.Run("preserves LastSeen timestamp", func(t *testing.T) { + t.Parallel() + + response := `{ + "object": "list", + "data": [ + { + "id": "test-model", + "object": "model", + "created": 1754535991, + "owned_by": "lmdeploy" + } + ] + }` + + before := time.Now() + models, err := parser.Parse([]byte(response)) + after := time.Now() + + require.NoError(t, err) + require.Len(t, models, 1) + assert.True(t, !models[0].LastSeen.Before(before)) + assert.True(t, !models[0].LastSeen.After(after)) + }) +} diff --git a/internal/adapter/registry/profile/parsers.go b/internal/adapter/registry/profile/parsers.go index 501fc78..ea91e53 100644 --- a/internal/adapter/registry/profile/parsers.go +++ b/internal/adapter/registry/profile/parsers.go @@ -24,6 +24,8 @@ func getParserForFormat(format string) ModelResponseParser { return &lemonadeParser{} case constants.ProviderPrefixLMStudio1: return &lmStudioParser{} + case constants.ProviderTypeLMDeploy: + return &lmdeployParser{} case constants.ProviderTypeSGLang: return &sglangParser{} case constants.ProviderTypeVLLM: diff --git a/internal/app/handlers/handler_common.go b/internal/app/handlers/handler_common.go index ac6fb8d..1dccc62 100644 --- a/internal/app/handlers/handler_common.go +++ b/internal/app/handlers/handler_common.go @@ -80,6 +80,7 @@ func (a *Application) isProviderSupported(provider string) bool { // this ensures consistency between validation and route registration staticProviders := map[string]bool{ constants.ProviderTypeLemonade: true, + constants.ProviderTypeLMDeploy: true, constants.ProviderTypeLMStudio: true, constants.ProviderTypeOllama: true, constants.ProviderTypeOpenAI: true, diff --git a/internal/app/handlers/server_routes.go b/internal/app/handlers/server_routes.go index cd8d8d4..8e5cd05 100644 --- a/internal/app/handlers/server_routes.go +++ b/internal/app/handlers/server_routes.go @@ -315,6 +315,13 @@ func getStaticProviders(a *Application) map[string]staticProvider { {path: "", handler: a.providerProxyHandler, description: "OpenAI-compatible proxy", isProxy: true}, }, }, + constants.ProviderTypeLMDeploy: { + prefixes: []string{constants.ProviderTypeLMDeploy}, + routes: []staticRoute{ + {path: "v1/models", handler: a.genericProviderModelsHandler(constants.ProviderTypeLMDeploy, constants.ProviderTypeOpenAI), description: "LMDeploy models (OpenAI format)", method: "GET"}, + {path: "", handler: a.providerProxyHandler, description: "LMDeploy proxy", isProxy: true}, + }, + }, constants.ProviderTypeSGLang: { prefixes: []string{constants.ProviderTypeSGLang}, routes: []staticRoute{ diff --git a/internal/core/constants/providers.go b/internal/core/constants/providers.go index 1696ab1..32ad4b1 100644 --- a/internal/core/constants/providers.go +++ b/internal/core/constants/providers.go @@ -7,6 +7,7 @@ const ( ProviderTypeLMStudio = "lm-studio" ProviderTypeOpenAI = "openai" ProviderTypeOpenAICompat = "openai-compatible" + ProviderTypeLMDeploy = "lmdeploy" ProviderTypeSGLang = "sglang" ProviderTypeVLLM = "vllm" ProviderTypeVLLMMLX = 
"vllm-mlx" @@ -18,6 +19,7 @@ const ( ProviderDisplayLlamaCpp = "llama.cpp" ProviderDisplayLMStudio = "LM Studio" ProviderDisplayOpenAI = "OpenAI" + ProviderDisplayLMDeploy = "LMDeploy" ProviderDisplaySGLang = "SGLang" ProviderDisplayVLLM = "vLLM" ProviderDisplayVLLMMLX = "vLLM-MLX" diff --git a/internal/core/domain/profile.go b/internal/core/domain/profile.go index 47e0f89..17420ad 100644 --- a/internal/core/domain/profile.go +++ b/internal/core/domain/profile.go @@ -5,6 +5,7 @@ const ( ProfileLemonade = "lemonade" ProfileLlamaCpp = "llamacpp" ProfileLmStudio = "lm-studio" + ProfileLMDeploy = "lmdeploy" ProfileSGLang = "sglang" ProfileVLLM = "vllm" ProfileVLLMMLX = "vllm-mlx" diff --git a/internal/version/version.go b/internal/version/version.go index 31e9240..041bec1 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -37,6 +37,7 @@ var ( "lemonade", "litellm", "llamacpp", + "lmdeploy", "lm_studio", "sglang", "vllm", diff --git a/readme.md b/readme.md index f23e176..7d2497b 100644 --- a/readme.md +++ b/readme.md @@ -10,7 +10,7 @@ vLLM: Native Support SGLang: Native Support LiteLLM: Native Support - LM Deploy: OpenAI Compatible
+ LMDeploy: Native Support
vLLM-MLX: Native Support Docker Model Runner: Native Support
Ollama: Native Support