Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.gmicloud.ai/llms.txt

Use this file to discover all available pages before exploring further.

Model ID
XiaomiMiMo/MiMo-V2.5-Pro

API Usage

You can interact with MiMo-V2.5-Pro through standard OpenAI-compatible HTTP APIs. Below are examples using the chat completions endpoint. A note on message roles: the examples use OpenAI's `developer` role where shown; if your provider rejects that role, use `system` instead.

API Examples

Generate a model response using the chat completions endpoint of MiMo-V2.5-Pro.

Create chat completion

The Chat Completions API generates a model reply from a list of conversation messages.

Default

# Send a developer message and a user message to the chat completions endpoint.
# The Authorization header is filled from the GMI_API_KEY shell variable,
# so set it before running this command.
curl https://api.gmi-serving.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $GMI_API_KEY" \
  -d '{
    "model": "XiaomiMiMo/MiMo-V2.5-Pro",
    "messages": [
      {
        "role": "developer",
        "content": "You are a helpful assistant."
      },
      {
        "role": "user",
        "content": "Hello!"
      }
    ]
  }'
import os

from openai import OpenAI

# Base URL of the OpenAI-compatible API and the model to query.
endpoint = "https://api.gmi-serving.com/v1/"
model_name = "XiaomiMiMo/MiMo-V2.5-Pro"

# Read the key from the environment rather than hard-coding it in source.
# This is the same GMI_API_KEY variable the curl example uses; a missing
# variable fails fast with a KeyError naming it.
api_key = os.environ["GMI_API_KEY"]

client = OpenAI(base_url=endpoint, api_key=api_key)

# Single-turn request: one user message, no streaming.
completion = client.chat.completions.create(
    model=model_name,
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?",
        },
    ],
)

# Print the message object of the first returned choice.
print(completion.choices[0].message)

Streaming

# Same request as the default example, with "stream": true set in the body.
# The Authorization header is filled from the GMI_API_KEY shell variable.
curl https://api.gmi-serving.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $GMI_API_KEY" \
  -d '{
    "model": "XiaomiMiMo/MiMo-V2.5-Pro",
    "messages": [
      {
        "role": "developer",
        "content": "You are a helpful assistant."
      },
      {
        "role": "user",
        "content": "Hello!"
      }
    ],
    "stream": true
  }'
import os

from openai import OpenAI

# Base URL of the OpenAI-compatible API and the model to query.
endpoint = "https://api.gmi-serving.com/v1/"
model_name = "XiaomiMiMo/MiMo-V2.5-Pro"

# Read the key from the environment rather than hard-coding it in source.
# This is the same GMI_API_KEY variable the curl example uses; a missing
# variable fails fast with a KeyError naming it.
api_key = os.environ["GMI_API_KEY"]

client = OpenAI(base_url=endpoint, api_key=api_key)

# With stream=True the call returns an iterable of incremental chunks
# instead of a single completion object.
completion = client.chat.completions.create(
    model=model_name,
    messages=[
        {"role": "developer", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    stream=True,
)

for chunk in completion:
    # A chunk may arrive with an empty `choices` list; indexing it
    # unconditionally would raise IndexError, so skip such chunks.
    if not chunk.choices:
        continue
    print(chunk.choices[0].delta)

Image Input

# Send one user message whose content has two parts: a text question and an
# image referenced by URL. The body also sets max_completion_tokens to 300.
# The Authorization header is filled from the GMI_API_KEY shell variable.
curl https://api.gmi-serving.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $GMI_API_KEY" \
  -d '{
    "model": "XiaomiMiMo/MiMo-V2.5-Pro",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "What is in this image?"
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
            }
          }
        ]
      }
    ],
    "max_completion_tokens": 300
  }'
import os

from openai import OpenAI

# Base URL of the OpenAI-compatible API and the model to query.
endpoint = "https://api.gmi-serving.com/v1/"
model_name = "XiaomiMiMo/MiMo-V2.5-Pro"

# Read the key from the environment rather than hard-coding it in source.
# This is the same GMI_API_KEY variable the curl example uses; a missing
# variable fails fast with a KeyError naming it.
api_key = os.environ["GMI_API_KEY"]

client = OpenAI(base_url=endpoint, api_key=api_key)

# The user message content is a list of parts: a text question followed by
# an image referenced by URL.
image_url = (
    "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
)
completion = client.chat.completions.create(
    model=model_name,
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        },
    ],
    max_completion_tokens=300,
)

# Print the message object of the first returned choice.
print(completion.choices[0].message)

Functions

# Send a user question together with one function tool definition
# (get_current_weather; "location" is the only required parameter) and
# tool_choice set to "auto".
# The Authorization header is filled from the GMI_API_KEY shell variable.
curl https://api.gmi-serving.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $GMI_API_KEY" \
  -d '{
    "model": "XiaomiMiMo/MiMo-V2.5-Pro",
    "messages": [
      {
        "role": "user",
        "content": "What is the weather like in Boston today?"
      }
    ],
    "tools": [
      {
        "type": "function",
        "function": {
          "name": "get_current_weather",
          "description": "Get the current weather in a given location",
          "parameters": {
            "type": "object",
            "properties": {
              "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA"
              },
              "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"]
              }
            },
            "required": ["location"]
          }
        }
      }
    ],
    "tool_choice": "auto"
  }'
import os

from openai import OpenAI

# Base URL of the OpenAI-compatible API and the model to query.
endpoint = "https://api.gmi-serving.com/v1/"
model_name = "XiaomiMiMo/MiMo-V2.5-Pro"

# Read the key from the environment rather than hard-coding it in source.
# This is the same GMI_API_KEY variable the curl example uses; a missing
# variable fails fast with a KeyError naming it.
api_key = os.environ["GMI_API_KEY"]

client = OpenAI(base_url=endpoint, api_key=api_key)

# One function tool described with a JSON Schema for its arguments;
# "location" is the only required parameter.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["location"],
            },
        },
    },
]
messages = [
    {"role": "user", "content": "What's the weather like in Boston today?"},
]

# tool_choice="auto" is passed through unchanged alongside the tool list.
completion = client.chat.completions.create(
    model=model_name,
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

# Print the full completion object.
print(completion)

Python

import json
import os

import requests

url = "https://api.gmi-serving.com/v1/chat/completions"

# Upper bound, in seconds, on how long to wait for the server. Without a
# timeout, requests can block indefinitely on a stalled connection; raise
# this value if long generations are expected.
REQUEST_TIMEOUT_SECONDS = 120

# Read the key from the environment rather than embedding it in source.
# This is the same GMI_API_KEY variable the curl examples use; a missing
# variable fails fast with a KeyError naming it.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.environ['GMI_API_KEY']}",
}

payload = {
    "model": "XiaomiMiMo/MiMo-V2.5-Pro",
    "messages": [
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": "List 3 countries and their capitals."},
    ],
    "temperature": 0,
    "max_completion_tokens": 500,
}

response = requests.post(
    url,
    headers=headers,
    json=payload,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Report HTTP errors with the status code and raw body before attempting to
# decode JSON, since an error response may not be valid JSON.
if not response.ok:
    raise SystemExit(
        f"Request failed ({response.status_code}): {response.text}"
    )

print(json.dumps(response.json(), indent=2))