{
  "updated": "2026-06-27T21:32:35.274Z",
  "source": "https://api.cloudflare.com/client/v4/accounts/0460574641fdbb98159c98ebf593e2bd/ai/models/search",
  "total": 60,
  "total_upstream": 60,
  "deprecated_filtered": 0,
  "models": [
    {
      "id": "@cf/ai4bharat/indictrans2-en-indic-1B",
      "short_name": "Indictrans2 En Indic 1B",
      "provider": "ai4bharat",
      "task": "translation",
      "params": "1B",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "fast",
      "description": "IndicTrans2 is the first open-source transformer-based multilingual NMT model that supports high-quality translations across all the 22 scheduled Indic languages",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2025-09-23 18:19:17.382"
    },
    {
      "id": "@cf/aisingapore/gemma-sea-lion-v4-27b-it",
      "short_name": "Gemma Sea Lion V4 27B IT",
      "provider": "aisingapore",
      "task": "text-generation",
      "params": "27B",
      "context_length": 128000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "SEA-LION stands for Southeast Asian Languages In One Network, which is a collection of Large Language Models (LLMs) which have been pretrained and instruct-tuned for the Southeast Asia (SEA) region.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-09-23 19:27:30.468"
    },
    {
      "id": "@cf/baai/bge-reranker-base",
      "short_name": "BGE Reranker Base",
      "provider": "baai",
      "task": "text-classification",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Different from embedding model, reranker uses question and document as input and directly output similarity instead of embedding. You can get a relevance score by inputting query and passage to the reranker. And the score can be mapped to a float value in [0,1] by sigmoid function.\n\n",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-02-14 12:28:19.009"
    },
    {
      "id": "@cf/baai/bge-large-en-v1.5",
      "short_name": "BGE Large En V1.5",
      "provider": "baai",
      "task": "text-embeddings",
      "params": null,
      "context_length": null,
      "max_input": 512,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "BAAI general embedding (Large) model that transforms any given text into a 1024-dimensional vector",
      "docs_url": "https://huggingface.co/BAAI/bge-large-en-v1.5",
      "licence_url": null,
      "flagship": true,
      "created_at": "2023-11-07 15:43:58.042"
    },
    {
      "id": "@cf/baai/bge-m3",
      "short_name": "BGE M3",
      "provider": "baai",
      "task": "text-embeddings",
      "params": null,
      "context_length": 60000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Multi-Functionality, Multi-Linguality, and Multi-Granularity embeddings model.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2024-05-22 19:27:09.781"
    },
    {
      "id": "@cf/baai/bge-base-en-v1.5",
      "short_name": "BGE Base En V1.5",
      "provider": "baai",
      "task": "text-embeddings",
      "params": null,
      "context_length": 153600,
      "max_input": 512,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "BAAI general embedding (Base) model that transforms any given text into a 768-dimensional vector",
      "docs_url": "https://huggingface.co/BAAI/bge-base-en-v1.5",
      "licence_url": null,
      "flagship": false,
      "created_at": "2023-09-25 19:21:11.898"
    },
    {
      "id": "@cf/baai/bge-small-en-v1.5",
      "short_name": "BGE Small En V1.5",
      "provider": "baai",
      "task": "text-embeddings",
      "params": null,
      "context_length": null,
      "max_input": 512,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "fast",
      "description": "BAAI general embedding (Small) model that transforms any given text into a 384-dimensional vector",
      "docs_url": "https://huggingface.co/BAAI/bge-small-en-v1.5",
      "licence_url": null,
      "flagship": false,
      "created_at": "2023-11-07 15:43:58.042"
    },
    {
      "id": "@cf/black-forest-labs/flux-1-schnell",
      "short_name": "Flux 1 Schnell",
      "provider": "black-forest-labs",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. ",
      "docs_url": null,
      "licence_url": "https://bfl.ai/legal/terms-of-service",
      "flagship": true,
      "created_at": "2024-08-29 16:37:39.541"
    },
    {
      "id": "@cf/black-forest-labs/flux-2-dev",
      "short_name": "Flux 2 Dev",
      "provider": "black-forest-labs",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "FLUX.2 [dev] is an image model from Black Forest Labs where you can generate highly realistic and detailed images, with multi-reference support.",
      "docs_url": null,
      "licence_url": "https://bfl.ai/legal/terms-of-service",
      "flagship": true,
      "created_at": "2025-11-24 15:44:06.050"
    },
    {
      "id": "@cf/black-forest-labs/flux-2-klein-9b",
      "short_name": "Flux 2 Klein 9B",
      "provider": "black-forest-labs",
      "task": "text-to-image",
      "params": "9B",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "FLUX.2 [klein] 9B is a 9 billion parameter model that can generate images from text descriptions and supports multi-reference editing capabilities.",
      "docs_url": null,
      "licence_url": "https://bfl.ai/legal/terms-of-service",
      "flagship": false,
      "created_at": "2026-01-14 12:55:54.294"
    },
    {
      "id": "@cf/black-forest-labs/flux-2-klein-4b",
      "short_name": "Flux 2 Klein 4B",
      "provider": "black-forest-labs",
      "task": "text-to-image",
      "params": "4B",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "FLUX.2 [klein] is an ultra-fast, distilled image model. It unifies image generation and editing in a single model, delivering state-of-the-art quality enabling interactive workflows, real-time previews, and latency-critical applications.",
      "docs_url": null,
      "licence_url": "https://bfl.ai/legal/terms-of-service",
      "flagship": false,
      "created_at": "2026-01-14 12:54:55.024"
    },
    {
      "id": "@cf/bytedance/stable-diffusion-xl-lightning",
      "short_name": "Stable Diffusion Xl Lightning",
      "provider": "bytedance",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "SDXL-Lightning is a lightning-fast text-to-image generation model. It can generate high-quality 1024px images in a few steps.",
      "docs_url": "https://huggingface.co/ByteDance/SDXL-Lightning",
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-02-27 17:41:29.578"
    },
    {
      "id": "@cf/deepgram/flux",
      "short_name": "Flux",
      "provider": "deepgram",
      "task": "automatic-speech-recognition",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Flux is the first conversational speech recognition model built specifically for voice agents.",
      "docs_url": null,
      "licence_url": "https://deepgram.com/terms",
      "flagship": true,
      "created_at": "2025-09-29 21:07:55.114"
    },
    {
      "id": "@cf/deepgram/nova-3",
      "short_name": "Nova 3",
      "provider": "deepgram",
      "task": "automatic-speech-recognition",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Transcribe audio using Deepgram’s speech-to-text model",
      "docs_url": null,
      "licence_url": "https://deepgram.com/terms",
      "flagship": true,
      "created_at": "2025-06-05 16:05:15.199"
    },
    {
      "id": "@cf/deepgram/aura-2-es",
      "short_name": "Aura 2 Es",
      "provider": "deepgram",
      "task": "text-to-speech",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Aura-2 is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
      "docs_url": null,
      "licence_url": "https://deepgram.com/terms",
      "flagship": true,
      "created_at": "2025-10-09 22:42:37.002"
    },
    {
      "id": "@cf/deepgram/aura-1",
      "short_name": "Aura 1",
      "provider": "deepgram",
      "task": "text-to-speech",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Aura is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
      "docs_url": null,
      "licence_url": "https://deepgram.com/terms",
      "flagship": false,
      "created_at": "2025-08-27 01:18:18.880"
    },
    {
      "id": "@cf/deepgram/aura-2-en",
      "short_name": "Aura 2 En",
      "provider": "deepgram",
      "task": "text-to-speech",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Aura-2 is a context-aware text-to-speech (TTS) model that applies natural pacing, expressiveness, and fillers based on the context of the provided text. The quality of your text input directly impacts the naturalness of the audio output.",
      "docs_url": null,
      "licence_url": "https://deepgram.com/terms",
      "flagship": false,
      "created_at": "2025-10-09 22:19:34.483"
    },
    {
      "id": "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
      "short_name": "DeepSeek R1 Distill Qwen 32B",
      "provider": "deepseek-ai",
      "task": "text-generation",
      "params": "32B",
      "context_length": 80000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": true,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "DeepSeek-R1-Distill-Qwen-32B is a model distilled from DeepSeek-R1 based on Qwen2.5. It outperforms OpenAI-o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.",
      "docs_url": null,
      "licence_url": "https://github.com/deepseek-ai/DeepSeek-R1/blob/main/LICENSE",
      "flagship": true,
      "created_at": "2025-01-22 19:48:55.776"
    },
    {
      "id": "@cf/google/embeddinggemma-300m",
      "short_name": "Embeddinggemma 300M",
      "provider": "google",
      "task": "text-embeddings",
      "params": "300M",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "EmbeddingGemma is a 300M parameter, state-of-the-art for its size, open embedding model from Google, built from Gemma 3 (with T5Gemma initialization) and the same research and technology used to create Gemini models. EmbeddingGemma produces vector representations of text, making it well-suited for search and retrieval tasks, including classification, clustering, and semantic similarity search. This model was trained with data in 100+ spoken languages.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-09-04 16:38:44.980"
    },
    {
      "id": "@cf/google/gemma-4-26b-a4b-it",
      "short_name": "Gemma 4 26B IT",
      "provider": "google",
      "task": "text-generation",
      "params": "26B",
      "context_length": 256000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Gemma 4 is Google's most intelligent family of open models, built from Gemini 3 research to maximize intelligence-per-parameter.",
      "docs_url": null,
      "licence_url": "https://ai.google.dev/gemma/docs/gemma_4_license",
      "flagship": true,
      "created_at": "2026-04-02 15:05:22.642"
    },
    {
      "id": "@cf/google/gemma-7b-it-lora",
      "short_name": "Gemma 7B IT LORA",
      "provider": "google",
      "task": "text-generation",
      "params": "7B",
      "context_length": 3500,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "fast",
      "description": "  This is a Gemma-7B base model that Cloudflare dedicates for inference with LoRA adapters. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-04-02 00:20:19.633"
    },
    {
      "id": "@cf/google/gemma-2b-it-lora",
      "short_name": "Gemma 2B IT LORA",
      "provider": "google",
      "task": "text-generation",
      "params": "2B",
      "context_length": 8192,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "balanced",
      "description": "This is a Gemma-2B base model that Cloudflare dedicates for inference with LoRA adapters. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-04-02 00:19:34.669"
    },
    {
      "id": "@cf/huggingface/distilbert-sst-2-int8",
      "short_name": "Distilbert Sst 2 INT8",
      "provider": "huggingface",
      "task": "text-classification",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Distilled BERT model that was finetuned on SST-2 for sentiment classification",
      "docs_url": "https://huggingface.co/Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-static",
      "licence_url": null,
      "flagship": false,
      "created_at": "2023-09-25 19:21:11.898"
    },
    {
      "id": "@cf/ibm-granite/granite-4.0-h-micro",
      "short_name": "Granite 4.0 H Micro",
      "provider": "ibm-granite",
      "task": "text-generation",
      "params": null,
      "context_length": 131000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Granite 4.0 instruct models deliver strong performance across benchmarks, achieving industry-leading results in key agentic tasks like instruction following and function calling. These efficiencies make the models well-suited for a wide range of use cases like retrieval-augmented generation (RAG), multi-agent workflows, and edge deployments.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-10-07 18:46:29.436"
    },
    {
      "id": "@cf/leonardo/lucid-origin",
      "short_name": "Lucid Origin",
      "provider": "leonardo",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Lucid Origin from Leonardo.AI is their most adaptable and prompt-responsive model to date. Whether you're generating images with sharp graphic design, stunning full-HD renders, or highly specific creative direction, it adheres closely to your prompts, renders text with accuracy, and supports a wide array of visual styles and aesthetics – from stylized concept art to crisp product mockups.\n",
      "docs_url": null,
      "licence_url": "https://leonardo.ai/terms-of-service/",
      "flagship": false,
      "created_at": "2025-08-25 19:21:28.770"
    },
    {
      "id": "@cf/leonardo/phoenix-1.0",
      "short_name": "Phoenix 1.0",
      "provider": "leonardo",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Phoenix 1.0 is a model by Leonardo.Ai that generates images with exceptional prompt adherence and coherent text.",
      "docs_url": null,
      "licence_url": "https://leonardo.ai/terms-of-service/",
      "flagship": false,
      "created_at": "2025-08-25 18:12:18.073"
    },
    {
      "id": "@cf/llava-hf/llava-1.5-7b-hf",
      "short_name": "Llava 1.5 7B HF",
      "provider": "llava-hf",
      "task": "image-to-text",
      "params": "7B",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": true,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "fast",
      "description": "LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. It is an auto-regressive language model, based on the transformer architecture.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-05-01 18:00:39.971"
    },
    {
      "id": "@cf/lykon/dreamshaper-8-lcm",
      "short_name": "Dreamshaper 8 Lcm",
      "provider": "lykon",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Stable Diffusion model that has been fine-tuned to be better at photorealism without sacrificing range.",
      "docs_url": "https://huggingface.co/Lykon/DreamShaper",
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-02-27 17:40:38.881"
    },
    {
      "id": "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
      "short_name": "Llama 3.3 70B Instruct FP8 Fast",
      "provider": "meta",
      "task": "text-generation",
      "params": "70B",
      "context_length": 24000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Llama 3.3 70B quantized to fp8 precision, optimized to be faster.",
      "docs_url": null,
      "licence_url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE",
      "flagship": true,
      "created_at": "2024-12-06 17:09:18.338"
    },
    {
      "id": "@cf/meta/llama-4-scout-17b-16e-instruct",
      "short_name": "Llama 4 Scout 17B 16E Instruct",
      "provider": "meta",
      "task": "text-generation",
      "params": "17B",
      "context_length": 131000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Meta's Llama 4 Scout is a 17 billion parameter model with 16 experts that is natively multimodal. These models leverage a mixture-of-experts architecture to offer industry-leading performance in text and image understanding.",
      "docs_url": null,
      "licence_url": "https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE",
      "flagship": true,
      "created_at": "2025-04-05 20:25:56.137"
    },
    {
      "id": "@cf/meta/llama-3.2-11b-vision-instruct",
      "short_name": "Llama 3.2 11B Vision Instruct",
      "provider": "meta",
      "task": "text-generation",
      "params": "11B",
      "context_length": 128000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": true,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "flagship",
      "description": " The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image.",
      "docs_url": null,
      "licence_url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE",
      "flagship": true,
      "created_at": "2024-09-25 05:36:04.547"
    },
    {
      "id": "@cf/meta/llama-3.1-8b-instruct-fp8",
      "short_name": "Llama 3.1 8B Instruct FP8",
      "provider": "meta",
      "task": "text-generation",
      "params": "8B",
      "context_length": 32000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Llama 3.1 8B quantized to FP8 precision",
      "docs_url": null,
      "licence_url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE",
      "flagship": true,
      "created_at": "2024-07-25 17:28:43.328"
    },
    {
      "id": "@cf/meta/llama-guard-3-8b",
      "short_name": "Llama Guard 3 8B",
      "provider": "meta",
      "task": "text-generation",
      "params": "8B",
      "context_length": 131072,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "flagship",
      "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-01-22 23:26:23.495"
    },
    {
      "id": "@cf/meta/llama-3.2-3b-instruct",
      "short_name": "Llama 3.2 3B Instruct",
      "provider": "meta",
      "task": "text-generation",
      "params": "3B",
      "context_length": 80000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "fast",
      "description": "The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
      "docs_url": null,
      "licence_url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE",
      "flagship": false,
      "created_at": "2024-09-25 20:05:43.986"
    },
    {
      "id": "@cf/meta/llama-3.2-1b-instruct",
      "short_name": "Llama 3.2 1B Instruct",
      "provider": "meta",
      "task": "text-generation",
      "params": "1B",
      "context_length": 60000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "fast",
      "description": "The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks.",
      "docs_url": null,
      "licence_url": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE",
      "flagship": false,
      "created_at": "2024-09-25 21:36:32.050"
    },
    {
      "id": "@cf/meta/m2m100-1.2b",
      "short_name": "M2m100 1.2B",
      "provider": "meta",
      "task": "translation",
      "params": "1.2B",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation",
      "docs_url": "https://github.com/facebookresearch/fairseq/tree/main/examples/m2m_100",
      "licence_url": "https://github.com/facebookresearch/fairseq/blob/main/LICENSE",
      "flagship": false,
      "created_at": "2023-09-25 19:21:11.898"
    },
    {
      "id": "@cf/meta-llama/llama-2-7b-chat-hf-lora",
      "short_name": "Llama 2 7B Chat HF LORA",
      "provider": "meta-llama",
      "task": "text-generation",
      "params": "7B",
      "context_length": 8192,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "fast",
      "description": "This is a Llama2 base model that Cloudflare dedicated for inference with LoRA adapters. Llama 2 is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. This is the repository for the 7B fine-tuned model, optimized for dialogue use cases and converted for the Hugging Face Transformers format. ",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-04-02 00:17:18.579"
    },
    {
      "id": "@cf/microsoft/resnet-50",
      "short_name": "Resnet 50",
      "provider": "microsoft",
      "task": "image-classification",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "50 layers deep image classification CNN trained on more than 1M images from ImageNet",
      "docs_url": "https://www.microsoft.com/en-us/research/blog/microsoft-vision-model-resnet-50-combines-web-scale-data-and-multi-task-learning-to-achieve-state-of-the-art/",
      "licence_url": null,
      "flagship": true,
      "created_at": "2023-09-25 19:21:11.898"
    },
    {
      "id": "@cf/mistral/mistral-7b-instruct-v0.2-lora",
      "short_name": "Mistral 7B Instruct V0.2 LORA",
      "provider": "mistral",
      "task": "text-generation",
      "params": "7B",
      "context_length": 15000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "fast",
      "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-7B-v0.2.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-04-01 22:14:40.529"
    },
    {
      "id": "@cf/mistralai/mistral-small-3.1-24b-instruct",
      "short_name": "Mistral Small 3.1 24B Instruct",
      "provider": "mistralai",
      "task": "text-generation",
      "params": "24B",
      "context_length": 128000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Building upon Mistral Small 3 (2501), Mistral Small 3.1 (2503) adds state-of-the-art vision understanding and enhances long context capabilities up to 128k tokens without compromising text performance. With 24 billion parameters, this model achieves top-tier capabilities in both text and vision tasks.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-03-18 03:28:37.890"
    },
    {
      "id": "@cf/moonshotai/kimi-k2.7-code",
      "short_name": "Kimi K2.7 Code",
      "provider": "moonshotai",
      "task": "text-generation",
      "params": null,
      "context_length": 262144,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Kimi K2.7 is a frontier-scale open-source 1T parameter model with a 262.1k context window, multi-turn tool calling, vision inputs, and structured outputs for agentic workloads.",
      "docs_url": null,
      "licence_url": "https://huggingface.co/moonshotai/Kimi-K2.7-Code/blob/main/LICENSE",
      "flagship": true,
      "created_at": "2026-06-12 11:45:20.582"
    },
    {
      "id": "@cf/moonshotai/kimi-k2.6",
      "short_name": "Kimi K2.6",
      "provider": "moonshotai",
      "task": "text-generation",
      "params": null,
      "context_length": 262144,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "balanced",
      "description": "Kimi K2.6 is a frontier-scale open-source 1T parameter model with a 262.1k context window, multi-turn tool calling, vision inputs, and structured outputs for agentic workloads.",
      "docs_url": null,
      "licence_url": "https://huggingface.co/moonshotai/Kimi-K2.6/blob/main/LICENSE",
      "flagship": false,
      "created_at": "2026-04-20 01:40:35.001"
    },
    {
      "id": "@cf/myshell-ai/melotts",
      "short_name": "Melotts",
      "provider": "myshell-ai",
      "task": "text-to-speech",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "MeloTTS is a high-quality multi-lingual text-to-speech library by MyShell.ai.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-07-19 15:51:04.819"
    },
    {
      "id": "@cf/nvidia/nemotron-3-120b-a12b",
      "short_name": "Nemotron 3 120B",
      "provider": "nvidia",
      "task": "text-generation",
      "params": "120B",
      "context_length": 256000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "NVIDIA Nemotron 3 Super is a hybrid MoE model with leading accuracy for multi-agent applications and specialized agentic AI systems.",
      "docs_url": null,
      "licence_url": "https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-nemotron-open-model-license/",
      "flagship": true,
      "created_at": "2026-02-24 23:22:47.215"
    },
    {
      "id": "@cf/openai/whisper-large-v3-turbo",
      "short_name": "Whisper Large V3 Turbo",
      "provider": "openai",
      "task": "automatic-speech-recognition",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Whisper is a pre-trained model for automatic speech recognition (ASR) and speech translation. ",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2024-05-22 00:02:18.656"
    },
    {
      "id": "@cf/openai/whisper",
      "short_name": "Whisper",
      "provider": "openai",
      "task": "automatic-speech-recognition",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multitasking model that can perform multilingual speech recognition, speech translation, and language identification.",
      "docs_url": "https://openai.com/research/whisper",
      "licence_url": null,
      "flagship": false,
      "created_at": "2023-09-25 19:21:11.898"
    },
    {
      "id": "@cf/openai/whisper-tiny-en",
      "short_name": "Whisper Tiny En",
      "provider": "openai",
      "task": "automatic-speech-recognition",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "fast",
      "description": "Whisper is a pre-trained model for automatic speech recognition (ASR) and speech translation. Trained on 680k hours of labelled data, Whisper models demonstrate a strong ability to generalize to many datasets and domains without the need for fine-tuning. This is the English-only version of the Whisper Tiny model which was trained on the task of speech recognition.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2024-04-22 20:59:02.731"
    },
    {
      "id": "@cf/openai/gpt-oss-120b",
      "short_name": "Gpt OSS 120B",
      "provider": "openai",
      "task": "text-generation",
      "params": "120B",
      "context_length": 128000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "OpenAI’s open-weight models designed for powerful reasoning, agentic tasks, and versatile developer use cases – gpt-oss-120b is for production, general purpose, high reasoning use-cases.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-08-05 10:27:29.131"
    },
    {
      "id": "@cf/openai/gpt-oss-20b",
      "short_name": "Gpt OSS 20B",
      "provider": "openai",
      "task": "text-generation",
      "params": "20B",
      "context_length": 128000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "OpenAI’s open-weight models designed for powerful reasoning, agentic tasks, and versatile developer use cases – gpt-oss-20b is for lower latency, and local or specialized use-cases.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-08-05 10:49:53.265"
    },
    {
      "id": "@cf/pfnet/plamo-embedding-1b",
      "short_name": "Plamo Embedding 1B",
      "provider": "pfnet",
      "task": "text-embeddings",
      "params": "1B",
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "fast",
      "description": "PLaMo-Embedding-1B is a Japanese text embedding model developed by Preferred Networks, Inc.\n\nIt can convert Japanese text input into numerical vectors and can be used for a wide range of applications, including information retrieval, text classification, and clustering.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2025-09-24 18:42:05.576"
    },
    {
      "id": "@cf/pipecat-ai/smart-turn-v2",
      "short_name": "Smart Turn V2",
      "provider": "pipecat-ai",
      "task": "dumb-pipe",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "An open source, community-driven, native audio turn detection model in 2nd version",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2025-08-04 10:08:04.219"
    },
    {
      "id": "@cf/qwen/qwen3-embedding-0.6b",
      "short_name": "Qwen3 Embedding 0.6B",
      "provider": "qwen",
      "task": "text-embeddings",
      "params": "0.6B",
      "context_length": 8192,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. ",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-06-18 20:23:22.086"
    },
    {
      "id": "@cf/qwen/qwen2.5-coder-32b-instruct",
      "short_name": "Qwen2.5 Coder 32B Instruct",
      "provider": "qwen",
      "task": "text-generation",
      "params": "32B",
      "context_length": 32768,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": true
      },
      "tier": "flagship",
      "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). As of now, Qwen2.5-Coder has covered six mainstream model sizes, 0.5, 1.5, 3, 7, 14, 32 billion parameters, to meet the needs of different developers. Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-02-27 00:31:43.829"
    },
    {
      "id": "@cf/qwen/qwq-32b",
      "short_name": "QwQ 32B",
      "provider": "qwen",
      "task": "text-generation",
      "params": "32B",
      "context_length": 24000,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": true,
        "streaming": true,
        "lora": true
      },
      "tier": "flagship",
      "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-03-05 21:52:40.974"
    },
    {
      "id": "@cf/qwen/qwen3-30b-a3b-fp8",
      "short_name": "Qwen3 30B FP8",
      "provider": "qwen",
      "task": "text-generation",
      "params": "30B",
      "context_length": 32768,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support.",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2025-04-30 21:36:10.009"
    },
    {
      "id": "@cf/runwayml/stable-diffusion-v1-5-img2img",
      "short_name": "Stable Diffusion V1 5 Img2img",
      "provider": "runwayml",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Stable Diffusion is a latent text-to-image diffusion model capable of generating photo-realistic images. Img2img generate a new image from an input image with Stable Diffusion. ",
      "docs_url": "https://huggingface.co/runwayml/stable-diffusion-v1-5",
      "licence_url": "https://github.com/runwayml/stable-diffusion/blob/main/LICENSE",
      "flagship": false,
      "created_at": "2024-02-27 17:32:28.581"
    },
    {
      "id": "@cf/runwayml/stable-diffusion-v1-5-inpainting",
      "short_name": "Stable Diffusion V1 5 Inpainting",
      "provider": "runwayml",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "balanced",
      "description": "Stable Diffusion Inpainting is a latent text-to-image diffusion model capable of generating photo-realistic images given any text input, with the extra capability of inpainting the pictures by using a mask.",
      "docs_url": "https://huggingface.co/runwayml/stable-diffusion-inpainting",
      "licence_url": "https://github.com/runwayml/stable-diffusion/blob/main/LICENSE",
      "flagship": false,
      "created_at": "2024-02-27 17:23:57.528"
    },
    {
      "id": "@cf/stabilityai/stable-diffusion-xl-base-1.0",
      "short_name": "Stable Diffusion Xl Base 1.0",
      "provider": "stabilityai",
      "task": "text-to-image",
      "params": null,
      "context_length": null,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": false,
        "vision": false,
        "reasoning": false,
        "streaming": false,
        "lora": false
      },
      "tier": "flagship",
      "description": "Diffusion-based text-to-image generative model by Stability AI. Generates and modify images based on text prompts.",
      "docs_url": "https://stability.ai/stable-diffusion",
      "licence_url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENSE.md",
      "flagship": true,
      "created_at": "2023-11-10 10:54:43.694"
    },
    {
      "id": "@cf/zai-org/glm-5.2",
      "short_name": "GLM 5.2",
      "provider": "zai-org",
      "task": "text-generation",
      "params": null,
      "context_length": 262144,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "flagship",
      "description": "Z.ai's flagship agentic coding model",
      "docs_url": null,
      "licence_url": null,
      "flagship": true,
      "created_at": "2026-06-15 09:51:05.921"
    },
    {
      "id": "@cf/zai-org/glm-4.7-flash",
      "short_name": "GLM 4.7 Flash",
      "provider": "zai-org",
      "task": "text-generation",
      "params": null,
      "context_length": 131072,
      "max_input": null,
      "max_output": null,
      "pricing": null,
      "capabilities": {
        "tools": true,
        "vision": false,
        "reasoning": false,
        "streaming": true,
        "lora": false
      },
      "tier": "balanced",
      "description": "GLM-4.7-Flash is a fast and efficient multilingual text generation model with a 131,072 token context window. Optimized for dialogue, instruction-following, and multi-turn tool calling across 100+ languages.",
      "docs_url": null,
      "licence_url": null,
      "flagship": false,
      "created_at": "2026-01-28 16:04:39.346"
    }
  ]
}