Refine your search
4 vulnerabilities found for by vllm-project
CVE-2025-66448 (GCVE-0-2025-66448)
Vulnerability from cvelistv5
Published
2025-12-01 22:45
Modified
2025-12-02 14:14
Severity ?
VLAI Severity ?
EPSS score ?
CWE
- CWE-94 - Improper Control of Generation of Code ('Code Injection')
Summary
vLLM is an inference and serving engine for large language models (LLMs). Prior to 0.11.1, vllm has a critical remote code execution vector in a config class named Nemotron_Nano_VL_Config. When vllm loads a model config that contains an auto_map entry, the config class resolves that mapping with get_class_from_dynamic_module(...) and immediately instantiates the returned class. This fetches and executes Python from the remote repository referenced in the auto_map string. Crucially, this happens even when the caller explicitly sets trust_remote_code=False in vllm.transformers_utils.config.get_config. In practice, an attacker can publish a benign-looking frontend repo whose config.json points via auto_map to a separate malicious backend repo; loading the frontend will silently run the backend’s code on the victim host. This vulnerability is fixed in 0.11.1.
References
| URL | Tags | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|||||||||||
Impacted products
| Vendor | Product | Version | ||
|---|---|---|---|---|
| vllm-project | vllm |
Version: < 0.11.1 |
{
"containers": {
"adp": [
{
"metrics": [
{
"other": {
"content": {
"id": "CVE-2025-66448",
"options": [
{
"Exploitation": "none"
},
{
"Automatable": "yes"
},
{
"Technical Impact": "total"
}
],
"role": "CISA Coordinator",
"timestamp": "2025-12-02T14:14:49.921511Z",
"version": "2.0.3"
},
"type": "ssvc"
}
}
],
"providerMetadata": {
"dateUpdated": "2025-12-02T14:14:58.324Z",
"orgId": "134c704f-9b21-4f2e-91b3-4a467353bcc0",
"shortName": "CISA-ADP"
},
"title": "CISA ADP Vulnrichment"
}
],
"cna": {
"affected": [
{
"product": "vllm",
"vendor": "vllm-project",
"versions": [
{
"status": "affected",
"version": "\u003c 0.11.1"
}
]
}
],
"descriptions": [
{
"lang": "en",
"value": "vLLM is an inference and serving engine for large language models (LLMs). Prior to 0.11.1, vllm has a critical remote code execution vector in a config class named Nemotron_Nano_VL_Config. When vllm loads a model config that contains an auto_map entry, the config class resolves that mapping with get_class_from_dynamic_module(...) and immediately instantiates the returned class. This fetches and executes Python from the remote repository referenced in the auto_map string. Crucially, this happens even when the caller explicitly sets trust_remote_code=False in vllm.transformers_utils.config.get_config. In practice, an attacker can publish a benign-looking frontend repo whose config.json points via auto_map to a separate malicious backend repo; loading the frontend will silently run the backend\u2019s code on the victim host. This vulnerability is fixed in 0.11.1."
}
],
"metrics": [
{
"cvssV3_1": {
"attackComplexity": "HIGH",
"attackVector": "NETWORK",
"availabilityImpact": "HIGH",
"baseScore": 7.1,
"baseSeverity": "HIGH",
"confidentialityImpact": "HIGH",
"integrityImpact": "HIGH",
"privilegesRequired": "LOW",
"scope": "UNCHANGED",
"userInteraction": "REQUIRED",
"vectorString": "CVSS:3.1/AV:N/AC:H/PR:L/UI:R/S:U/C:H/I:H/A:H",
"version": "3.1"
}
}
],
"problemTypes": [
{
"descriptions": [
{
"cweId": "CWE-94",
"description": "CWE-94: Improper Control of Generation of Code (\u0027Code Injection\u0027)",
"lang": "en",
"type": "CWE"
}
]
}
],
"providerMetadata": {
"dateUpdated": "2025-12-01T22:45:42.566Z",
"orgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"shortName": "GitHub_M"
},
"references": [
{
"name": "https://github.com/vllm-project/vllm/security/advisories/GHSA-8fr4-5q9j-m8gm",
"tags": [
"x_refsource_CONFIRM"
],
"url": "https://github.com/vllm-project/vllm/security/advisories/GHSA-8fr4-5q9j-m8gm"
},
{
"name": "https://github.com/vllm-project/vllm/pull/28126",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/pull/28126"
},
{
"name": "https://github.com/vllm-project/vllm/commit/ffb08379d8870a1a81ba82b72797f196838d0c86",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/commit/ffb08379d8870a1a81ba82b72797f196838d0c86"
}
],
"source": {
"advisory": "GHSA-8fr4-5q9j-m8gm",
"discovery": "UNKNOWN"
},
"title": "vLLM vulnerable to remote code execution via transformers_utils/get_config"
}
},
"cveMetadata": {
"assignerOrgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"assignerShortName": "GitHub_M",
"cveId": "CVE-2025-66448",
"datePublished": "2025-12-01T22:45:42.566Z",
"dateReserved": "2025-12-01T18:22:06.865Z",
"dateUpdated": "2025-12-02T14:14:58.324Z",
"state": "PUBLISHED"
},
"dataType": "CVE_RECORD",
"dataVersion": "5.2"
}
CVE-2025-62372 (GCVE-0-2025-62372)
Vulnerability from cvelistv5
Published
2025-11-21 01:22
Modified
2025-11-24 18:11
Severity ?
VLAI Severity ?
EPSS score ?
CWE
- CWE-129 - Improper Validation of Array Index
Summary
vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before 0.11.1, users can crash the vLLM engine serving multimodal models by passing multimodal embedding inputs with correct ndim but incorrect shape (e.g. hidden dimension is wrong), regardless of whether the model is intended to support such inputs (as defined in the Supported Models page). This issue has been patched in version 0.11.1.
References
| URL | Tags | |||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
||||||||||||||
Impacted products
| Vendor | Product | Version | ||
|---|---|---|---|---|
| vllm-project | vllm |
Version: >= 0.5.5, < 0.11.1 |
{
"containers": {
"adp": [
{
"metrics": [
{
"other": {
"content": {
"id": "CVE-2025-62372",
"options": [
{
"Exploitation": "none"
},
{
"Automatable": "no"
},
{
"Technical Impact": "partial"
}
],
"role": "CISA Coordinator",
"timestamp": "2025-11-24T17:07:55.989854Z",
"version": "2.0.3"
},
"type": "ssvc"
}
}
],
"providerMetadata": {
"dateUpdated": "2025-11-24T18:11:59.207Z",
"orgId": "134c704f-9b21-4f2e-91b3-4a467353bcc0",
"shortName": "CISA-ADP"
},
"title": "CISA ADP Vulnrichment"
}
],
"cna": {
"affected": [
{
"product": "vllm",
"vendor": "vllm-project",
"versions": [
{
"status": "affected",
"version": "\u003e= 0.5.5, \u003c 0.11.1"
}
]
}
],
"descriptions": [
{
"lang": "en",
"value": "vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before 0.11.1, users can crash the vLLM engine serving multimodal models by passing multimodal embedding inputs with correct ndim but incorrect shape (e.g. hidden dimension is wrong), regardless of whether the model is intended to support such inputs (as defined in the Supported Models page). This issue has been patched in version 0.11.1."
}
],
"metrics": [
{
"cvssV4_0": {
"attackComplexity": "LOW",
"attackRequirements": "NONE",
"attackVector": "NETWORK",
"baseScore": 8.3,
"baseSeverity": "HIGH",
"privilegesRequired": "LOW",
"subAvailabilityImpact": "HIGH",
"subConfidentialityImpact": "NONE",
"subIntegrityImpact": "NONE",
"userInteraction": "NONE",
"vectorString": "CVSS:4.0/AV:N/AC:L/AT:N/PR:L/UI:N/VC:N/VI:N/VA:H/SC:N/SI:N/SA:H",
"version": "4.0",
"vulnAvailabilityImpact": "HIGH",
"vulnConfidentialityImpact": "NONE",
"vulnIntegrityImpact": "NONE"
}
}
],
"problemTypes": [
{
"descriptions": [
{
"cweId": "CWE-129",
"description": "CWE-129: Improper Validation of Array Index",
"lang": "en",
"type": "CWE"
}
]
}
],
"providerMetadata": {
"dateUpdated": "2025-11-21T01:22:37.121Z",
"orgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"shortName": "GitHub_M"
},
"references": [
{
"name": "https://github.com/vllm-project/vllm/security/advisories/GHSA-pmqf-x6x8-p7qw",
"tags": [
"x_refsource_CONFIRM"
],
"url": "https://github.com/vllm-project/vllm/security/advisories/GHSA-pmqf-x6x8-p7qw"
},
{
"name": "https://github.com/vllm-project/vllm/pull/27204",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/pull/27204"
},
{
"name": "https://github.com/vllm-project/vllm/pull/6613",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/pull/6613"
},
{
"name": "https://github.com/vllm-project/vllm/commit/58fab50d82838d5014f4a14d991fdb9352c9c84b",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/commit/58fab50d82838d5014f4a14d991fdb9352c9c84b"
}
],
"source": {
"advisory": "GHSA-pmqf-x6x8-p7qw",
"discovery": "UNKNOWN"
},
"title": "vLLM vulnerable to DoS with incorrect shape of multimodal embedding inputs"
}
},
"cveMetadata": {
"assignerOrgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"assignerShortName": "GitHub_M",
"cveId": "CVE-2025-62372",
"datePublished": "2025-11-21T01:22:37.121Z",
"dateReserved": "2025-10-10T14:22:48.204Z",
"dateUpdated": "2025-11-24T18:11:59.207Z",
"state": "PUBLISHED"
},
"dataType": "CVE_RECORD",
"dataVersion": "5.2"
}
CVE-2025-62426 (GCVE-0-2025-62426)
Vulnerability from cvelistv5
Published
2025-11-21 01:21
Modified
2025-11-24 18:12
Severity ?
VLAI Severity ?
EPSS score ?
CWE
- CWE-770 - Allocation of Resources Without Limits or Throttling
Summary
vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before 0.11.1, the /v1/chat/completions and /tokenize endpoints allow a chat_template_kwargs request parameter that is used in the code before it is properly validated against the chat template. With the right chat_template_kwargs parameters, it is possible to block processing of the API server for long periods of time, delaying all other requests. This issue has been patched in version 0.11.1.
References
Impacted products
| Vendor | Product | Version | ||
|---|---|---|---|---|
| vllm-project | vllm |
Version: >= 0.5.5, < 0.11.1 |
{
"containers": {
"adp": [
{
"metrics": [
{
"other": {
"content": {
"id": "CVE-2025-62426",
"options": [
{
"Exploitation": "none"
},
{
"Automatable": "no"
},
{
"Technical Impact": "partial"
}
],
"role": "CISA Coordinator",
"timestamp": "2025-11-24T17:12:00.809982Z",
"version": "2.0.3"
},
"type": "ssvc"
}
}
],
"providerMetadata": {
"dateUpdated": "2025-11-24T18:12:23.183Z",
"orgId": "134c704f-9b21-4f2e-91b3-4a467353bcc0",
"shortName": "CISA-ADP"
},
"title": "CISA ADP Vulnrichment"
}
],
"cna": {
"affected": [
{
"product": "vllm",
"vendor": "vllm-project",
"versions": [
{
"status": "affected",
"version": "\u003e= 0.5.5, \u003c 0.11.1"
}
]
}
],
"descriptions": [
{
"lang": "en",
"value": "vLLM is an inference and serving engine for large language models (LLMs). From version 0.5.5 to before 0.11.1, the /v1/chat/completions and /tokenize endpoints allow a chat_template_kwargs request parameter that is used in the code before it is properly validated against the chat template. With the right chat_template_kwargs parameters, it is possible to block processing of the API server for long periods of time, delaying all other requests. This issue has been patched in version 0.11.1."
}
],
"metrics": [
{
"cvssV3_1": {
"attackComplexity": "LOW",
"attackVector": "NETWORK",
"availabilityImpact": "HIGH",
"baseScore": 6.5,
"baseSeverity": "MEDIUM",
"confidentialityImpact": "NONE",
"integrityImpact": "NONE",
"privilegesRequired": "LOW",
"scope": "UNCHANGED",
"userInteraction": "NONE",
"vectorString": "CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:N/I:N/A:H",
"version": "3.1"
}
}
],
"problemTypes": [
{
"descriptions": [
{
"cweId": "CWE-770",
"description": "CWE-770: Allocation of Resources Without Limits or Throttling",
"lang": "en",
"type": "CWE"
}
]
}
],
"providerMetadata": {
"dateUpdated": "2025-11-21T01:21:29.546Z",
"orgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"shortName": "GitHub_M"
},
"references": [
{
"name": "https://github.com/vllm-project/vllm/security/advisories/GHSA-69j4-grxj-j64p",
"tags": [
"x_refsource_CONFIRM"
],
"url": "https://github.com/vllm-project/vllm/security/advisories/GHSA-69j4-grxj-j64p"
},
{
"name": "https://github.com/vllm-project/vllm/pull/27205",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/pull/27205"
},
{
"name": "https://github.com/vllm-project/vllm/commit/3ada34f9cb4d1af763fdfa3b481862a93eb6bd2b",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/commit/3ada34f9cb4d1af763fdfa3b481862a93eb6bd2b"
},
{
"name": "https://github.com/vllm-project/vllm/blob/2a6dc67eb520ddb9c4138d8b35ed6fe6226997fb/vllm/entrypoints/chat_utils.py#L1602-L1610",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/blob/2a6dc67eb520ddb9c4138d8b35ed6fe6226997fb/vllm/entrypoints/chat_utils.py#L1602-L1610"
},
{
"name": "https://github.com/vllm-project/vllm/blob/2a6dc67eb520ddb9c4138d8b35ed6fe6226997fb/vllm/entrypoints/openai/serving_engine.py#L809-L814",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/blob/2a6dc67eb520ddb9c4138d8b35ed6fe6226997fb/vllm/entrypoints/openai/serving_engine.py#L809-L814"
}
],
"source": {
"advisory": "GHSA-69j4-grxj-j64p",
"discovery": "UNKNOWN"
},
"title": "vLLM vulnerable to DoS via large Chat Completion or Tokenization requests with specially crafted `chat_template_kwargs`"
}
},
"cveMetadata": {
"assignerOrgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"assignerShortName": "GitHub_M",
"cveId": "CVE-2025-62426",
"datePublished": "2025-11-21T01:21:29.546Z",
"dateReserved": "2025-10-13T16:26:12.180Z",
"dateUpdated": "2025-11-24T18:12:23.183Z",
"state": "PUBLISHED"
},
"dataType": "CVE_RECORD",
"dataVersion": "5.2"
}
CVE-2025-62164 (GCVE-0-2025-62164)
Vulnerability from cvelistv5
Published
2025-11-21 01:18
Modified
2025-11-24 18:12
Severity ?
VLAI Severity ?
EPSS score ?
CWE
Summary
vLLM is an inference and serving engine for large language models (LLMs). From versions 0.10.2 to before 0.11.1, a memory corruption vulnerability could lead to a crash (denial-of-service) and potentially remote code execution (RCE), exists in the Completions API endpoint. When processing user-supplied prompt embeddings, the endpoint loads serialized tensors using torch.load() without sufficient validation. Due to a change introduced in PyTorch 2.8.0, sparse tensor integrity checks are disabled by default. As a result, maliciously crafted tensors can bypass internal bounds checks and trigger an out-of-bounds memory write during the call to to_dense(). This memory corruption can crash vLLM and potentially lead to code execution on the server hosting vLLM. This issue has been patched in version 0.11.1.
References
| URL | Tags | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|||||||||||
Impacted products
| Vendor | Product | Version | ||
|---|---|---|---|---|
| vllm-project | vllm |
Version: >= 0.10.2, < 0.11.1 |
{
"containers": {
"adp": [
{
"metrics": [
{
"other": {
"content": {
"id": "CVE-2025-62164",
"options": [
{
"Exploitation": "none"
},
{
"Automatable": "no"
},
{
"Technical Impact": "total"
}
],
"role": "CISA Coordinator",
"timestamp": "2025-11-24T17:15:13.097938Z",
"version": "2.0.3"
},
"type": "ssvc"
}
}
],
"providerMetadata": {
"dateUpdated": "2025-11-24T18:12:44.195Z",
"orgId": "134c704f-9b21-4f2e-91b3-4a467353bcc0",
"shortName": "CISA-ADP"
},
"title": "CISA ADP Vulnrichment"
}
],
"cna": {
"affected": [
{
"product": "vllm",
"vendor": "vllm-project",
"versions": [
{
"status": "affected",
"version": "\u003e= 0.10.2, \u003c 0.11.1"
}
]
}
],
"descriptions": [
{
"lang": "en",
"value": "vLLM is an inference and serving engine for large language models (LLMs). From versions 0.10.2 to before 0.11.1, a memory corruption vulnerability could lead to a crash (denial-of-service) and potentially remote code execution (RCE), exists in the Completions API endpoint. When processing user-supplied prompt embeddings, the endpoint loads serialized tensors using torch.load() without sufficient validation. Due to a change introduced in PyTorch 2.8.0, sparse tensor integrity checks are disabled by default. As a result, maliciously crafted tensors can bypass internal bounds checks and trigger an out-of-bounds memory write during the call to to_dense(). This memory corruption can crash vLLM and potentially lead to code execution on the server hosting vLLM. This issue has been patched in version 0.11.1."
}
],
"metrics": [
{
"cvssV3_1": {
"attackComplexity": "LOW",
"attackVector": "NETWORK",
"availabilityImpact": "HIGH",
"baseScore": 8.8,
"baseSeverity": "HIGH",
"confidentialityImpact": "HIGH",
"integrityImpact": "HIGH",
"privilegesRequired": "LOW",
"scope": "UNCHANGED",
"userInteraction": "NONE",
"vectorString": "CVSS:3.1/AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H",
"version": "3.1"
}
}
],
"problemTypes": [
{
"descriptions": [
{
"cweId": "CWE-20",
"description": "CWE-20: Improper Input Validation",
"lang": "en",
"type": "CWE"
}
]
},
{
"descriptions": [
{
"cweId": "CWE-123",
"description": "CWE-123: Write-what-where Condition",
"lang": "en",
"type": "CWE"
}
]
},
{
"descriptions": [
{
"cweId": "CWE-502",
"description": "CWE-502: Deserialization of Untrusted Data",
"lang": "en",
"type": "CWE"
}
]
},
{
"descriptions": [
{
"cweId": "CWE-787",
"description": "CWE-787: Out-of-bounds Write",
"lang": "en",
"type": "CWE"
}
]
}
],
"providerMetadata": {
"dateUpdated": "2025-11-21T01:18:38.803Z",
"orgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"shortName": "GitHub_M"
},
"references": [
{
"name": "https://github.com/vllm-project/vllm/security/advisories/GHSA-mrw7-hf4f-83pf",
"tags": [
"x_refsource_CONFIRM"
],
"url": "https://github.com/vllm-project/vllm/security/advisories/GHSA-mrw7-hf4f-83pf"
},
{
"name": "https://github.com/vllm-project/vllm/pull/27204",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/pull/27204"
},
{
"name": "https://github.com/vllm-project/vllm/commit/58fab50d82838d5014f4a14d991fdb9352c9c84b",
"tags": [
"x_refsource_MISC"
],
"url": "https://github.com/vllm-project/vllm/commit/58fab50d82838d5014f4a14d991fdb9352c9c84b"
}
],
"source": {
"advisory": "GHSA-mrw7-hf4f-83pf",
"discovery": "UNKNOWN"
},
"title": "VLLM deserialization vulnerability leading to DoS and potential RCE"
}
},
"cveMetadata": {
"assignerOrgId": "a0819718-46f1-4df5-94e2-005712e83aaa",
"assignerShortName": "GitHub_M",
"cveId": "CVE-2025-62164",
"datePublished": "2025-11-21T01:18:38.803Z",
"dateReserved": "2025-10-07T16:12:03.425Z",
"dateUpdated": "2025-11-24T18:12:44.195Z",
"state": "PUBLISHED"
},
"dataType": "CVE_RECORD",
"dataVersion": "5.2"
}