
Commit bff58fa

release: 1.5.0 (#16)
* chore: update mock server docs
* chore(internal): add request options to SSE classes
* chore(internal): make `test_proxy_environment_variables` more resilient
* chore(internal): make `test_proxy_environment_variables` more resilient to env
* feat(api): logprobs and top_logprobs in chat completions API

  Enable logprobs and top_logprobs in chat completions API
* release: 1.5.0

---------

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
1 parent 4c155f3 commit bff58fa

File tree: 13 files changed, +133 -102 lines

.release-please-manifest.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,3 +1,3 @@
 {
-  ".": "1.4.1"
+  ".": "1.5.0"
 }
```

.stats.yml

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,4 +1,4 @@
 configured_endpoints: 7
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova%2Fsambanova-67fb54d7a474f361008072996daf572e82688cacbb9b6e83aab3032de2e27146.yml
-openapi_spec_hash: caa3a3a58de67026c1dacf4bed4d95de
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sambanova%2Fsambanova-9a3a236dd72cf19e3eca6739de243006b82148d4de3bfd2a46e7f6399fcbd658.yml
+openapi_spec_hash: 94f49750e6407334d3cfa9ab5d3c4227
 config_hash: 2daa8a392d338e14be4096c11ce139e8
```

CHANGELOG.md

Lines changed: 16 additions & 0 deletions
```diff
@@ -1,5 +1,21 @@
 # Changelog
 
+## 1.5.0 (2026-03-02)
+
+Full Changelog: [v1.4.1...v1.5.0](https://github.com/sambanova/sambanova-python/compare/v1.4.1...v1.5.0)
+
+### Features
+
+* **api:** logprobs and top_logprobs in chat completions API ([60c5a73](https://github.com/sambanova/sambanova-python/commit/60c5a73a75dfcd2de76030ea1fc768777619767b))
+
+
+### Chores
+
+* **internal:** add request options to SSE classes ([8bed909](https://github.com/sambanova/sambanova-python/commit/8bed909a64bb234723e77fc5fb8f8d0a92501ff3))
+* **internal:** make `test_proxy_environment_variables` more resilient ([8180c2e](https://github.com/sambanova/sambanova-python/commit/8180c2e6f0c54c0217883b13ef178b3b6e094881))
+* **internal:** make `test_proxy_environment_variables` more resilient to env ([47cf963](https://github.com/sambanova/sambanova-python/commit/47cf963b0eaf44b022ce08e44630256b10bf8cb4))
+* update mock server docs ([e19b34c](https://github.com/sambanova/sambanova-python/commit/e19b34ca6a27e900736fb33b8628a05b39fc6ee0))
+
 ## 1.4.1 (2026-02-13)
 
 Full Changelog: [v1.4.0...v1.4.1](https://github.com/sambanova/sambanova-python/compare/v1.4.0...v1.4.1)
```

CONTRIBUTING.md

Lines changed: 1 addition & 2 deletions
````diff
@@ -88,8 +88,7 @@ $ pip install ./path-to-wheel-file.whl
 Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests.
 
 ```sh
-# you will need npm installed
-$ npx prism mock path/to/your/openapi.yml
+$ ./scripts/mock
 ```
 
 ```sh
````

pyproject.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 [project]
 name = "sambanova"
-version = "1.4.1"
+version = "1.5.0"
 description = "The official Python library for the SambaNova API"
 dynamic = ["readme"]
 license = "Apache-2.0"
```

src/sambanova/_response.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -152,6 +152,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
                 ),
                 response=self.http_response,
                 client=cast(Any, self._client),
+                options=self._options,
             ),
         )
@@ -162,6 +163,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
                 cast_to=extract_stream_chunk_type(self._stream_cls),
                 response=self.http_response,
                 client=cast(Any, self._client),
+                options=self._options,
             ),
         )
@@ -175,6 +177,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
                 cast_to=cast_to,
                 response=self.http_response,
                 client=cast(Any, self._client),
+                options=self._options,
             ),
         )
```
src/sambanova/_streaming.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -4,7 +4,7 @@
 import json
 import inspect
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast
+from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, Optional, AsyncIterator, cast
 from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, runtime_checkable
 
 import httpx
@@ -13,6 +13,7 @@
 
 if TYPE_CHECKING:
     from ._client import SambaNova, AsyncSambaNova
+    from ._models import FinalRequestOptions
 
 
 _T = TypeVar("_T")
@@ -22,7 +23,7 @@ class Stream(Generic[_T]):
     """Provides the core interface to iterate over a synchronous stream response."""
 
     response: httpx.Response
-
+    _options: Optional[FinalRequestOptions] = None
     _decoder: SSEBytesDecoder
 
     def __init__(
@@ -31,10 +32,12 @@ def __init__(
         cast_to: type[_T],
         response: httpx.Response,
         client: SambaNova,
+        options: Optional[FinalRequestOptions] = None,
     ) -> None:
         self.response = response
         self._cast_to = cast_to
         self._client = client
+        self._options = options
         self._decoder = client._make_sse_decoder()
         self._iterator = self.__stream__()
@@ -104,7 +107,7 @@ class AsyncStream(Generic[_T]):
     """Provides the core interface to iterate over an asynchronous stream response."""
 
     response: httpx.Response
-
+    _options: Optional[FinalRequestOptions] = None
     _decoder: SSEDecoder | SSEBytesDecoder
 
     def __init__(
@@ -113,10 +116,12 @@ def __init__(
         cast_to: type[_T],
         response: httpx.Response,
         client: AsyncSambaNova,
+        options: Optional[FinalRequestOptions] = None,
     ) -> None:
         self.response = response
         self._cast_to = cast_to
         self._client = client
+        self._options = options
         self._decoder = client._make_sse_decoder()
         self._iterator = self.__stream__()
```
src/sambanova/_version.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "sambanova"
-__version__ = "1.4.1" # x-release-please-version
+__version__ = "1.5.0" # x-release-please-version
```

src/sambanova/resources/chat/completions.py

Lines changed: 36 additions & 42 deletions
```diff
@@ -132,9 +132,9 @@ def create(
         logit_bias: This is not yet supported by our models. Modify the likelihood of specified
             tokens appearing in the completion.
 
-        logprobs: This is not yet supported by our models. Whether to return log probabilities of
-            the output tokens or not. If true, returns the log probabilities of each output
-            token returned in the `content` of `message`.
+        logprobs: Whether to return log probabilities of the output tokens or not. If true,
+            returns the log probabilities of each output token returned in the `content` of
+            `message`.
 
         max_completion_tokens: The maximum number of tokens that can be generated in the chat completion. The
             total length of input tokens and generated tokens is limited by the model's
@@ -200,10 +200,9 @@ def create(
             means only the first 10 tokens with higher probability are considered. Is
             recommended altering this, top_p or temperature but not more than one of these.
 
-        top_logprobs: This is not yet supported by our models. An integer between 0 and 20 specifying
-            the number of most likely tokens to return at each token position, each with an
-            associated log probability. `logprobs` must be set to `true` if this parameter
-            is used.
+        top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
+            return at each token position, each with an associated log probability.
+            `logprobs` must be set to `true` if this parameter is used.
 
         top_p: Cumulative probability for token choices. An alternative to sampling with
             temperature, called nucleus sampling, where the model considers the results of
```

The same two docstring edits — dropping the "This is not yet supported by our models." prefix from the `logprobs` and `top_logprobs` descriptions — are repeated verbatim in the remaining sync `create` overloads (hunks `@@ -312,9 +311,9 @@`, `@@ -375,10 +374,9 @@`, `@@ -487,9 +485,9 @@`, `@@ -550,10 +548,9 @@`) and in the async `create` overloads (hunks `@@ -784,9 +781,9 @@`, `@@ -852,10 +849,9 @@`, `@@ -964,9 +960,9 @@`, `@@ -1027,10 +1023,9 @@`, `@@ -1139,9 +1134,9 @@`, `@@ -1202,10 +1197,9 @@`).
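The docstring changes above state two constraints on the newly supported parameters: `top_logprobs` must be an integer between 0 and 20, and it requires `logprobs=True`. A small sketch that encodes those constraints before a request is built (the helper name `build_logprobs_params` is my own, not part of the SDK):

```python
from typing import Any, Dict, Optional


def build_logprobs_params(
    *,
    logprobs: Optional[bool] = None,
    top_logprobs: Optional[int] = None,
) -> Dict[str, Any]:
    """Validate the logprobs parameters per the docstring constraints above.

    - ``top_logprobs`` must be between 0 and 20 inclusive.
    - ``top_logprobs`` requires ``logprobs`` to be set to ``True``.
    """
    if top_logprobs is not None:
        if not 0 <= top_logprobs <= 20:
            raise ValueError("top_logprobs must be an integer between 0 and 20")
        if logprobs is not True:
            raise ValueError("top_logprobs requires logprobs=True")

    # Only include parameters the caller actually set.
    params: Dict[str, Any] = {}
    if logprobs is not None:
        params["logprobs"] = logprobs
    if top_logprobs is not None:
        params["top_logprobs"] = top_logprobs
    return params
```

Assuming the usual client call shape, these would then be passed straight through, e.g. `client.chat.completions.create(model=..., messages=[...], **build_logprobs_params(logprobs=True, top_logprobs=5))`, and the per-token log probabilities come back in the `content` of `message` as the docstrings describe.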
