# coding: utf-8

"""
    Inference Box API

    ## Welcome to Inference Box API  A comprehensive REST API for managing cloud infrastructure resources including ML inference deployments, Kubernetes resources, and application lifecycle management.  ## Overview  Inference Box API is a cloud-native REST API service that provides unified access to manage:  - **ML Inference Deployments**: Create, manage, and scale machine learning inference services - **Slurm Clusters**: Deploy, configure, and manage HPC Slurm clusters for batch workloads - **Kubernetes Resources**: Manage namespaces (projects), pods, secrets, volumes, and other K8s resources - **Application Lifecycle**: Deploy and manage applications from catalog templates - **Resource Management**: Handle flavors, capacities, quotas, and node groups - **User & Access Control**: Manage users, groups, and API keys with RBAC integration - **Monitoring & Metrics**: Query metrics and monitor resource usage via Victoria Metrics  The service is built with Go and integrates with Kubernetes, HashiCorp Vault, and Victoria Metrics to provide a complete cloud infrastructure management solution.  ## Key API Endpoints  - **Inference**: `/v1/{project_name}/inferences` - ML inference deployment management - **Inference (Admin)**: `/v1/admin/{project_name}/inferences` - ML inference deployment management (admin only) - **Slurm Clusters**: `/v1/{project_name}/slurm/clusters` - Slurm cluster deployment and management - **Flavors**: `/v1/flavors` - Compute resource flavor definitions - **Flavors Management**: `/v1/admin/flavors` - Compute resource flavor definitions management - **Users**: `/v1/admin/users` - User management (admin only) - **Projects**: `/v1/{project_name}` - Project-scoped resource operations - **Metrics**: `/v1/{project_name}/metrics` - Resource usage metrics  ## Getting Started  ### Authentication  All API requests require authentication using the `X-API-Key` header. Most operations are scoped to specific projects/namespaces.  
### API Organization  The API is organized into logical groups: - **Public endpoints**: `/v1/*` - General access endpoints - **Project-scoped**: `/v1/{project_name}/*` - Project-specific resources - **Admin endpoints**: `/v1/admin/*` - Administrative operations  ### Documentation  Interactive documentation is available at `/docs` when the service is running. 

    The version of the OpenAPI document: 2.0
    Generated by OpenAPI Generator (https://openapi-generator.tech)

    Do not edit the class manually.
"""  # noqa: E501

import warnings
from pydantic import validate_call, Field, StrictFloat, StrictStr, StrictInt
from typing import Any, Dict, List, Optional, Tuple, Union
from typing_extensions import Annotated

from pydantic import Field, StrictBool, StrictStr
from typing import Optional
from typing_extensions import Annotated
from si_tests.clients.gcore.box_api.models.v1_create_inference_request import V1CreateInferenceRequest
from si_tests.clients.gcore.box_api.models.v1_inference_api_key_secret_response import V1InferenceAPIKeySecretResponse
from si_tests.clients.gcore.box_api.models.v1_inference_logs_response import V1InferenceLogsResponse
from si_tests.clients.gcore.box_api.models.v1_inference_response import V1InferenceResponse
from si_tests.clients.gcore.box_api.models.v1_list_inference_response import V1ListInferenceResponse
from si_tests.clients.gcore.box_api.models.v1_update_inference_request import V1UpdateInferenceRequest

from si_tests.clients.gcore.box_api.api_client import ApiClient, RequestSerialized
from si_tests.clients.gcore.box_api.api_response import ApiResponse
from si_tests.clients.gcore.box_api.rest import RESTResponseType


class InferencesApi:
    """NOTE: This class is auto generated by OpenAPI Generator
    Ref: https://openapi-generator.tech

    Do not edit the class manually.
    """

    def __init__(self, api_client=None) -> None:
        """Bind this API wrapper to the client used to send requests.

        :param api_client: client object stored on ``self.api_client``; when
                           ``None``, ``ApiClient.get_default()`` is used instead.
        """
        self.api_client = ApiClient.get_default() if api_client is None else api_client


    @validate_call
    def v1_create_inference(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        v1_create_inference_request: Annotated[V1CreateInferenceRequest, Field(description="Inference deployment configuration")],
        dry_run: Annotated[Optional[StrictBool], Field(description="Perform validation but do not apply any changes")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1InferenceResponse:
        """Create inference deployment and return the deserialized payload.

        Deploys a standalone containerized inference service in the given
        project. The request carries the deployment configuration (container
        image, resource requirements, scaling options, networking settings).
        For pre-configured applications with multiple components, the API
        description recommends the `/v1/{project_name}/apps/deployments`
        endpoints instead.

        The request is built by ``_v1_create_inference_serialize``, sent with
        ``api_client.call_api``, read, and deserialized; only the ``data``
        attribute of the deserialized result is returned.

        :param project_name: Project name (required)
        :type project_name: str
        :param v1_create_inference_request: Inference deployment configuration (required)
        :type v1_create_inference_request: V1CreateInferenceRequest
        :param dry_run: Perform validation but do not apply any changes
        :type dry_run: bool, optional
        :param _request_timeout: timeout for this request: either one positive
                                 number (total request timeout) or a pair of
                                 positive numbers (connection, read).
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for this single
                              request, ignoring the authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type of the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for this single request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for this single request;
                            the annotation only admits 0.
        :type _host_index: int, optional
        :return: the ``data`` attribute of the deserialized response.
        """
        # Response status code -> type name handed to the deserializer.
        types_by_status: Dict[str, Optional[str]] = {
            '200': "V1InferenceResponse",
            '204': "str",
            '404': "ApiErrorResponse",
        }

        request_args = self._v1_create_inference_serialize(
            project_name=project_name,
            v1_create_inference_request=v1_create_inference_request,
            dry_run=dry_run,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        raw_response = self.api_client.call_api(
            *request_args,
            _request_timeout=_request_timeout
        )
        # The body is read before the response is passed to the deserializer.
        raw_response.read()
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )
        return deserialized.data


    @validate_call
    def v1_create_inference_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        v1_create_inference_request: Annotated[V1CreateInferenceRequest, Field(description="Inference deployment configuration")],
        dry_run: Annotated[Optional[StrictBool], Field(description="Perform validation but do not apply any changes")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1InferenceResponse]:
        """Create inference deployment and return the full deserialized response.

        Deploys a standalone containerized inference service in the given
        project. The request carries the deployment configuration (container
        image, resource requirements, scaling options, networking settings).
        For pre-configured applications with multiple components, the API
        description recommends the `/v1/{project_name}/apps/deployments`
        endpoints instead.

        The request is built by ``_v1_create_inference_serialize``, sent with
        ``api_client.call_api``, read, and deserialized. Unlike
        ``v1_create_inference``, the whole object produced by
        ``api_client.response_deserialize`` is returned, not just its ``data``.

        :param project_name: Project name (required)
        :type project_name: str
        :param v1_create_inference_request: Inference deployment configuration (required)
        :type v1_create_inference_request: V1CreateInferenceRequest
        :param dry_run: Perform validation but do not apply any changes
        :type dry_run: bool, optional
        :param _request_timeout: timeout for this request: either one positive
                                 number (total request timeout) or a pair of
                                 positive numbers (connection, read).
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for this single
                              request, ignoring the authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type of the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for this single request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for this single request;
                            the annotation only admits 0.
        :type _host_index: int, optional
        :return: the object returned by ``api_client.response_deserialize``.
        """
        # Response status code -> type name handed to the deserializer.
        types_by_status: Dict[str, Optional[str]] = {
            '200': "V1InferenceResponse",
            '204': "str",
            '404': "ApiErrorResponse",
        }

        request_args = self._v1_create_inference_serialize(
            project_name=project_name,
            v1_create_inference_request=v1_create_inference_request,
            dry_run=dry_run,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        raw_response = self.api_client.call_api(
            *request_args,
            _request_timeout=_request_timeout
        )
        # The body is read before the response is passed to the deserializer.
        raw_response.read()
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )
        return deserialized


    @validate_call
    def v1_create_inference_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        v1_create_inference_request: Annotated[V1CreateInferenceRequest, Field(description="Inference deployment configuration")],
        dry_run: Annotated[Optional[StrictBool], Field(description="Perform validation but do not apply any changes")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Create inference deployment and return the raw, unread response.

        Deploys a standalone containerized inference service in the given
        project. The request carries the deployment configuration (container
        image, resource requirements, scaling options, networking settings).
        For pre-configured applications with multiple components, the API
        description recommends the `/v1/{project_name}/apps/deployments`
        endpoints instead.

        The request is built by ``_v1_create_inference_serialize`` and sent
        with ``api_client.call_api``. This variant neither reads nor
        deserializes the response: it returns the ``response`` attribute of
        the object ``call_api`` produced.

        :param project_name: Project name (required)
        :type project_name: str
        :param v1_create_inference_request: Inference deployment configuration (required)
        :type v1_create_inference_request: V1CreateInferenceRequest
        :param dry_run: Perform validation but do not apply any changes
        :type dry_run: bool, optional
        :param _request_timeout: timeout for this request: either one positive
                                 number (total request timeout) or a pair of
                                 positive numbers (connection, read).
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for this single
                              request, ignoring the authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type of the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for this single request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for this single request;
                            the annotation only admits 0.
        :type _host_index: int, optional
        :return: the ``response`` attribute of the ``call_api`` result.
        """
        request_args = self._v1_create_inference_serialize(
            project_name=project_name,
            v1_create_inference_request=v1_create_inference_request,
            dry_run=dry_run,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-type map is built here: nothing is deserialized, so the
        # previously constructed (and never used) map was dead code.
        return self.api_client.call_api(
            *request_args,
            _request_timeout=_request_timeout
        ).response


    def _v1_create_inference_serialize(
        self,
        project_name,
        v1_create_inference_request,
        dry_run,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Build the serialized ``POST /v1/{project_name}/inferences`` request.

        :param project_name: value of the ``project_name`` path parameter;
                             omitted when ``None``.
        :param v1_create_inference_request: object used as the request body;
                                            the body stays ``None`` when not given.
        :param dry_run: added as the ``dry_run`` query parameter unless ``None``.
        :param _request_auth: forwarded unchanged to ``api_client.param_serialize``.
        :param _content_type: explicit ``Content-Type`` header value; when falsy,
                              the value chosen by
                              ``api_client.select_header_content_type`` for
                              ``application/json`` is used if it is not ``None``.
        :param _headers: caller-supplied headers used as the initial header
                         set. The mapping is copied, so it is never modified.
        :param _host_index: accepted to match the public methods' call; it is
                            not referenced in this method.
        :return: the value returned by ``api_client.param_serialize``.
        """
        _host = None

        _collection_formats: Dict[str, str] = {}

        _path_params: Dict[str, str] = {}
        _query_params: List[Tuple[str, str]] = []
        # Copy rather than alias the caller's mapping: `_headers or {}` returned
        # the very same dict, so the Accept / Content-Type defaults set below
        # were written into the caller-supplied object.
        _header_params: Dict[str, Optional[str]] = dict(_headers) if _headers else {}
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # process the path parameters
        if project_name is not None:
            _path_params['project_name'] = project_name
        # process the query parameters
        if dry_run is not None:
            _query_params.append(('dry_run', dry_run))
        # process the body parameter
        if v1_create_inference_request is not None:
            _body_params = v1_create_inference_request

        # set the HTTP header `Accept` unless the caller already provided one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # set the HTTP header `Content-Type`: an explicit value wins, otherwise
        # fall back to the client's selection (skipped when that is None)
        if _content_type:
            _header_params['Content-Type'] = _content_type
        else:
            _default_content_type = (
                self.api_client.select_header_content_type(
                    [
                        'application/json'
                    ]
                )
            )
            if _default_content_type is not None:
                _header_params['Content-Type'] = _default_content_type

        # authentication setting (no auth scheme names are listed for this call)
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='POST',
            resource_path='/v1/{project_name}/inferences',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_delete_inference(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> None:
        """Delete inference deployment and return the deserialized payload.

        Permanently removes a standalone inference deployment and all its
        resources from all regions where it is deployed. Per the API
        description, inferences that belong to application deployments from
        the apps catalog are read-only and must be managed through the
        `/v1/{project_name}/apps/deployments` endpoints; attempting to delete
        one yields an error. The operation cannot be undone.

        The request is built by ``_v1_delete_inference_serialize``, sent with
        ``api_client.call_api``, read, and deserialized; only the ``data``
        attribute of the deserialized result is returned.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout for this request: either one positive
                                 number (total request timeout) or a pair of
                                 positive numbers (connection, read).
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for this single
                              request, ignoring the authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type of the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for this single request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for this single request;
                            the annotation only admits 0.
        :type _host_index: int, optional
        :return: the ``data`` attribute of the deserialized response.
        """
        # Response status code -> type name handed to the deserializer
        # (204 maps to no type).
        types_by_status: Dict[str, Optional[str]] = {
            '204': None,
            '404': "ApiErrorResponse",
        }

        request_args = self._v1_delete_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        raw_response = self.api_client.call_api(
            *request_args,
            _request_timeout=_request_timeout
        )
        # The body is read before the response is passed to the deserializer.
        raw_response.read()
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )
        return deserialized.data


    @validate_call
    def v1_delete_inference_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[None]:
        """Delete inference deployment and return the full deserialized response.

        Permanently removes a standalone inference deployment and all its
        resources from all regions where it is deployed. Per the API
        description, inferences that belong to application deployments from
        the apps catalog are read-only and must be managed through the
        `/v1/{project_name}/apps/deployments` endpoints; attempting to delete
        one yields an error. The operation cannot be undone.

        The request is built by ``_v1_delete_inference_serialize``, sent with
        ``api_client.call_api``, read, and deserialized. Unlike
        ``v1_delete_inference``, the whole object produced by
        ``api_client.response_deserialize`` is returned, not just its ``data``.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout for this request: either one positive
                                 number (total request timeout) or a pair of
                                 positive numbers (connection, read).
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for this single
                              request, ignoring the authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type of the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for this single request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for this single request;
                            the annotation only admits 0.
        :type _host_index: int, optional
        :return: the object returned by ``api_client.response_deserialize``.
        """
        # Response status code -> type name handed to the deserializer
        # (204 maps to no type).
        types_by_status: Dict[str, Optional[str]] = {
            '204': None,
            '404': "ApiErrorResponse",
        }

        request_args = self._v1_delete_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        raw_response = self.api_client.call_api(
            *request_args,
            _request_timeout=_request_timeout
        )
        # The body is read before the response is passed to the deserializer.
        raw_response.read()
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )
        return deserialized


    @validate_call
    def v1_delete_inference_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Delete inference deployment and return the raw, unread response.

        Permanently removes a standalone inference deployment and all its
        resources from all regions where it is deployed. Per the API
        description, inferences that belong to application deployments from
        the apps catalog are read-only and must be managed through the
        `/v1/{project_name}/apps/deployments` endpoints; attempting to delete
        one yields an error. The operation cannot be undone.

        The request is built by ``_v1_delete_inference_serialize`` and sent
        with ``api_client.call_api``. This variant neither reads nor
        deserializes the response: it returns the ``response`` attribute of
        the object ``call_api`` produced.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout for this request: either one positive
                                 number (total request timeout) or a pair of
                                 positive numbers (connection, read).
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for this single
                              request, ignoring the authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type of the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for this single request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for this single request;
                            the annotation only admits 0.
        :type _host_index: int, optional
        :return: the ``response`` attribute of the ``call_api`` result.
        """
        request_args = self._v1_delete_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-type map is built here: nothing is deserialized, so the
        # previously constructed (and never used) map was dead code.
        return self.api_client.call_api(
            *request_args,
            _request_timeout=_request_timeout
        ).response


    def _v1_delete_inference_serialize(
        self,
        project_name,
        inference_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Serialize the DELETE request for a single inference deployment.

        Gathers the path parameters and headers for
        ``DELETE /v1/{project_name}/inferences/{inference_name}`` and passes
        them to ``self.api_client.param_serialize``. The request carries no
        query parameters, form fields, files or body.

        :param project_name: Project name; set as a path parameter unless it
                             is None.
        :param inference_name: Inference deployment name; set as a path
                               parameter unless it is None.
        :param _request_auth: forwarded unchanged to ``param_serialize``.
        :param _content_type: accepted but not used by this method.
        :param _headers: optional per-request headers. They are copied, so
                         the caller's mapping is never modified; an ``Accept``
                         header is added to the copy only when the caller did
                         not supply one.
        :param _host_index: accepted but not used by this method; ``_host``
                            is always passed as None.
        :return: the value returned by ``self.api_client.param_serialize``.
        """

        _host = None

        # Copy the caller's headers: the default `Accept` header added below
        # must not leak into a dict the caller may reuse for other requests.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )

        # process the path parameters
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name

        # This operation has no query, form, file or body parameters.
        _query_params: List[Tuple[str, str]] = []
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None
        _collection_formats: Dict[str, str] = {}

        # set the HTTP header `Accept` unless the caller already chose one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # authentication setting: no auth scheme names are listed here
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='DELETE',
            resource_path='/v1/{project_name}/inferences/{inference_name}',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_get_inference(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1InferenceResponse:
        """Get inference deployment

        Retrieve detailed information about a specific inference deployment
        in the project and return only the deserialized payload.

        Inference deployments are containerized services that run machine
        learning models or related components. They can be created directly
        or as part of an application deployment from the apps catalog.

        The response includes:

        - Configuration details (container image, resources, etc.)
        - Status information across all regions
        - Scaling configuration
        - Networking and endpoint information

        Note: Some inference deployments are created and managed by
        application deployments from the apps catalog. These inferences are
        marked as read-only and cannot be modified or deleted directly. They
        must be managed through the parent application deployment using the
        `/v1/{project_name}/apps/deployments` endpoints.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number is provided, it is the total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts. Every value
                                 must be a float greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; validation only accepts 0.
        :type _host_index: int, optional
        :return: the ``data`` attribute of the deserialized response
                 (status 200 is mapped to ``V1InferenceResponse``, status
                 404 to ``ApiErrorResponse``).
        :rtype: V1InferenceResponse
        """ # noqa: E501

        serialized_request = self._v1_get_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request, _request_timeout=_request_timeout
        )

        # The body has to be loaded before it can be deserialized.
        raw_response.read()

        # Model name to deserialize into, keyed by HTTP status code.
        status_to_model: Dict[str, Optional[str]] = {
            '200': "V1InferenceResponse",
            '404': "ApiErrorResponse",
        }
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_to_model,
        )
        return deserialized.data


    @validate_call
    def v1_get_inference_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1InferenceResponse]:
        """Get inference deployment

        Retrieve detailed information about a specific inference deployment
        in the project and return the full result of
        ``response_deserialize`` rather than only its payload.

        Inference deployments are containerized services that run machine
        learning models or related components. They can be created directly
        or as part of an application deployment from the apps catalog.

        The response includes:

        - Configuration details (container image, resources, etc.)
        - Status information across all regions
        - Scaling configuration
        - Networking and endpoint information

        Note: Some inference deployments are created and managed by
        application deployments from the apps catalog. These inferences are
        marked as read-only and cannot be modified or deleted directly. They
        must be managed through the parent application deployment using the
        `/v1/{project_name}/apps/deployments` endpoints.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number is provided, it is the total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts. Every value
                                 must be a float greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; validation only accepts 0.
        :type _host_index: int, optional
        :return: the object returned by ``response_deserialize`` (status 200
                 is mapped to ``V1InferenceResponse``, status 404 to
                 ``ApiErrorResponse``).
        :rtype: ApiResponse[V1InferenceResponse]
        """ # noqa: E501

        serialized_request = self._v1_get_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request, _request_timeout=_request_timeout
        )

        # The body has to be loaded before it can be deserialized.
        raw_response.read()

        # Model name to deserialize into, keyed by HTTP status code.
        status_to_model: Dict[str, Optional[str]] = {
            '200': "V1InferenceResponse",
            '404': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_to_model,
        )


    @validate_call
    def v1_get_inference_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Get inference deployment

        Retrieve detailed information about a specific inference deployment
        in the project and return the raw response. This variant neither
        reads nor deserializes the body; that is left to the caller.

        Inference deployments are containerized services that run machine
        learning models or related components. They can be created directly
        or as part of an application deployment from the apps catalog.

        The response includes:

        - Configuration details (container image, resources, etc.)
        - Status information across all regions
        - Scaling configuration
        - Networking and endpoint information

        Note: Some inference deployments are created and managed by
        application deployments from the apps catalog. These inferences are
        marked as read-only and cannot be modified or deleted directly. They
        must be managed through the parent application deployment using the
        `/v1/{project_name}/apps/deployments` endpoints.

        Documented responses: 200 (``V1InferenceResponse``) and 404
        (``ApiErrorResponse``). This method does not interpret either.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number is provided, it is the total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts. Every value
                                 must be a float greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; validation only accepts 0.
        :type _host_index: int, optional
        :return: the ``response`` attribute of the object returned by
                 ``self.api_client.call_api``.
        :rtype: RESTResponseType
        """ # noqa: E501

        _param = self._v1_get_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-types map is built here: nothing is deserialized, so
        # the previously constructed map was never read.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_get_inference_serialize(
        self,
        project_name,
        inference_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Serialize the GET request for a single inference deployment.

        Gathers the path parameters and headers for
        ``GET /v1/{project_name}/inferences/{inference_name}`` and passes
        them to ``self.api_client.param_serialize``. The request carries no
        query parameters, form fields, files or body.

        :param project_name: Project name; set as a path parameter unless it
                             is None.
        :param inference_name: Inference deployment name; set as a path
                               parameter unless it is None.
        :param _request_auth: forwarded unchanged to ``param_serialize``.
        :param _content_type: accepted but not used by this method.
        :param _headers: optional per-request headers. They are copied, so
                         the caller's mapping is never modified; an ``Accept``
                         header is added to the copy only when the caller did
                         not supply one.
        :param _host_index: accepted but not used by this method; ``_host``
                            is always passed as None.
        :return: the value returned by ``self.api_client.param_serialize``.
        """

        _host = None

        # Copy the caller's headers: the default `Accept` header added below
        # must not leak into a dict the caller may reuse for other requests.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )

        # process the path parameters
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name

        # This operation has no query, form, file or body parameters.
        _query_params: List[Tuple[str, str]] = []
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None
        _collection_formats: Dict[str, str] = {}

        # set the HTTP header `Accept` unless the caller already chose one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # authentication setting: no auth scheme names are listed here
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/inferences/{inference_name}',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_get_inference_api_key(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1InferenceAPIKeySecretResponse:
        """Get inference API key

        Retrieve the API key that can be used to authenticate requests to
        the inference deployment, returning only the deserialized payload.
        The API key is only available if API key authentication is enabled
        for the inference deployment.

        Inference deployments are containerized services that run machine
        learning models or related components. Both standalone inferences
        and those that are part of application deployments can have API
        keys.

        Use this endpoint when you need to:

        - Retrieve the API key for authenticating requests to an inference
        - Set up client applications to communicate with the inference
        - Configure tools or services that need to access the inference

        Note: This endpoint will return an error if API key authentication
        is disabled for the inference. This endpoint works for both
        standalone inferences and those that are part of application
        deployments from the apps catalog, even though the latter are
        read-only for other operations.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number is provided, it is the total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts. Every value
                                 must be a float greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; validation only accepts 0.
        :type _host_index: int, optional
        :return: the ``data`` attribute of the deserialized response
                 (status 200 is mapped to
                 ``V1InferenceAPIKeySecretResponse``, status 404 to
                 ``ApiErrorResponse``).
        :rtype: V1InferenceAPIKeySecretResponse
        """ # noqa: E501

        serialized_request = self._v1_get_inference_api_key_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request, _request_timeout=_request_timeout
        )

        # The body has to be loaded before it can be deserialized.
        raw_response.read()

        # Model name to deserialize into, keyed by HTTP status code.
        status_to_model: Dict[str, Optional[str]] = {
            '200': "V1InferenceAPIKeySecretResponse",
            '404': "ApiErrorResponse",
        }
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_to_model,
        )
        return deserialized.data


    @validate_call
    def v1_get_inference_api_key_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1InferenceAPIKeySecretResponse]:
        """Get inference API key

        Retrieve the API key that can be used to authenticate requests to
        the inference deployment, returning the full result of
        ``response_deserialize`` rather than only its payload. The API key
        is only available if API key authentication is enabled for the
        inference deployment.

        Inference deployments are containerized services that run machine
        learning models or related components. Both standalone inferences
        and those that are part of application deployments can have API
        keys.

        Use this endpoint when you need to:

        - Retrieve the API key for authenticating requests to an inference
        - Set up client applications to communicate with the inference
        - Configure tools or services that need to access the inference

        Note: This endpoint will return an error if API key authentication
        is disabled for the inference. This endpoint works for both
        standalone inferences and those that are part of application
        deployments from the apps catalog, even though the latter are
        read-only for other operations.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number is provided, it is the total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts. Every value
                                 must be a float greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; validation only accepts 0.
        :type _host_index: int, optional
        :return: the object returned by ``response_deserialize`` (status 200
                 is mapped to ``V1InferenceAPIKeySecretResponse``, status
                 404 to ``ApiErrorResponse``).
        :rtype: ApiResponse[V1InferenceAPIKeySecretResponse]
        """ # noqa: E501

        serialized_request = self._v1_get_inference_api_key_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request, _request_timeout=_request_timeout
        )

        # The body has to be loaded before it can be deserialized.
        raw_response.read()

        # Model name to deserialize into, keyed by HTTP status code.
        status_to_model: Dict[str, Optional[str]] = {
            '200': "V1InferenceAPIKeySecretResponse",
            '404': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_to_model,
        )


    @validate_call
    def v1_get_inference_api_key_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Get inference API key

        Retrieve the API key that can be used to authenticate requests to
        the inference deployment and return the raw response. This variant
        neither reads nor deserializes the body; that is left to the caller.
        The API key is only available if API key authentication is enabled
        for the inference deployment.

        Inference deployments are containerized services that run machine
        learning models or related components. Both standalone inferences
        and those that are part of application deployments can have API
        keys.

        Use this endpoint when you need to:

        - Retrieve the API key for authenticating requests to an inference
        - Set up client applications to communicate with the inference
        - Configure tools or services that need to access the inference

        Note: This endpoint will return an error if API key authentication
        is disabled for the inference. This endpoint works for both
        standalone inferences and those that are part of application
        deployments from the apps catalog, even though the latter are
        read-only for other operations.

        Documented responses: 200 (``V1InferenceAPIKeySecretResponse``) and
        404 (``ApiErrorResponse``). This method does not interpret either.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number is provided, it is the total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts. Every value
                                 must be a float greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; validation only accepts 0.
        :type _host_index: int, optional
        :return: the ``response`` attribute of the object returned by
                 ``self.api_client.call_api``.
        :rtype: RESTResponseType
        """ # noqa: E501

        _param = self._v1_get_inference_api_key_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-types map is built here: nothing is deserialized, so
        # the previously constructed map was never read.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_get_inference_api_key_serialize(
        self,
        project_name,
        inference_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Serialize the GET request for an inference deployment's API key.

        Gathers the path parameters and headers for
        ``GET /v1/{project_name}/inferences/{inference_name}/apikey`` and
        passes them to ``self.api_client.param_serialize``. The request
        carries no query parameters, form fields, files or body.

        :param project_name: Project name; set as a path parameter unless it
                             is None.
        :param inference_name: Inference deployment name; set as a path
                               parameter unless it is None.
        :param _request_auth: forwarded unchanged to ``param_serialize``.
        :param _content_type: accepted but not used by this method.
        :param _headers: optional per-request headers. They are copied, so
                         the caller's mapping is never modified; an ``Accept``
                         header is added to the copy only when the caller did
                         not supply one.
        :param _host_index: accepted but not used by this method; ``_host``
                            is always passed as None.
        :return: the value returned by ``self.api_client.param_serialize``.
        """

        _host = None

        # Copy the caller's headers: the default `Accept` header added below
        # must not leak into a dict the caller may reuse for other requests.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )

        # process the path parameters
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name

        # This operation has no query, form, file or body parameters.
        _query_params: List[Tuple[str, str]] = []
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None
        _collection_formats: Dict[str, str] = {}

        # set the HTTP header `Accept` unless the caller already chose one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # authentication setting: no auth scheme names are listed here
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/inferences/{inference_name}/apikey',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_get_inference_logs(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        region: Annotated[Optional[StrictStr], Field(description="Filter by region name(s)")] = None,
        limit: Annotated[Optional[StrictStr], Field(description="Limit the number of returned log records")] = None,
        order_by: Annotated[Optional[StrictStr], Field(description="Sort order of results (time.asc or time.desc)")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1InferenceLogsResponse:
        """Get inference logs

        This endpoint retrieves the logs from the containers running the inference deployment. You can filter logs by region, limit the number of returned records, and specify the sort order.  Inference deployments are containerized services that run machine learning models or related components. Both standalone inferences and those that are part of application deployments generate logs.  The logs provide valuable information for: - Debugging issues with the inference deployment - Monitoring the performance and behavior of the model - Troubleshooting errors or unexpected results - Analyzing usage patterns and request handling  Use this endpoint when you need to: - Diagnose problems with an inference deployment - Monitor the activity of an inference - Collect logs for analysis or reporting  Note: This endpoint works for both standalone inferences and those that are part of application deployments from the apps catalog, even though the latter are read-only for other operations.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param region: Filter by region name(s)
        :type region: str
        :param limit: Limit the number of returned log records
        :type limit: str
        :param order_by: Sort order of results (time.asc or time.desc)
        :type order_by: str
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        :type _request_timeout: int, tuple(int, int), optional
        :param _request_auth: set to override the auth_settings for an a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, Optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; this effectively ignores the host_index
                            in the spec for a single request.
        :type _host_index: int, optional
        :return: Returns the result object.
        """ # noqa: E501

        _param = self._v1_get_inference_logs_serialize(
            project_name=project_name,
            inference_name=inference_name,
            region=region,
            limit=limit,
            order_by=order_by,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        _response_types_map: Dict[str, Optional[str]] = {
            '200': "V1InferenceLogsResponse",
            '404': "ApiErrorResponse",
        }
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        response_data.read()
        return self.api_client.response_deserialize(
            response_data=response_data,
            response_types_map=_response_types_map,
        ).data


    @validate_call
    def v1_get_inference_logs_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        region: Annotated[Optional[StrictStr], Field(description="Filter by region name(s)")] = None,
        limit: Annotated[Optional[StrictStr], Field(description="Limit the number of returned log records")] = None,
        order_by: Annotated[Optional[StrictStr], Field(description="Sort order of results (time.asc or time.desc)")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1InferenceLogsResponse]:
        """Get inference logs and return the complete deserialized response.

        Retrieves the logs from the containers running the inference
        deployment. Logs can be filtered by region, capped to a number of
        records, and sorted. The endpoint serves both standalone inferences
        and inferences that belong to application deployments from the apps
        catalog (which are read-only for other operations).

        Unlike ``v1_get_inference_logs``, the object produced by
        ``api_client.response_deserialize`` is returned as-is instead of only
        its ``data`` attribute.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param region: Filter by region name(s)
        :type region: str
        :param limit: Limit the number of returned log records
        :type limit: str
        :param order_by: Sort order of results (time.asc or time.desc)
        :type order_by: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The deserialized API response (``200`` is mapped to
                 ``V1InferenceLogsResponse``, ``404`` to ``ApiErrorResponse``).
        """
        # Step 1: turn the arguments into the serialized request pieces.
        serialized_request = self._v1_get_inference_logs_serialize(
            project_name=project_name,
            inference_name=inference_name,
            region=region,
            limit=limit,
            order_by=order_by,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        # Step 2: perform the call and read the response before deserializing.
        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        raw_response.read()

        # Step 3: map HTTP status codes to the model names used to deserialize.
        models_by_status: Dict[str, Optional[str]] = {
            '200': "V1InferenceLogsResponse",
            '404': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=models_by_status,
        )


    @validate_call
    def v1_get_inference_logs_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        region: Annotated[Optional[StrictStr], Field(description="Filter by region name(s)")] = None,
        limit: Annotated[Optional[StrictStr], Field(description="Limit the number of returned log records")] = None,
        order_by: Annotated[Optional[StrictStr], Field(description="Sort order of results (time.asc or time.desc)")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Get inference logs and return the underlying response untouched.

        Retrieves the logs from the containers running the inference
        deployment. Logs can be filtered by region, capped to a number of
        records, and sorted. The endpoint serves both standalone inferences
        and inferences that belong to application deployments from the apps
        catalog (which are read-only for other operations).

        This variant neither calls ``read()`` on the response nor deserializes
        it: the ``response`` attribute of the object returned by
        ``api_client.call_api`` is handed back directly (presumably so the
        caller can consume the body itself -- confirm against ``ApiClient``).
        No status-code-to-model mapping is applied here.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param region: Filter by region name(s)
        :type region: str
        :param limit: Limit the number of returned log records
        :type limit: str
        :param order_by: Sort order of results (time.asc or time.desc)
        :type order_by: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The raw, non-deserialized response object.
        """
        _param = self._v1_get_inference_logs_serialize(
            project_name=project_name,
            inference_name=inference_name,
            region=region,
            limit=limit,
            order_by=order_by,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # The response-type map built by the sibling variants is deliberately
        # absent: nothing is deserialized on this path, so it was dead code.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_get_inference_logs_serialize(
        self,
        project_name,
        inference_name,
        region,
        limit,
        order_by,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Serialize a ``GET /v1/{project_name}/inferences/{inference_name}/logs`` request.

        Collects path and query parameters (skipping any that are ``None``),
        ensures an ``Accept`` header is present, and delegates the final
        assembly to ``api_client.param_serialize``.

        The caller-supplied ``_headers`` mapping is copied before use.
        Previously a non-empty ``_headers`` dict was used directly, so adding
        ``Accept`` here wrote into the caller's own dict and leaked into any
        later call that reused it.

        :param project_name: value for the ``project_name`` path segment
        :param inference_name: value for the ``inference_name`` path segment
        :param region: optional ``region`` query parameter
        :param limit: optional ``limit`` query parameter
        :param order_by: optional ``order_by`` query parameter
        :param _request_auth: per-request auth override, passed through to
                              ``param_serialize``
        :param _content_type: accepted for signature parity; not used by this
                              body-less GET request
        :param _headers: optional per-request headers; an ``Accept`` entry
                         supplied here takes precedence over the default
        :param _host_index: accepted for signature parity; not used because
                            no alternative host is selected (``_host`` is None)
        :return: whatever ``api_client.param_serialize`` returns
        """
        _host = None

        _collection_formats: Dict[str, str] = {}

        # Shallow copy: never mutate the dict the caller handed in.
        _header_params: Dict[str, Optional[str]] = dict(_headers) if _headers else {}
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # Path parameters.
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name

        # Query parameters, in the fixed order region -> limit -> order_by.
        _query_params: List[Tuple[str, str]] = [
            (key, value)
            for key, value in (
                ('region', region),
                ('limit', limit),
                ('order_by', order_by),
            )
            if value is not None
        ]

        # No header, form or body parameters for this operation.

        # Only pick a default `Accept` when the caller did not provide one.
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # No auth schemes are listed for this operation.
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/inferences/{inference_name}/logs',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_list_inferences(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1ListInferenceResponse:
        """List inference deployments and return the deserialized payload.

        Provides a summary of all inference deployments in the project,
        including their names, configurations, and current status across all
        regions where they are deployed.

        The list contains both standalone inference deployments and those
        that are part of application deployments from the apps catalog. The
        latter are marked read-only and must be managed through the parent
        application deployment via the `/v1/{project_name}/apps/deployments`
        endpoints.

        :param project_name: Project name (required)
        :type project_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The ``data`` attribute of the deserialized response (``200``
                 is mapped to ``V1ListInferenceResponse``, ``404`` to
                 ``ApiErrorResponse``).
        """
        request_parts = self._v1_list_inferences_serialize(
            project_name=project_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        http_response = self.api_client.call_api(
            *request_parts,
            _request_timeout=_request_timeout,
        )
        # The response is read before being handed to the deserializer.
        http_response.read()

        models_by_status: Dict[str, Optional[str]] = {
            '200': "V1ListInferenceResponse",
            '404': "ApiErrorResponse",
        }
        api_response = self.api_client.response_deserialize(
            response_data=http_response,
            response_types_map=models_by_status,
        )
        # Callers of this variant only want the payload, not the wrapper.
        return api_response.data


    @validate_call
    def v1_list_inferences_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1ListInferenceResponse]:
        """List inference deployments and return the complete deserialized response.

        Provides a summary of all inference deployments in the project,
        including their names, configurations, and current status across all
        regions where they are deployed.

        The list contains both standalone inference deployments and those
        that are part of application deployments from the apps catalog. The
        latter are marked read-only and must be managed through the parent
        application deployment via the `/v1/{project_name}/apps/deployments`
        endpoints.

        Unlike ``v1_list_inferences``, the object produced by
        ``api_client.response_deserialize`` is returned as-is instead of only
        its ``data`` attribute.

        :param project_name: Project name (required)
        :type project_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The deserialized API response (``200`` is mapped to
                 ``V1ListInferenceResponse``, ``404`` to ``ApiErrorResponse``).
        """
        request_parts = self._v1_list_inferences_serialize(
            project_name=project_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        http_response = self.api_client.call_api(
            *request_parts,
            _request_timeout=_request_timeout,
        )
        # The response is read before being handed to the deserializer.
        http_response.read()

        models_by_status: Dict[str, Optional[str]] = {
            '200': "V1ListInferenceResponse",
            '404': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=http_response,
            response_types_map=models_by_status,
        )


    @validate_call
    def v1_list_inferences_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """List inference deployments and return the underlying response untouched.

        Provides a summary of all inference deployments in the project,
        including their names, configurations, and current status across all
        regions where they are deployed.

        The list contains both standalone inference deployments and those
        that are part of application deployments from the apps catalog. The
        latter are marked read-only and must be managed through the parent
        application deployment via the `/v1/{project_name}/apps/deployments`
        endpoints.

        This variant neither calls ``read()`` on the response nor deserializes
        it: the ``response`` attribute of the object returned by
        ``api_client.call_api`` is handed back directly (presumably so the
        caller can consume the body itself -- confirm against ``ApiClient``).
        No status-code-to-model mapping is applied here.

        :param project_name: Project name (required)
        :type project_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The raw, non-deserialized response object.
        """
        _param = self._v1_list_inferences_serialize(
            project_name=project_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # The response-type map built by the sibling variants is deliberately
        # absent: nothing is deserialized on this path, so it was dead code.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_list_inferences_serialize(
        self,
        project_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Serialize a ``GET /v1/{project_name}/inferences`` request.

        Sets the ``project_name`` path parameter (when not ``None``), ensures
        an ``Accept`` header is present, and delegates the final assembly to
        ``api_client.param_serialize``. The operation has no query, form or
        body parameters.

        The caller-supplied ``_headers`` mapping is copied before use.
        Previously a non-empty ``_headers`` dict was used directly, so adding
        ``Accept`` here wrote into the caller's own dict and leaked into any
        later call that reused it.

        :param project_name: value for the ``project_name`` path segment
        :param _request_auth: per-request auth override, passed through to
                              ``param_serialize``
        :param _content_type: accepted for signature parity; not used by this
                              body-less GET request
        :param _headers: optional per-request headers; an ``Accept`` entry
                         supplied here takes precedence over the default
        :param _host_index: accepted for signature parity; not used because
                            no alternative host is selected (``_host`` is None)
        :return: whatever ``api_client.param_serialize`` returns
        """
        _host = None

        _collection_formats: Dict[str, str] = {}

        # Shallow copy: never mutate the dict the caller handed in.
        _header_params: Dict[str, Optional[str]] = dict(_headers) if _headers else {}
        _query_params: List[Tuple[str, str]] = []
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # Path parameters.
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name

        # Only pick a default `Accept` when the caller did not provide one.
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # No auth schemes are listed for this operation.
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/inferences',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_pause_inference(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> None:
        """Mark a standalone inference deployment as paused.

        Sets the paused flag on the specified deployment and its child
        resources. Paused deployments are not reconciled until resumed, which
        is useful for maintenance or troubleshooting (for example to
        temporarily halt automatic updates).

        Only standalone inference deployments can be updated. Inferences that
        are part of application deployments from the apps catalog are
        read-only and cannot be modified directly.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The ``data`` attribute of the deserialized response; the
                 ``204`` status has no associated model (mapped to ``None``),
                 ``404`` is mapped to ``ApiErrorResponse``.
        """
        request_parts = self._v1_pause_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        http_response = self.api_client.call_api(
            *request_parts,
            _request_timeout=_request_timeout,
        )
        # The response is read before being handed to the deserializer.
        http_response.read()

        models_by_status: Dict[str, Optional[str]] = {
            '204': None,
            '404': "ApiErrorResponse",
        }
        api_response = self.api_client.response_deserialize(
            response_data=http_response,
            response_types_map=models_by_status,
        )
        return api_response.data


    @validate_call
    def v1_pause_inference_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[None]:
        """Mark a standalone inference deployment as paused; return the full response.

        Sets the paused flag on the specified deployment and its child
        resources. Paused deployments are not reconciled until resumed, which
        is useful for maintenance or troubleshooting (for example to
        temporarily halt automatic updates).

        Only standalone inference deployments can be updated. Inferences that
        are part of application deployments from the apps catalog are
        read-only and cannot be modified directly.

        Unlike ``v1_pause_inference``, the object produced by
        ``api_client.response_deserialize`` is returned as-is instead of only
        its ``data`` attribute.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 (connection, read) timeouts. Values must be
                                 greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request, ignoring the authentication in the
                              spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer. Validation only admits 0.
        :type _host_index: int, optional
        :return: The deserialized API response; the ``204`` status has no
                 associated model (mapped to ``None``), ``404`` is mapped to
                 ``ApiErrorResponse``.
        """
        request_parts = self._v1_pause_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        http_response = self.api_client.call_api(
            *request_parts,
            _request_timeout=_request_timeout,
        )
        # The response is read before being handed to the deserializer.
        http_response.read()

        models_by_status: Dict[str, Optional[str]] = {
            '204': None,
            '404': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=http_response,
            response_types_map=models_by_status,
        )


    @validate_call
    def v1_pause_inference_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Mark a standalone inference deployment as paused (raw response variant)

        Sets the paused flag on the specified deployment and its child
        resources. Paused deployments are not reconciled until resumed.
        Useful for maintenance or troubleshooting.

        Use this endpoint when you need to:

        - Avoid inference or container reconciliation during maintenance
        - Temporarily halt automatic updates

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly.

        Unlike the other variants of this operation, this method neither
        reads the response body nor deserializes it, so no status-code to
        model mapping is needed here.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The ``response`` attribute of the object returned by
                 ``api_client.call_api``.
        """ # noqa: E501

        _param = self._v1_pause_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        # Hand back the underlying response untouched; the caller decides
        # how (and whether) to consume the body.
        return response_data.response


    def _v1_pause_inference_serialize(
        self,
        project_name,
        inference_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Build the serialized request for the pause-inference operation.

        Collects the path parameters and headers and delegates to
        ``api_client.param_serialize`` for
        ``POST /v1/admin/{project_name}/inferences/{inference_name}/pause``.
        The operation has no query, form, file or body parameters.

        :param project_name: Project name; added to the path parameters
                             unless it is None.
        :param inference_name: Inference deployment name; added to the path
                               parameters unless it is None.
        :param _request_auth: Per-request auth override, passed through to
                              ``api_client.param_serialize``.
        :param _content_type: Not used in this method's body.
        :param _headers: Optional per-request headers. They are copied, so
                         the mapping passed in is never modified.
        :param _host_index: Not used in this method's body.
        :return: Whatever ``api_client.param_serialize`` returns for the
                 collected parameters.
        """

        _host = None

        _collection_formats: Dict[str, str] = {
        }

        _path_params: Dict[str, str] = {}
        _query_params: List[Tuple[str, str]] = []
        # Work on a copy: the default `Accept` header added below must not
        # be written into the dict object supplied by the caller.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # process the path parameters
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name
        # process the query parameters
        # process the header parameters
        # process the form parameters
        # process the body parameter


        # set the HTTP header `Accept` (only when the caller did not)
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )


        # authentication setting: no auth scheme names are listed here
        _auth_settings: List[str] = [
        ]

        return self.api_client.param_serialize(
            method='POST',
            resource_path='/v1/admin/{project_name}/inferences/{inference_name}/pause',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_resume_inference(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> None:
        """Resume a paused inference deployment

        Resumes a previously paused standalone inference deployment,
        including all child resources. Once resumed, the deployment re-enters
        reconciliation. Useful after maintenance or troubleshooting.

        Use this endpoint when you need to:

        - Resume an inference and containers reconciliations after pausing

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly.

        The response body is read in full, deserialized through
        ``api_client.response_deserialize``, and only the ``data`` attribute
        of that result is returned.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The ``data`` attribute of the deserialized response
                 (annotated as None; a 204 maps to no body).
        """ # noqa: E501

        # Status code -> model name used to decode the body (None: no body).
        status_models: Dict[str, Optional[str]] = {
            '204': None,
            '404': "ApiErrorResponse",
        }

        serialized_request = self._v1_resume_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        # The body must be loaded before it can be deserialized.
        raw_response.read()

        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_models,
        )
        return deserialized.data


    @validate_call
    def v1_resume_inference_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[None]:
        """Resume a paused inference deployment (ApiResponse variant)

        Resumes a previously paused standalone inference deployment,
        including all child resources. Once resumed, the deployment re-enters
        reconciliation. Useful after maintenance or troubleshooting.

        Use this endpoint when you need to:

        - Resume an inference and containers reconciliations after pausing

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly.

        The response body is read in full and then handed to
        ``api_client.response_deserialize``; the wrapper it produces is
        returned as is.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The result of ``api_client.response_deserialize`` for this
                 call (annotated as ``ApiResponse[None]``).
        """ # noqa: E501

        # Status code -> model name used to decode the body (None: no body).
        status_models: Dict[str, Optional[str]] = {
            '204': None,
            '404': "ApiErrorResponse",
        }

        serialized_request = self._v1_resume_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        # The body must be loaded before it can be deserialized.
        raw_response.read()

        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_models,
        )


    @validate_call
    def v1_resume_inference_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Resume a paused inference deployment (raw response variant)

        Resumes a previously paused standalone inference deployment,
        including all child resources. Once resumed, the deployment re-enters
        reconciliation. Useful after maintenance or troubleshooting.

        Use this endpoint when you need to:

        - Resume an inference and containers reconciliations after pausing

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly.

        Unlike the other variants of this operation, this method neither
        reads the response body nor deserializes it, so no status-code to
        model mapping is needed here.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The ``response`` attribute of the object returned by
                 ``api_client.call_api``.
        """ # noqa: E501

        _param = self._v1_resume_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        # Hand back the underlying response untouched; the caller decides
        # how (and whether) to consume the body.
        return response_data.response


    def _v1_resume_inference_serialize(
        self,
        project_name,
        inference_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Build the serialized request for the resume-inference operation.

        Collects the path parameters and headers and delegates to
        ``api_client.param_serialize`` for
        ``POST /v1/admin/{project_name}/inferences/{inference_name}/resume``.
        The operation has no query, form, file or body parameters.

        :param project_name: Project name; added to the path parameters
                             unless it is None.
        :param inference_name: Inference deployment name; added to the path
                               parameters unless it is None.
        :param _request_auth: Per-request auth override, passed through to
                              ``api_client.param_serialize``.
        :param _content_type: Not used in this method's body.
        :param _headers: Optional per-request headers. They are copied, so
                         the mapping passed in is never modified.
        :param _host_index: Not used in this method's body.
        :return: Whatever ``api_client.param_serialize`` returns for the
                 collected parameters.
        """

        _host = None

        _collection_formats: Dict[str, str] = {
        }

        _path_params: Dict[str, str] = {}
        _query_params: List[Tuple[str, str]] = []
        # Work on a copy: the default `Accept` header added below must not
        # be written into the dict object supplied by the caller.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # process the path parameters
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name
        # process the query parameters
        # process the header parameters
        # process the form parameters
        # process the body parameter


        # set the HTTP header `Accept` (only when the caller did not)
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )


        # authentication setting: no auth scheme names are listed here
        _auth_settings: List[str] = [
        ]

        return self.api_client.param_serialize(
            method='POST',
            resource_path='/v1/admin/{project_name}/inferences/{inference_name}/resume',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_update_inference(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        v1_update_inference_request: Annotated[V1UpdateInferenceRequest, Field(description="Updated inference deployment configuration")],
        dry_run: Annotated[Optional[StrictBool], Field(description="Perform validation but do not apply any changes")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1InferenceResponse:
        """Update inference deployment

        This endpoint allows you to modify the configuration of an existing
        standalone inference deployment, including its container image,
        resource requirements, scaling options, and networking settings. You
        can also update which regions the inference is deployed to.

        Inference deployments are containerized services that run machine
        learning models or related components. Standalone inferences can be
        updated directly using this endpoint.

        Use this endpoint when you need to:

        - Change the container image or version of an inference
        - Modify resource allocations (CPU, memory, GPU)
        - Update scaling parameters
        - Change the regions where an inference is deployed
        - Modify environment variables or other configuration

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly. If you attempt
        to update a read-only inference, you will receive an error. Such
        inferences must be managed through the parent application deployment
        using the `/v1/{project_name}/apps/deployments` endpoints.

        The response body is read in full, deserialized through
        ``api_client.response_deserialize``, and only the ``data`` attribute
        of that result is returned.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param v1_update_inference_request: Updated inference deployment
                                            configuration (required)
        :type v1_update_inference_request: V1UpdateInferenceRequest
        :param dry_run: Perform validation but do not apply any changes
        :type dry_run: bool, optional
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The ``data`` attribute of the deserialized response.
                 NOTE(review): a 204 is mapped to "str" below while the
                 annotation is V1InferenceResponse; confirm what callers
                 receive for 204.
        """ # noqa: E501

        # Status code -> model name used to decode the body.
        status_models: Dict[str, Optional[str]] = {
            '200': "V1InferenceResponse",
            '204': "str",
            '404': "ApiErrorResponse",
        }

        serialized_request = self._v1_update_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            v1_update_inference_request=v1_update_inference_request,
            dry_run=dry_run,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        # The body must be loaded before it can be deserialized.
        raw_response.read()

        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_models,
        )
        return deserialized.data


    @validate_call
    def v1_update_inference_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        v1_update_inference_request: Annotated[V1UpdateInferenceRequest, Field(description="Updated inference deployment configuration")],
        dry_run: Annotated[Optional[StrictBool], Field(description="Perform validation but do not apply any changes")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1InferenceResponse]:
        """Update inference deployment (ApiResponse variant)

        This endpoint allows you to modify the configuration of an existing
        standalone inference deployment, including its container image,
        resource requirements, scaling options, and networking settings. You
        can also update which regions the inference is deployed to.

        Inference deployments are containerized services that run machine
        learning models or related components. Standalone inferences can be
        updated directly using this endpoint.

        Use this endpoint when you need to:

        - Change the container image or version of an inference
        - Modify resource allocations (CPU, memory, GPU)
        - Update scaling parameters
        - Change the regions where an inference is deployed
        - Modify environment variables or other configuration

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly. If you attempt
        to update a read-only inference, you will receive an error. Such
        inferences must be managed through the parent application deployment
        using the `/v1/{project_name}/apps/deployments` endpoints.

        The response body is read in full and then handed to
        ``api_client.response_deserialize``; the wrapper it produces is
        returned as is.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param v1_update_inference_request: Updated inference deployment
                                            configuration (required)
        :type v1_update_inference_request: V1UpdateInferenceRequest
        :param dry_run: Perform validation but do not apply any changes
        :type dry_run: bool, optional
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The result of ``api_client.response_deserialize`` for this
                 call. NOTE(review): a 204 is mapped to "str" below while the
                 annotation is ApiResponse[V1InferenceResponse]; confirm.
        """ # noqa: E501

        # Status code -> model name used to decode the body.
        status_models: Dict[str, Optional[str]] = {
            '200': "V1InferenceResponse",
            '204': "str",
            '404': "ApiErrorResponse",
        }

        serialized_request = self._v1_update_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            v1_update_inference_request=v1_update_inference_request,
            dry_run=dry_run,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )

        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        # The body must be loaded before it can be deserialized.
        raw_response.read()

        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=status_models,
        )


    @validate_call
    def v1_update_inference_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        inference_name: Annotated[StrictStr, Field(description="Inference deployment name")],
        v1_update_inference_request: Annotated[V1UpdateInferenceRequest, Field(description="Updated inference deployment configuration")],
        dry_run: Annotated[Optional[StrictBool], Field(description="Perform validation but do not apply any changes")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Update inference deployment (raw response variant)

        This endpoint allows you to modify the configuration of an existing
        standalone inference deployment, including its container image,
        resource requirements, scaling options, and networking settings. You
        can also update which regions the inference is deployed to.

        Inference deployments are containerized services that run machine
        learning models or related components. Standalone inferences can be
        updated directly using this endpoint.

        Use this endpoint when you need to:

        - Change the container image or version of an inference
        - Modify resource allocations (CPU, memory, GPU)
        - Update scaling parameters
        - Change the regions where an inference is deployed
        - Modify environment variables or other configuration

        Note: This endpoint can only update standalone inference deployments.
        Inferences that are part of application deployments from the apps
        catalog are read-only and cannot be modified directly. If you attempt
        to update a read-only inference, you will receive an error. Such
        inferences must be managed through the parent application deployment
        using the `/v1/{project_name}/apps/deployments` endpoints.

        Unlike the other variants of this operation, this method neither
        reads the response body nor deserializes it, so no status-code to
        model mapping is needed here.

        :param project_name: Project name (required)
        :type project_name: str
        :param inference_name: Inference deployment name (required)
        :type inference_name: str
        :param v1_update_inference_request: Updated inference deployment
                                            configuration (required)
        :type v1_update_inference_request: V1UpdateInferenceRequest
        :param dry_run: Perform validation but do not apply any changes
        :type dry_run: bool, optional
        :param _request_timeout: Timeout for this request. A single positive
                                 number is the total request timeout; a pair
                                 (tuple) is (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: Overrides the auth settings for this single
                              request, effectively ignoring the
                              authentication in the spec.
        :type _request_auth: dict, optional
        :param _content_type: Forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: Overrides the headers for this single request,
                         effectively ignoring the headers in the spec.
        :type _headers: dict, optional
        :param _host_index: Overrides the host_index for this single request;
                            validation only admits 0.
        :type _host_index: int, optional
        :return: The ``response`` attribute of the object returned by
                 ``api_client.call_api``.
        """ # noqa: E501

        _param = self._v1_update_inference_serialize(
            project_name=project_name,
            inference_name=inference_name,
            v1_update_inference_request=v1_update_inference_request,
            dry_run=dry_run,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        # Hand back the underlying response untouched; the caller decides
        # how (and whether) to consume the body.
        return response_data.response


    def _v1_update_inference_serialize(
        self,
        project_name,
        inference_name,
        v1_update_inference_request,
        dry_run,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Assemble the serialized request for the "update inference" call.

        Collects the path, query, header and body parameters for
        ``PUT /v1/{project_name}/inferences/{inference_name}`` and hands
        them to ``self.api_client.param_serialize``.

        :param project_name: value for the ``project_name`` path segment;
                             omitted from the path parameters when ``None``.
        :param inference_name: value for the ``inference_name`` path segment;
                               omitted from the path parameters when ``None``.
        :param v1_update_inference_request: request body; passed through
                                            unchanged when not ``None``.
        :param dry_run: sent as the ``dry_run`` query parameter when not
                        ``None``.
        :param _request_auth: forwarded as-is to ``param_serialize``.
        :param _content_type: when truthy, used verbatim as the
                              ``Content-Type`` header; otherwise the header
                              is chosen by
                              ``api_client.select_header_content_type``.
        :param _headers: optional caller-supplied headers. They are copied,
                         so the caller's mapping is never modified.
        :param _host_index: accepted for signature parity with the public
                            methods; not referenced in this method.
        :return: whatever ``self.api_client.param_serialize`` returns.
        """

        _host = None

        _collection_formats: Dict[str, str] = {
        }

        _path_params: Dict[str, str] = {}
        _query_params: List[Tuple[str, str]] = []
        # Work on a shallow copy: `Accept` / `Content-Type` are written into
        # this dict below, and aliasing the caller's `_headers` would leak
        # those entries into the caller's object (and into any later call
        # that reuses it, where the `'Accept' not in ...` guard would then
        # skip negotiation).
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # process the path parameters
        if project_name is not None:
            _path_params['project_name'] = project_name
        if inference_name is not None:
            _path_params['inference_name'] = inference_name

        # process the query parameters
        if dry_run is not None:
            _query_params.append(('dry_run', dry_run))

        # process the header parameters (none for this operation)
        # process the form parameters (none for this operation)

        # process the body parameter
        if v1_update_inference_request is not None:
            _body_params = v1_update_inference_request

        # set the HTTP header `Accept`, unless the caller already chose one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # set the HTTP header `Content-Type`; an explicit `_content_type`
        # always wins over the negotiated default
        if _content_type:
            _header_params['Content-Type'] = _content_type
        else:
            _default_content_type = (
                self.api_client.select_header_content_type(
                    [
                        'application/json'
                    ]
                )
            )
            if _default_content_type is not None:
                _header_params['Content-Type'] = _default_content_type

        # authentication setting: no schemes are listed for this operation
        _auth_settings: List[str] = [
        ]

        return self.api_client.param_serialize(
            method='PUT',
            resource_path='/v1/{project_name}/inferences/{inference_name}',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )


