# coding: utf-8

"""
    Inference Box API

    ## Welcome to Inference Box API  A comprehensive REST API for managing cloud infrastructure resources including ML inference deployments, Kubernetes resources, and application lifecycle management.  ## Overview  Inference Box API is a cloud-native REST API service that provides unified access to manage:  - **ML Inference Deployments**: Create, manage, and scale machine learning inference services - **Slurm Clusters**: Deploy, configure, and manage HPC Slurm clusters for batch workloads - **Kubernetes Resources**: Manage namespaces (projects), pods, secrets, volumes, and other K8s resources - **Application Lifecycle**: Deploy and manage applications from catalog templates - **Resource Management**: Handle flavors, capacities, quotas, and node groups - **User & Access Control**: Manage users, groups, and API keys with RBAC integration - **Monitoring & Metrics**: Query metrics and monitor resource usage via Victoria Metrics  The service is built with Go and integrates with Kubernetes, HashiCorp Vault, and Victoria Metrics to provide a complete cloud infrastructure management solution.  ## Key API Endpoints  - **Inference**: `/v1/{project_name}/inferences` - ML inference deployment management - **Inference (Admin)**: `/v1/admin/{project_name}/inferences` - ML inference deployment management (admin only) - **Slurm Clusters**: `/v1/{project_name}/slurm/clusters` - Slurm cluster deployment and management - **Flavors**: `/v1/flavors` - Compute resource flavor definitions - **Flavors Management**: `/v1/admin/flavors` - Compute resource flavor definitions management - **Users**: `/v1/admin/users` - User management (admin only) - **Projects**: `/v1/{project_name}` - Project-scoped resource operations - **Metrics**: `/v1/{project_name}/metrics` - Resource usage metrics  ## Getting Started  ### Authentication  All API requests require authentication using the `X-API-Key` header. Most operations are scoped to specific projects/namespaces.  
### API Organization  The API is organized into logical groups: - **Public endpoints**: `/v1/*` - General access endpoints - **Project-scoped**: `/v1/{project_name}/*` - Project-specific resources - **Admin endpoints**: `/v1/admin/*` - Administrative operations  ### Documentation  Interactive documentation is available at `/docs` when the service is running. 

    The version of the OpenAPI document: 2.0
    Generated by OpenAPI Generator (https://openapi-generator.tech)

    Do not edit the class manually.
"""  # noqa: E501

import warnings
from pydantic import validate_call, Field, StrictFloat, StrictStr, StrictInt
from typing import Any, Dict, List, Optional, Tuple, Union
from typing_extensions import Annotated

from pydantic import Field, StrictInt, StrictStr
from typing import Any, Dict, Optional
from typing_extensions import Annotated
from si_tests.clients.gcore.box_api.models.v1_metrics_query_response import V1MetricsQueryResponse

from si_tests.clients.gcore.box_api.api_client import ApiClient, RequestSerialized
from si_tests.clients.gcore.box_api.api_response import ApiResponse
from si_tests.clients.gcore.box_api.rest import RESTResponseType


class MetricsApi:
    """NOTE: This class is auto generated by OpenAPI Generator
    Ref: https://openapi-generator.tech

    Do not edit the class manually.
    """

    def __init__(self, api_client=None) -> None:
        """Bind this API group to the client that sends its requests.

        :param api_client: client used to serialize and send requests; when
                           ``None``, ``ApiClient.get_default()`` is used.
        """
        self.api_client = (
            ApiClient.get_default() if api_client is None else api_client
        )


    @validate_call
    def v1_get_raw_metrics(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> object:
        """Return raw metrics (deserialized payload only)

        Requests the raw, Prometheus-format metrics of the given project,
        reads the response and returns the ``data`` attribute of the
        deserialized result.

        :param project_name: Project name (required)
        :type project_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 taken as (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: per-request override of the auth settings;
                              forwarded to the request serializer.
        :type _request_auth: dict, optional
        :param _content_type: per-request content type; forwarded to the
                              request serializer.
        :type _content_type: str, optional
        :param _headers: per-request headers; forwarded to the request
                         serializer.
        :type _headers: dict, optional
        :param _host_index: per-request host index; forwarded to the request
                            serializer.
        :type _host_index: int, optional
        :return: the ``data`` attribute of the deserialized response.
        """ # noqa: E501

        serialized_request = self._v1_get_raw_metrics_serialize(
            project_name=project_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        raw_response.read()

        # HTTP status code -> type name handed to response_deserialize().
        types_by_status: Dict[str, Optional[str]] = {
            '200': "object",
            '400': "ApiErrorResponse",
        }
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )
        return deserialized.data


    @validate_call
    def v1_get_raw_metrics_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[object]:
        """Return raw metrics (full deserialized response)

        Requests the raw, Prometheus-format metrics of the given project,
        reads the response and returns the complete object produced by
        ``response_deserialize`` rather than only its ``data`` attribute.

        :param project_name: Project name (required)
        :type project_name: str
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 taken as (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: per-request override of the auth settings;
                              forwarded to the request serializer.
        :type _request_auth: dict, optional
        :param _content_type: per-request content type; forwarded to the
                              request serializer.
        :type _content_type: str, optional
        :param _headers: per-request headers; forwarded to the request
                         serializer.
        :type _headers: dict, optional
        :param _host_index: per-request host index; forwarded to the request
                            serializer.
        :type _host_index: int, optional
        :return: the deserialized API response wrapper.
        """ # noqa: E501

        serialized_request = self._v1_get_raw_metrics_serialize(
            project_name=project_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        raw_response.read()

        # HTTP status code -> type name handed to response_deserialize().
        types_by_status: Dict[str, Optional[str]] = {
            '200': "object",
            '400': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )


    @validate_call
    def v1_get_raw_metrics_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Return raw metrics (response not read or deserialized)

        Returns raw metrics data in Prometheus format for the specified project. This endpoint is useful for monitoring tools that can consume Prometheus metrics directly.

        Unlike the sibling variants, this method neither calls ``read()`` on
        the response nor deserializes it; the caller receives the
        ``response`` attribute of whatever ``api_client.call_api`` returned.

        :param project_name: Project name (required)
        :type project_name: str
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; forwarded to the request serializer.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single request;
                         forwarded to the request serializer.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer.
        :type _host_index: int, optional
        :return: the un-read, un-deserialized underlying response.
        """ # noqa: E501

        _param = self._v1_get_raw_metrics_serialize(
            project_name=project_name,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-type map is needed here: nothing is deserialized.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_get_raw_metrics_serialize(
        self,
        project_name,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Build the serialized ``GET /v1/{project_name}/metrics`` request.

        :param project_name: value for the ``project_name`` path parameter;
                             omitted from the path parameters when ``None``.
        :param _request_auth: passed through to ``param_serialize``.
        :param _content_type: accepted for signature parity; not used here.
        :param _headers: optional extra request headers. The mapping is
                         copied, so the caller's dict is never modified.
        :param _host_index: accepted for signature parity; not used here.
        :return: whatever ``api_client.param_serialize`` returns.
        """
        # Copy instead of aliasing: the `Accept` default added below would
        # otherwise be written into the dict object the caller passed in.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )

        # process the path parameters
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name

        # set the HTTP header `Accept` unless the caller supplied one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json',
                    'application/openmetrics-text; version=1.0.0; charset=utf-8',
                    'text/plain; version=0.0.4'
                ]
            )

        # This operation has no query, form, file or body parameters and
        # declares no authentication settings, hence the empty containers.
        _query_params: List[Tuple[str, str]] = []
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _auth_settings: List[str] = []
        _collection_formats: Dict[str, str] = {}

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/metrics',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=None,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=None,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_query_metrics(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        query: Annotated[StrictStr, Field(description="PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported.")],
        time: Annotated[Optional[StrictStr], Field(description="Evaluation timestamp in RFC-3339 format or unix timestamp. Optional.")] = None,
        timeout: Annotated[Optional[StrictStr], Field(description="Evaluation timeout in duration format. Optional.")] = None,
        limit: Annotated[Optional[StrictInt], Field(description="Maximum number of returned series. Optional. 0 means disabled")] = None,
        inference: Annotated[Optional[StrictStr], Field(description="Inference name filter. Optional. Defaults to all inference deployments within a namespace.")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1MetricsQueryResponse:
        """Execute an instant query for metrics (deserialized payload only)

        Evaluates a PromQL expression at a single point in time for the given
        project, reads the response and returns the ``data`` attribute of the
        deserialized result. Results can be narrowed by inference name, and
        the evaluation time and timeout can be set.

        :param project_name: Project name (required)
        :type project_name: str
        :param query: PromQL expression to evaluate, e.g.
                      `sum(rate(container_cpu_usage_seconds_total[5m]))`.
                      Only Vector and Matrix results are supported; the
                      supported metric names and prefixes are listed in the
                      parameter's field description. (required)
        :type query: str
        :param time: Evaluation timestamp in RFC-3339 format or unix timestamp.
        :type time: str, optional
        :param timeout: Evaluation timeout in duration format.
        :type timeout: str, optional
        :param limit: Maximum number of returned series. 0 means disabled.
        :type limit: int, optional
        :param inference: Inference name filter. Defaults to all inference
                          deployments within a namespace.
        :type inference: str, optional
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 taken as (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: per-request override of the auth settings;
                              forwarded to the request serializer.
        :type _request_auth: dict, optional
        :param _content_type: per-request content type; forwarded to the
                              request serializer.
        :type _content_type: str, optional
        :param _headers: per-request headers; forwarded to the request
                         serializer.
        :type _headers: dict, optional
        :param _host_index: per-request host index; forwarded to the request
                            serializer.
        :type _host_index: int, optional
        :return: the ``data`` attribute of the deserialized response.
        """ # noqa: E501

        serialized_request = self._v1_query_metrics_serialize(
            project_name=project_name,
            query=query,
            time=time,
            timeout=timeout,
            limit=limit,
            inference=inference,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        raw_response.read()

        # HTTP status code -> type name handed to response_deserialize().
        types_by_status: Dict[str, Optional[str]] = {
            '200': "V1MetricsQueryResponse",
            '400': "ApiErrorResponse",
        }
        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )
        return deserialized.data


    @validate_call
    def v1_query_metrics_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        query: Annotated[StrictStr, Field(description="PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported.")],
        time: Annotated[Optional[StrictStr], Field(description="Evaluation timestamp in RFC-3339 format or unix timestamp. Optional.")] = None,
        timeout: Annotated[Optional[StrictStr], Field(description="Evaluation timeout in duration format. Optional.")] = None,
        limit: Annotated[Optional[StrictInt], Field(description="Maximum number of returned series. Optional. 0 means disabled")] = None,
        inference: Annotated[Optional[StrictStr], Field(description="Inference name filter. Optional. Defaults to all inference deployments within a namespace.")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1MetricsQueryResponse]:
        """Execute an instant query for metrics (full deserialized response)

        Evaluates a PromQL expression at a single point in time for the given
        project, reads the response and returns the complete object produced
        by ``response_deserialize`` rather than only its ``data`` attribute.
        Results can be narrowed by inference name, and the evaluation time
        and timeout can be set.

        :param project_name: Project name (required)
        :type project_name: str
        :param query: PromQL expression to evaluate, e.g.
                      `sum(rate(container_cpu_usage_seconds_total[5m]))`.
                      Only Vector and Matrix results are supported; the
                      supported metric names and prefixes are listed in the
                      parameter's field description. (required)
        :type query: str
        :param time: Evaluation timestamp in RFC-3339 format or unix timestamp.
        :type time: str, optional
        :param timeout: Evaluation timeout in duration format.
        :type timeout: str, optional
        :param limit: Maximum number of returned series. 0 means disabled.
        :type limit: int, optional
        :param inference: Inference name filter. Defaults to all inference
                          deployments within a namespace.
        :type inference: str, optional
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 taken as (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: per-request override of the auth settings;
                              forwarded to the request serializer.
        :type _request_auth: dict, optional
        :param _content_type: per-request content type; forwarded to the
                              request serializer.
        :type _content_type: str, optional
        :param _headers: per-request headers; forwarded to the request
                         serializer.
        :type _headers: dict, optional
        :param _host_index: per-request host index; forwarded to the request
                            serializer.
        :type _host_index: int, optional
        :return: the deserialized API response wrapper.
        """ # noqa: E501

        serialized_request = self._v1_query_metrics_serialize(
            project_name=project_name,
            query=query,
            time=time,
            timeout=timeout,
            limit=limit,
            inference=inference,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index,
        )
        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout,
        )
        raw_response.read()

        # HTTP status code -> type name handed to response_deserialize().
        types_by_status: Dict[str, Optional[str]] = {
            '200': "V1MetricsQueryResponse",
            '400': "ApiErrorResponse",
        }
        return self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=types_by_status,
        )


    @validate_call
    def v1_query_metrics_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        query: Annotated[StrictStr, Field(description="PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported.")],
        time: Annotated[Optional[StrictStr], Field(description="Evaluation timestamp in RFC-3339 format or unix timestamp. Optional.")] = None,
        timeout: Annotated[Optional[StrictStr], Field(description="Evaluation timeout in duration format. Optional.")] = None,
        limit: Annotated[Optional[StrictInt], Field(description="Maximum number of returned series. Optional. 0 means disabled")] = None,
        inference: Annotated[Optional[StrictStr], Field(description="Inference name filter. Optional. Defaults to all inference deployments within a namespace.")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Execute an instant query for metrics (response not read or deserialized)

        Executes a Prometheus query at a single point in time. This endpoint allows you to retrieve metrics data using PromQL (Prometheus Query Language) expressions. You can filter the results by inference name and specify evaluation time and timeout.

        Unlike the sibling variants, this method neither calls ``read()`` on
        the response nor deserializes it; the caller receives the
        ``response`` attribute of whatever ``api_client.call_api`` returned.

        :param project_name: Project name (required)
        :type project_name: str
        :param query: PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported. (required)
        :type query: str
        :param time: Evaluation timestamp in RFC-3339 format or unix timestamp. Optional.
        :type time: str
        :param timeout: Evaluation timeout in duration format. Optional.
        :type timeout: str
        :param limit: Maximum number of returned series. Optional. 0 means disabled
        :type limit: int
        :param inference: Inference name filter. Optional. Defaults to all inference deployments within a namespace.
        :type inference: str
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; forwarded to the request serializer.
        :type _request_auth: dict, optional
        :param _content_type: content-type override; forwarded to the request
                              serializer.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single request;
                         forwarded to the request serializer.
        :type _headers: dict, optional
        :param _host_index: host index override; forwarded to the request
                            serializer.
        :type _host_index: int, optional
        :return: the un-read, un-deserialized underlying response.
        """ # noqa: E501

        _param = self._v1_query_metrics_serialize(
            project_name=project_name,
            query=query,
            time=time,
            timeout=timeout,
            limit=limit,
            inference=inference,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-type map is needed here: nothing is deserialized.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_query_metrics_serialize(
        self,
        project_name,
        query,
        time,
        timeout,
        limit,
        inference,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Build the serialized ``GET /v1/{project_name}/metrics/query`` request.

        :param project_name: value for the ``project_name`` path parameter;
                             omitted from the path parameters when ``None``.
        :param query: value for the ``query`` query parameter.
        :param time: value for the ``time`` query parameter.
        :param timeout: value for the ``timeout`` query parameter.
        :param limit: value for the ``limit`` query parameter.
        :param inference: value for the ``inference`` query parameter.
        :param _request_auth: passed through to ``param_serialize``.
        :param _content_type: accepted for signature parity; not used here.
        :param _headers: optional extra request headers. The mapping is
                         copied, so the caller's dict is never modified.
        :param _host_index: accepted for signature parity; not used here.
        :return: whatever ``api_client.param_serialize`` returns.

        Query parameters whose value is ``None`` are left out entirely.
        """
        # Copy instead of aliasing: the `Accept` default added below would
        # otherwise be written into the dict object the caller passed in.
        _header_params: Dict[str, Optional[str]] = (
            dict(_headers) if _headers else {}
        )

        # process the path parameters
        _path_params: Dict[str, str] = {}
        if project_name is not None:
            _path_params['project_name'] = project_name

        # process the query parameters; the tuple order is the order in which
        # they are handed to param_serialize, so keep it stable.
        _candidate_query_params = (
            ('time', time),
            ('timeout', timeout),
            ('limit', limit),
            ('inference', inference),
            ('query', query),
        )
        _query_params: List[Tuple[str, Any]] = [
            (name, value)
            for name, value in _candidate_query_params
            if value is not None
        ]

        # set the HTTP header `Accept` unless the caller supplied one
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # This operation has no form, file or body parameters and declares
        # no authentication settings, hence the empty containers.
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _auth_settings: List[str] = []
        _collection_formats: Dict[str, str] = {}

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/metrics/query',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=None,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=None,
            _request_auth=_request_auth
        )




    @validate_call
    def v1_query_range_metrics(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        start: Annotated[StrictStr, Field(description="Start timestamp in RFC-3339 format or unix timestamp, inclusive.")],
        end: Annotated[StrictStr, Field(description="End timestamp in RFC-3339 format or unix timestamp, inclusive.")],
        step: Annotated[StrictStr, Field(description="Query resolution step width in duration format or float number of seconds.")],
        query: Annotated[StrictStr, Field(description="PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported.")],
        timeout: Annotated[Optional[StrictStr], Field(description="Evaluation timeout in duration format. Optional.")] = None,
        limit: Annotated[Optional[StrictInt], Field(description="Maximum number of returned series. Optional. 0 means disabled")] = None,
        inference: Annotated[Optional[StrictStr], Field(description="Inference name filter. Optional. Defaults to all inference deployments within a namespace.")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> V1MetricsQueryResponse:
        """Run a PromQL range query and return the parsed result

        Issues a range query for one project and hands back only the ``data``
        attribute of the deserialized response. The query is a PromQL
        (Prometheus Query Language) expression evaluated between ``start`` and
        ``end`` at ``step`` resolution; the result can be narrowed to a single
        inference by name. A 200 response is mapped to
        ``V1MetricsQueryResponse`` and a 400 response to ``ApiErrorResponse``.

        :param project_name: Name of the project to query. (required)
        :type project_name: str
        :param start: Inclusive start of the range, as an RFC-3339 timestamp or a unix timestamp. (required)
        :type start: str
        :param end: Inclusive end of the range, as an RFC-3339 timestamp or a unix timestamp. (required)
        :type end: str
        :param step: Resolution step width, as a duration or a float number of seconds. (required)
        :type step: str
        :param query: PromQL expression to evaluate, e.g. `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`, plus metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Only Vector and Matrix results are supported; Scalar and String results are not. (required)
        :type query: str
        :param timeout: Evaluation timeout in duration format.
        :type timeout: str, optional
        :param limit: Maximum number of returned series; 0 means disabled.
        :type limit: int, optional
        :param inference: Inference name filter. When omitted, all inference
                          deployments within the namespace are included.
        :type inference: str, optional
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 read as (connection, read) timeouts. Values
                                 must be greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request; this
                         effectively ignores the headers in the spec for
                         that request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for a single request.
                            Validation only admits the value 0.
        :type _host_index: int, optional
        :return: The ``data`` attribute of the deserialized response.
        :rtype: V1MetricsQueryResponse
        """ # noqa: E501

        serialized_request = self._v1_query_range_metrics_serialize(
            project_name=project_name,
            start=start,
            end=end,
            step=step,
            query=query,
            timeout=timeout,
            limit=limit,
            inference=inference,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # HTTP status code -> model name handed to the deserializer.
        response_models: Dict[str, Optional[str]] = {
            '200': "V1MetricsQueryResponse",
            '400': "ApiErrorResponse",
        }

        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout
        )
        # read() is invoked before deserialization, as in the sibling
        # *_with_http_info variant.
        raw_response.read()

        deserialized = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=response_models,
        )
        return deserialized.data


    @validate_call
    def v1_query_range_metrics_with_http_info(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        start: Annotated[StrictStr, Field(description="Start timestamp in RFC-3339 format or unix timestamp, inclusive.")],
        end: Annotated[StrictStr, Field(description="End timestamp in RFC-3339 format or unix timestamp, inclusive.")],
        step: Annotated[StrictStr, Field(description="Query resolution step width in duration format or float number of seconds.")],
        query: Annotated[StrictStr, Field(description="PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported.")],
        timeout: Annotated[Optional[StrictStr], Field(description="Evaluation timeout in duration format. Optional.")] = None,
        limit: Annotated[Optional[StrictInt], Field(description="Maximum number of returned series. Optional. 0 means disabled")] = None,
        inference: Annotated[Optional[StrictStr], Field(description="Inference name filter. Optional. Defaults to all inference deployments within a namespace.")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> ApiResponse[V1MetricsQueryResponse]:
        """Run a PromQL range query and return the full deserialized response

        Same request as ``v1_query_range_metrics``, but the complete object
        produced by ``api_client.response_deserialize`` is returned instead of
        only its ``data`` attribute. The query is a PromQL (Prometheus Query
        Language) expression evaluated between ``start`` and ``end`` at
        ``step`` resolution; the result can be narrowed to a single inference
        by name. A 200 response is mapped to ``V1MetricsQueryResponse`` and a
        400 response to ``ApiErrorResponse``.

        :param project_name: Name of the project to query. (required)
        :type project_name: str
        :param start: Inclusive start of the range, as an RFC-3339 timestamp or a unix timestamp. (required)
        :type start: str
        :param end: Inclusive end of the range, as an RFC-3339 timestamp or a unix timestamp. (required)
        :type end: str
        :param step: Resolution step width, as a duration or a float number of seconds. (required)
        :type step: str
        :param query: PromQL expression to evaluate, e.g. `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`, plus metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Only Vector and Matrix results are supported; Scalar and String results are not. (required)
        :type query: str
        :param timeout: Evaluation timeout in duration format.
        :type timeout: str, optional
        :param limit: Maximum number of returned series; 0 means disabled.
        :type limit: int, optional
        :param inference: Inference name filter. When omitted, all inference
                          deployments within the namespace are included.
        :type inference: str, optional
        :param _request_timeout: timeout for this request. A single number is
                                 the total request timeout; a pair (tuple) is
                                 read as (connection, read) timeouts. Values
                                 must be greater than 0.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: overrides the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for that request.
        :type _request_auth: dict, optional
        :param _content_type: forces the content-type for the request.
        :type _content_type: str, optional
        :param _headers: overrides the headers for a single request; this
                         effectively ignores the headers in the spec for
                         that request.
        :type _headers: dict, optional
        :param _host_index: overrides the host_index for a single request.
                            Validation only admits the value 0.
        :type _host_index: int, optional
        :return: The deserialized response wrapper, unmodified.
        :rtype: ApiResponse[V1MetricsQueryResponse]
        """ # noqa: E501

        serialized_request = self._v1_query_range_metrics_serialize(
            project_name=project_name,
            start=start,
            end=end,
            step=step,
            query=query,
            timeout=timeout,
            limit=limit,
            inference=inference,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # HTTP status code -> model name handed to the deserializer.
        response_models: Dict[str, Optional[str]] = {
            '200': "V1MetricsQueryResponse",
            '400': "ApiErrorResponse",
        }

        raw_response = self.api_client.call_api(
            *serialized_request,
            _request_timeout=_request_timeout
        )
        # read() is invoked before the response is passed on for
        # deserialization.
        raw_response.read()

        wrapped = self.api_client.response_deserialize(
            response_data=raw_response,
            response_types_map=response_models,
        )
        return wrapped


    @validate_call
    def v1_query_range_metrics_without_preload_content(
        self,
        project_name: Annotated[StrictStr, Field(description="Project name")],
        start: Annotated[StrictStr, Field(description="Start timestamp in RFC-3339 format or unix timestamp, inclusive.")],
        end: Annotated[StrictStr, Field(description="End timestamp in RFC-3339 format or unix timestamp, inclusive.")],
        step: Annotated[StrictStr, Field(description="Query resolution step width in duration format or float number of seconds.")],
        query: Annotated[StrictStr, Field(description="PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported.")],
        timeout: Annotated[Optional[StrictStr], Field(description="Evaluation timeout in duration format. Optional.")] = None,
        limit: Annotated[Optional[StrictInt], Field(description="Maximum number of returned series. Optional. 0 means disabled")] = None,
        inference: Annotated[Optional[StrictStr], Field(description="Inference name filter. Optional. Defaults to all inference deployments within a namespace.")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
            Tuple[
                Annotated[StrictFloat, Field(gt=0)],
                Annotated[StrictFloat, Field(gt=0)]
            ]
        ] = None,
        _request_auth: Optional[Dict[StrictStr, Any]] = None,
        _content_type: Optional[StrictStr] = None,
        _headers: Optional[Dict[StrictStr, Any]] = None,
        _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
    ) -> RESTResponseType:
        """Execute a range query for metrics and return the raw response

        Executes a Prometheus query over a range of time. This endpoint allows you to retrieve time series data using PromQL (Prometheus Query Language) expressions. You can specify the start and end times, step interval, and filter the results by inference name.

        Unlike the other ``v1_query_range_metrics*`` variants, this method
        neither calls ``read()`` on the response nor deserializes it: the
        ``response`` attribute of the object returned by
        ``api_client.call_api`` is handed back as-is, so no status-code to
        model mapping applies here.

        :param project_name: Project name (required)
        :type project_name: str
        :param start: Start timestamp in RFC-3339 format or unix timestamp, inclusive. (required)
        :type start: str
        :param end: End timestamp in RFC-3339 format or unix timestamp, inclusive. (required)
        :type end: str
        :param step: Query resolution step width in duration format or float number of seconds. (required)
        :type step: str
        :param query: PromQL expression to evaluate. Required. Example: `sum(rate(container_cpu_usage_seconds_total[5m]))`. Supported metrics: `container_cpu_usage_seconds_total`, `container_memory_working_set_bytes`, `container_fs_io_current`, `DCGM_FI_DEV_GPU_UTIL`, `DCGM_FI_DEV_MEM_COPY_UTIL`. Also supported metrics prefixed with llamacpp:, nv_inference_, nv_trt_llm_, slurm_, sglang:, and vllm:. Currently supported only Vector and Matrix results, Scalar and String results are not supported. (required)
        :type query: str
        :param timeout: Evaluation timeout in duration format. Optional.
        :type timeout: str
        :param limit: Maximum number of returned series. Optional. 0 means disabled
        :type limit: int
        :param inference: Inference name filter. Optional. Defaults to all inference deployments within a namespace.
        :type inference: str
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        :type _request_timeout: float, tuple(float, float), optional
        :param _request_auth: set to override the auth_settings for a single
                              request; this effectively ignores the
                              authentication in the spec for a single request.
        :type _request_auth: dict, optional
        :param _content_type: force content-type for the request.
        :type _content_type: str, optional
        :param _headers: set to override the headers for a single
                         request; this effectively ignores the headers
                         in the spec for a single request.
        :type _headers: dict, optional
        :param _host_index: set to override the host_index for a single
                            request; this effectively ignores the host_index
                            in the spec for a single request. Validation
                            only admits the value 0.
        :type _host_index: int, optional
        :return: The ``response`` attribute of the ``call_api`` result,
                 without reading or deserializing it.
        :rtype: RESTResponseType
        """ # noqa: E501

        _param = self._v1_query_range_metrics_serialize(
            project_name=project_name,
            start=start,
            end=end,
            step=step,
            query=query,
            timeout=timeout,
            limit=limit,
            inference=inference,
            _request_auth=_request_auth,
            _content_type=_content_type,
            _headers=_headers,
            _host_index=_host_index
        )

        # No response-type map is built here: nothing is deserialized on this
        # path, so the mapping would be a dead local.
        response_data = self.api_client.call_api(
            *_param,
            _request_timeout=_request_timeout
        )
        return response_data.response


    def _v1_query_range_metrics_serialize(
        self,
        project_name,
        start,
        end,
        step,
        query,
        timeout,
        limit,
        inference,
        _request_auth,
        _content_type,
        _headers,
        _host_index,
    ) -> RequestSerialized:
        """Assemble the GET request for ``/v1/{project_name}/metrics/query_range``

        Collects the path, query and header parameters and passes them to
        ``self.api_client.param_serialize``. Arguments that are ``None`` are
        left out of the request. An ``Accept`` header is added only when the
        caller did not supply one.

        :param project_name: value for the ``project_name`` path parameter.
        :param start: value for the ``start`` query parameter.
        :param end: value for the ``end`` query parameter.
        :param step: value for the ``step`` query parameter.
        :param query: value for the ``query`` query parameter.
        :param timeout: value for the ``timeout`` query parameter.
        :param limit: value for the ``limit`` query parameter.
        :param inference: value for the ``inference`` query parameter.
        :param _request_auth: forwarded unchanged to ``param_serialize``.
        :param _content_type: accepted for signature parity with the public
                              methods; not used in this body (the request
                              carries no body).
        :param _headers: caller-supplied headers. The dict is copied, so it
                         is never modified by this method.
        :param _host_index: accepted for signature parity with the public
                            methods; not used in this body (``_host`` is
                            always ``None``).
        :return: whatever ``self.api_client.param_serialize`` returns.
        """

        _host = None

        _collection_formats: Dict[str, str] = {}

        _path_params: Dict[str, str] = {}
        _query_params: List[Tuple[str, str]] = []
        # Work on a copy: the Accept default below is written into this dict,
        # and aliasing `_headers` would leak that write back into the
        # caller's own dict.
        _header_params: Dict[str, Optional[str]] = dict(_headers) if _headers else {}
        _form_params: List[Tuple[str, str]] = []
        _files: Dict[
            str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]]
        ] = {}
        _body_params: Optional[bytes] = None

        # process the path parameters
        if project_name is not None:
            _path_params['project_name'] = project_name

        # process the query parameters; the tuple order fixes the order in
        # which they are appended, and None values are skipped.
        for _name, _value in (
            ('start', start),
            ('end', end),
            ('step', step),
            ('timeout', timeout),
            ('limit', limit),
            ('inference', inference),
            ('query', query),
        ):
            if _value is not None:
                _query_params.append((_name, _value))

        # no header, form or body parameters for this operation

        # set the HTTP header `Accept`
        if 'Accept' not in _header_params:
            _header_params['Accept'] = self.api_client.select_header_accept(
                [
                    'application/json'
                ]
            )

        # authentication setting: no per-operation schemes are listed here
        _auth_settings: List[str] = []

        return self.api_client.param_serialize(
            method='GET',
            resource_path='/v1/{project_name}/metrics/query_range',
            path_params=_path_params,
            query_params=_query_params,
            header_params=_header_params,
            body=_body_params,
            post_params=_form_params,
            files=_files,
            auth_settings=_auth_settings,
            collection_formats=_collection_formats,
            _host=_host,
            _request_auth=_request_auth
        )


