wallaroo.engine_config


class Architecture(builtins.str, enum.Enum):

An Enum to represent the supported processor architectures.

X86 = <Architecture.X86: 'x86'>
ARM = <Architecture.ARM: 'arm'>
Power10 = <Architecture.Power10: 'power10'>
@classmethod
def default(cls) -> Architecture:
class Acceleration(builtins.str, enum.Enum):

An Enum to represent the supported acceleration options.

CUDA = <Acceleration.CUDA: 'cuda'>
Jetson = <Acceleration.Jetson: 'jetson'>
OpenVINO = <Acceleration.OpenVINO: 'openvino'>
QAIC = <Acceleration.QAIC: 'qaic'>
@classmethod
def default(cls) -> Acceleration:
def default_acceleration_with_config(self) -> AccelerationWithConfig:
def is_applicable(self, arch: Architecture) -> bool:
def requires_config(self) -> bool:
def with_config( self, config: QaicConfig) -> AccelerationWithConfig:

Create an acceleration with a config. Currently required only for the QAIC acceleration.

Parameters
  • config (QaicConfig): The config to use for the acceleration.
Returns
  • AccelerationWithConfig: The acceleration with the given config.
Raises
  • ModelOptimizationConfigError: If the acceleration is not supported.

@runtime_checkable
class OpenapiAccelerationWithConfig(typing_extensions.Protocol):

Base class for protocol classes.

Protocol classes are defined as::

class Proto(Protocol):
    def meth(self) -> int:
        ...

Such classes are primarily used with static type checkers that recognize structural subtyping (static duck-typing), for example::

class C:
    def meth(self) -> int:
        return 0

def func(x: Proto) -> int:
    return x.meth()

func(C())  # Passes static type check

See PEP 544 for details. Protocol classes decorated with @typing.runtime_checkable act as simple-minded runtime protocols that check only the presence of given attributes, ignoring their type signatures. Protocol classes can be generic; they are defined as::

class GenProto(Protocol[T]):
    def meth(self) -> T:
        ...
OpenapiAccelerationWithConfig(*args, **kwargs)
def to_dict(self) -> Dict[str, Any]:
@classmethod
def from_dict( cls, data: Dict[str, Any]) -> OpenapiAccelerationWithConfig:
class AccelerationWithConfig(pydantic.main.BaseModel, abc.ABC):

A base class for all accelerations that require a config.

model_config = {'arbitrary_types_allowed': True, 'extra': 'forbid', 'protected_namespaces': ()}

Configuration for the model; should be a dictionary conforming to pydantic's ConfigDict (pydantic.config.ConfigDict).

config: pydantic.main.BaseModel
def is_applicable(self, arch: Architecture) -> bool:
def to_dict(self) -> Dict[str, Any]:

Convert the config to a dictionary.

class QaicConfig(pydantic.main.BaseModel):

A config for the Qaic acceleration.

model_config = {'arbitrary_types_allowed': True, 'extra': 'forbid', 'use_enum_values': True, 'protected_namespaces': ()}

Configuration for the model; should be a dictionary conforming to pydantic's ConfigDict (pydantic.config.ConfigDict).

num_cores: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, int]
num_devices: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, int]
ctx_len: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, int]
prefill_seq_len: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, int]
full_batch_size: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, int]
mxfp6_matmul: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, bool]
mxint8_kv_cache: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, bool]
aic_enable_depth_first: Union[wallaroo.wallaroo_ml_ops_api_client.types.Unset, bool]
class QaicWithConfig(AccelerationWithConfig):

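An AccelerationWithConfig for the QAIC acceleration, pairing it with a QaicConfig. (AccelerationWithConfig is the base class for all accelerations that require a config.)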

accel: Literal[<Acceleration.QAIC: 'qaic'>]
config: QaicConfig
model_config = {'arbitrary_types_allowed': True, 'extra': 'forbid', 'protected_namespaces': ()}

Configuration for the model; should be a dictionary conforming to pydantic's ConfigDict (pydantic.config.ConfigDict).

class EngineConfig:

Wraps an engine config.

EngineConfig( cpus: int, gpus: Optional[int] = 0, inference_channel_size: Optional[int] = None, model_concurrency: Optional[int] = None, pipeline_config_directory: Optional[str] = None, model_config_directory: Optional[str] = None, model_directory: Optional[str] = None, audit_logging: bool = False, arch: Architecture = <Architecture.X86: 'x86'>, accel: Acceleration = <Acceleration._None: 'none'>)
def to_json(self) -> str:

Returns a JSON representation of this object.

class InvalidAccelerationError(builtins.Exception):

Raised when the specified acceleration is incompatible with the given platform architecture.

class ModelOptimizationConfigError(builtins.Exception):

Raised when the specified model optimization configuration is not available.