Skip to content

Latest commit

 

History

History
241 lines (239 loc) · 28.3 KB

File metadata and controls

241 lines (239 loc) · 28.3 KB

Inference-Perf CLI Flags

These command line flags are automatically generated from the internal Config schema. You can override any configuration directly from the CLI without using a yaml configuration file.

Flag Type Description
--api.type Enum (completion, chat) Matches api.type in config
--api.streaming boolean Matches api.streaming in config
--api.headers JSON Matches api.headers in config
--api.slo_unit str Matches api.slo_unit in config
--api.slo_tpot_header str Matches api.slo_tpot_header in config
--api.slo_ttft_header str Matches api.slo_ttft_header in config
--api.response_format.type Enum (json_schema, json_object) Matches api.response_format.type in config
--api.response_format.name str Matches api.response_format.name in config
--api.response_format.json_schema JSON Matches api.response_format.json_schema in config
--data.type Enum (mock, shareGPT, synthetic, random, shared_prefix, cnn_dailymail, infinity_instruct, billsum_conversations, otel_trace_replay, conversation_replay) Matches data.type in config
--data.path str Matches data.path in config
--data.input_distribution.min int Matches data.input_distribution.min in config
--data.input_distribution.max int Matches data.input_distribution.max in config
--data.input_distribution.mean float Matches data.input_distribution.mean in config
--data.input_distribution.std_dev float Matches data.input_distribution.std_dev in config
--data.input_distribution.total_count int Matches data.input_distribution.total_count in config
--data.input_distribution.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.input_distribution.type in config
--data.input_distribution.variance float Matches data.input_distribution.variance in config
--data.input_distribution.skew float Matches data.input_distribution.skew in config
--data.output_distribution.min int Matches data.output_distribution.min in config
--data.output_distribution.max int Matches data.output_distribution.max in config
--data.output_distribution.mean float Matches data.output_distribution.mean in config
--data.output_distribution.std_dev float Matches data.output_distribution.std_dev in config
--data.output_distribution.total_count int Matches data.output_distribution.total_count in config
--data.output_distribution.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.output_distribution.type in config
--data.output_distribution.variance float Matches data.output_distribution.variance in config
--data.output_distribution.skew float Matches data.output_distribution.skew in config
--data.shared_prefix.num_groups int Matches data.shared_prefix.num_groups in config
--data.shared_prefix.num_prompts_per_group int Matches data.shared_prefix.num_prompts_per_group in config
--data.shared_prefix.system_prompt_len string Matches data.shared_prefix.system_prompt_len in config
--data.shared_prefix.question_len string Matches data.shared_prefix.question_len in config
--data.shared_prefix.output_len string Matches data.shared_prefix.output_len in config
--data.shared_prefix.seed int Matches data.shared_prefix.seed in config
--data.shared_prefix.question_distribution.min int Matches data.shared_prefix.question_distribution.min in config
--data.shared_prefix.question_distribution.max int Matches data.shared_prefix.question_distribution.max in config
--data.shared_prefix.question_distribution.mean float Matches data.shared_prefix.question_distribution.mean in config
--data.shared_prefix.question_distribution.std_dev float Matches data.shared_prefix.question_distribution.std_dev in config
--data.shared_prefix.question_distribution.total_count int Matches data.shared_prefix.question_distribution.total_count in config
--data.shared_prefix.question_distribution.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.shared_prefix.question_distribution.type in config
--data.shared_prefix.question_distribution.variance float Matches data.shared_prefix.question_distribution.variance in config
--data.shared_prefix.question_distribution.skew float Matches data.shared_prefix.question_distribution.skew in config
--data.shared_prefix.output_distribution.min int Matches data.shared_prefix.output_distribution.min in config
--data.shared_prefix.output_distribution.max int Matches data.shared_prefix.output_distribution.max in config
--data.shared_prefix.output_distribution.mean float Matches data.shared_prefix.output_distribution.mean in config
--data.shared_prefix.output_distribution.std_dev float Matches data.shared_prefix.output_distribution.std_dev in config
--data.shared_prefix.output_distribution.total_count int Matches data.shared_prefix.output_distribution.total_count in config
--data.shared_prefix.output_distribution.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.shared_prefix.output_distribution.type in config
--data.shared_prefix.output_distribution.variance float Matches data.shared_prefix.output_distribution.variance in config
--data.shared_prefix.output_distribution.skew float Matches data.shared_prefix.output_distribution.skew in config
--data.shared_prefix.enable_multi_turn_chat boolean Matches data.shared_prefix.enable_multi_turn_chat in config
--data.shared_prefix.multimodal.image.count.min int Matches data.shared_prefix.multimodal.image.count.min in config
--data.shared_prefix.multimodal.image.count.max int Matches data.shared_prefix.multimodal.image.count.max in config
--data.shared_prefix.multimodal.image.count.mean float Matches data.shared_prefix.multimodal.image.count.mean in config
--data.shared_prefix.multimodal.image.count.std_dev float Matches data.shared_prefix.multimodal.image.count.std_dev in config
--data.shared_prefix.multimodal.image.count.total_count int Matches data.shared_prefix.multimodal.image.count.total_count in config
--data.shared_prefix.multimodal.image.count.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.shared_prefix.multimodal.image.count.type in config
--data.shared_prefix.multimodal.image.count.variance float Matches data.shared_prefix.multimodal.image.count.variance in config
--data.shared_prefix.multimodal.image.count.skew float Matches data.shared_prefix.multimodal.image.count.skew in config
--data.shared_prefix.multimodal.image.insertion_point string Placement of media within the text prompt. Float in range [0.0, 1.0] (0=start, 1=end), or a Distribution to sample from.
--data.shared_prefix.multimodal.image.resolutions JSON Resolution or list of weighted resolutions for generated images.
--data.shared_prefix.multimodal.image.representation Enum (png, jpeg) Wire encoding for emitted image bytes: png (default, lossless) or jpeg (lossy, smaller payload). Some VLMs prefer one or the other; consult the model's spec sheet.
--data.shared_prefix.multimodal.video.count.min int Matches data.shared_prefix.multimodal.video.count.min in config
--data.shared_prefix.multimodal.video.count.max int Matches data.shared_prefix.multimodal.video.count.max in config
--data.shared_prefix.multimodal.video.count.mean float Matches data.shared_prefix.multimodal.video.count.mean in config
--data.shared_prefix.multimodal.video.count.std_dev float Matches data.shared_prefix.multimodal.video.count.std_dev in config
--data.shared_prefix.multimodal.video.count.total_count int Matches data.shared_prefix.multimodal.video.count.total_count in config
--data.shared_prefix.multimodal.video.count.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.shared_prefix.multimodal.video.count.type in config
--data.shared_prefix.multimodal.video.count.variance float Matches data.shared_prefix.multimodal.video.count.variance in config
--data.shared_prefix.multimodal.video.count.skew float Matches data.shared_prefix.multimodal.video.count.skew in config
--data.shared_prefix.multimodal.video.insertion_point string Placement of media within the text prompt. Float in range [0.0, 1.0] (0=start, 1=end), or a Distribution to sample from.
--data.shared_prefix.multimodal.video.profiles JSON Video profile or list of weighted video profiles for generated videos.
--data.shared_prefix.multimodal.video.representation Enum (mp4, png_frames, jpeg_frames) Wire-format strategy. mp4 sends one video_url block carrying an MP4 blob (measures full pipeline including server-side decode). png_frames and jpeg_frames send frames × image_url blocks at one insertion point in the named encoding (no decode dependency, useful for prefix-cache benchmarks and servers that don't accept video_url).
--data.shared_prefix.multimodal.audio.count.min int Matches data.shared_prefix.multimodal.audio.count.min in config
--data.shared_prefix.multimodal.audio.count.max int Matches data.shared_prefix.multimodal.audio.count.max in config
--data.shared_prefix.multimodal.audio.count.mean float Matches data.shared_prefix.multimodal.audio.count.mean in config
--data.shared_prefix.multimodal.audio.count.std_dev float Matches data.shared_prefix.multimodal.audio.count.std_dev in config
--data.shared_prefix.multimodal.audio.count.total_count int Matches data.shared_prefix.multimodal.audio.count.total_count in config
--data.shared_prefix.multimodal.audio.count.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.shared_prefix.multimodal.audio.count.type in config
--data.shared_prefix.multimodal.audio.count.variance float Matches data.shared_prefix.multimodal.audio.count.variance in config
--data.shared_prefix.multimodal.audio.count.skew float Matches data.shared_prefix.multimodal.audio.count.skew in config
--data.shared_prefix.multimodal.audio.insertion_point string Placement of media within the text prompt. Float in range [0.0, 1.0] (0=start, 1=end), or a Distribution to sample from.
--data.shared_prefix.multimodal.audio.durations JSON Duration or list of weighted durations for generated audio clips.
--data.multimodal.image.count.min int Matches data.multimodal.image.count.min in config
--data.multimodal.image.count.max int Matches data.multimodal.image.count.max in config
--data.multimodal.image.count.mean float Matches data.multimodal.image.count.mean in config
--data.multimodal.image.count.std_dev float Matches data.multimodal.image.count.std_dev in config
--data.multimodal.image.count.total_count int Matches data.multimodal.image.count.total_count in config
--data.multimodal.image.count.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.multimodal.image.count.type in config
--data.multimodal.image.count.variance float Matches data.multimodal.image.count.variance in config
--data.multimodal.image.count.skew float Matches data.multimodal.image.count.skew in config
--data.multimodal.image.insertion_point string Placement of media within the text prompt. Float in range [0.0, 1.0] (0=start, 1=end), or a Distribution to sample from.
--data.multimodal.image.resolutions JSON Resolution or list of weighted resolutions for generated images.
--data.multimodal.image.representation Enum (png, jpeg) Wire encoding for emitted image bytes: png (default, lossless) or jpeg (lossy, smaller payload). Some VLMs prefer one or the other; consult the model's spec sheet.
--data.multimodal.video.count.min int Matches data.multimodal.video.count.min in config
--data.multimodal.video.count.max int Matches data.multimodal.video.count.max in config
--data.multimodal.video.count.mean float Matches data.multimodal.video.count.mean in config
--data.multimodal.video.count.std_dev float Matches data.multimodal.video.count.std_dev in config
--data.multimodal.video.count.total_count int Matches data.multimodal.video.count.total_count in config
--data.multimodal.video.count.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.multimodal.video.count.type in config
--data.multimodal.video.count.variance float Matches data.multimodal.video.count.variance in config
--data.multimodal.video.count.skew float Matches data.multimodal.video.count.skew in config
--data.multimodal.video.insertion_point string Placement of media within the text prompt. Float in range [0.0, 1.0] (0=start, 1=end), or a Distribution to sample from.
--data.multimodal.video.profiles JSON Video profile or list of weighted video profiles for generated videos.
--data.multimodal.video.representation Enum (mp4, png_frames, jpeg_frames) Wire-format strategy. mp4 sends one video_url block carrying an MP4 blob (measures full pipeline including server-side decode). png_frames and jpeg_frames send frames × image_url blocks at one insertion point in the named encoding (no decode dependency, useful for prefix-cache benchmarks and servers that don't accept video_url).
--data.multimodal.audio.count.min int Matches data.multimodal.audio.count.min in config
--data.multimodal.audio.count.max int Matches data.multimodal.audio.count.max in config
--data.multimodal.audio.count.mean float Matches data.multimodal.audio.count.mean in config
--data.multimodal.audio.count.std_dev float Matches data.multimodal.audio.count.std_dev in config
--data.multimodal.audio.count.total_count int Matches data.multimodal.audio.count.total_count in config
--data.multimodal.audio.count.type Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) Matches data.multimodal.audio.count.type in config
--data.multimodal.audio.count.variance float Matches data.multimodal.audio.count.variance in config
--data.multimodal.audio.count.skew float Matches data.multimodal.audio.count.skew in config
--data.multimodal.audio.insertion_point string Placement of media within the text prompt. Float in range [0.0, 1.0] (0=start, 1=end), or a Distribution to sample from.
--data.multimodal.audio.durations JSON Duration or list of weighted durations for generated audio clips.
--data.trace.file str Matches data.trace.file in config
--data.trace.format Enum (AzurePublicDataset) Matches data.trace.format in config
--data.otel_trace_replay.use_static_model boolean Use a single static model for all requests
--data.otel_trace_replay.static_model_name str Static model name (required if use_static_model=True)
--data.otel_trace_replay.model_mapping JSON Map recorded model names to target models
--data.otel_trace_replay.default_max_tokens int Default max_tokens if not specified in trace
--data.otel_trace_replay.inject_random_session_id boolean Inject random string into unique segments to invalidate KV-cache between sessions
--data.otel_trace_replay.duplicate_sessions_target int Target number of sessions to reach by duplicating existing sessions. If None, no duplication occurs.
--data.otel_trace_replay.max_wait_ms int Maximum inter-event wait time in milliseconds. Caps the delay between predecessor completion and event dispatch to avoid reproducing unusually long tool/agent execution times from the original trace.
--data.otel_trace_replay.include_errors boolean Include spans with error status
--data.otel_trace_replay.skip_invalid_files boolean Skip invalid trace files instead of failing
--data.otel_trace_replay.trace_directory str Directory containing OTel JSON trace files
--data.otel_trace_replay.trace_files JSON List of paths to specific OTel JSON trace files
--data.otel_trace_replay.hf_dataset_path JSON HuggingFace dataset path. Can be:
  • String: 'username/dataset-name'
  • Dict: {'path': 'username/dataset-name', 'revision': 'main', 'split': 'train'} Any extra keys in the dict are passed as kwargs to datasets.load_dataset(). | | --data.otel_trace_replay.filter | str | Lambda expression to filter trace records. Applied uniformly to all data sources. Example: "lambda x: x['benchmark'] == 'gsm8k'" or "lambda x: 'spans' in x and len(x['spans']) > 5" Security: Filter expressions use eval() and should only contain trusted input. | | --data.conversation_replay.seed | int | Random seed for deterministic generation | | --data.conversation_replay.num_conversations | int | Number of conversation blueprints to generate | | --data.conversation_replay.shared_system_prompt_len | int | Fixed shared system prompt length in tokens | | --data.conversation_replay.dynamic_system_prompt_len.min | int | Matches data.conversation_replay.dynamic_system_prompt_len.min in config | | --data.conversation_replay.dynamic_system_prompt_len.max | int | Matches data.conversation_replay.dynamic_system_prompt_len.max in config | | --data.conversation_replay.dynamic_system_prompt_len.mean | float | Matches data.conversation_replay.dynamic_system_prompt_len.mean in config | | --data.conversation_replay.dynamic_system_prompt_len.std_dev | float | Matches data.conversation_replay.dynamic_system_prompt_len.std_dev in config | | --data.conversation_replay.dynamic_system_prompt_len.total_count | int | Matches data.conversation_replay.dynamic_system_prompt_len.total_count in config | | --data.conversation_replay.dynamic_system_prompt_len.type | Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) | Matches data.conversation_replay.dynamic_system_prompt_len.type in config | | --data.conversation_replay.dynamic_system_prompt_len.variance | float | Matches data.conversation_replay.dynamic_system_prompt_len.variance in config | | --data.conversation_replay.dynamic_system_prompt_len.skew | float | Matches data.conversation_replay.dynamic_system_prompt_len.skew in config | | --data.conversation_replay.turns_per_conversation.min | int | Matches data.conversation_replay.turns_per_conversation.min in config | | --data.conversation_replay.turns_per_conversation.max | int | Matches data.conversation_replay.turns_per_conversation.max in config | | --data.conversation_replay.turns_per_conversation.mean | float | Matches data.conversation_replay.turns_per_conversation.mean in config | | --data.conversation_replay.turns_per_conversation.std_dev | float | Matches data.conversation_replay.turns_per_conversation.std_dev in config | | --data.conversation_replay.turns_per_conversation.total_count | int | Matches data.conversation_replay.turns_per_conversation.total_count in config | | --data.conversation_replay.turns_per_conversation.type | Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) | Matches data.conversation_replay.turns_per_conversation.type in config | | --data.conversation_replay.turns_per_conversation.variance | float | Matches data.conversation_replay.turns_per_conversation.variance in config | | --data.conversation_replay.turns_per_conversation.skew | float | Matches data.conversation_replay.turns_per_conversation.skew in config | | --data.conversation_replay.input_tokens_per_turn.min | int | Matches data.conversation_replay.input_tokens_per_turn.min in config | | --data.conversation_replay.input_tokens_per_turn.max | int | Matches data.conversation_replay.input_tokens_per_turn.max in config | | --data.conversation_replay.input_tokens_per_turn.mean | float | Matches data.conversation_replay.input_tokens_per_turn.mean in config | | --data.conversation_replay.input_tokens_per_turn.std_dev | float | Matches data.conversation_replay.input_tokens_per_turn.std_dev in config | | --data.conversation_replay.input_tokens_per_turn.total_count | int | Matches data.conversation_replay.input_tokens_per_turn.total_count in config | | --data.conversation_replay.input_tokens_per_turn.type | Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) | Matches data.conversation_replay.input_tokens_per_turn.type in config | | --data.conversation_replay.input_tokens_per_turn.variance | float | Matches data.conversation_replay.input_tokens_per_turn.variance in config | | --data.conversation_replay.input_tokens_per_turn.skew | float | Matches data.conversation_replay.input_tokens_per_turn.skew in config | | --data.conversation_replay.output_tokens_per_turn.min | int | Matches data.conversation_replay.output_tokens_per_turn.min in config | | --data.conversation_replay.output_tokens_per_turn.max | int | Matches data.conversation_replay.output_tokens_per_turn.max in config | | --data.conversation_replay.output_tokens_per_turn.mean | float | Matches data.conversation_replay.output_tokens_per_turn.mean in config | | --data.conversation_replay.output_tokens_per_turn.std_dev | float | Matches data.conversation_replay.output_tokens_per_turn.std_dev in config | | --data.conversation_replay.output_tokens_per_turn.total_count | int | Matches data.conversation_replay.output_tokens_per_turn.total_count in config | | --data.conversation_replay.output_tokens_per_turn.type | Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) | Matches data.conversation_replay.output_tokens_per_turn.type in config | | --data.conversation_replay.output_tokens_per_turn.variance | float | Matches data.conversation_replay.output_tokens_per_turn.variance in config | | --data.conversation_replay.output_tokens_per_turn.skew | float | Matches data.conversation_replay.output_tokens_per_turn.skew in config | | --data.conversation_replay.tool_call_latency_sec.min | int | Matches data.conversation_replay.tool_call_latency_sec.min in config | | --data.conversation_replay.tool_call_latency_sec.max | int | Matches data.conversation_replay.tool_call_latency_sec.max in config | | --data.conversation_replay.tool_call_latency_sec.mean | float | Matches data.conversation_replay.tool_call_latency_sec.mean in config | | --data.conversation_replay.tool_call_latency_sec.std_dev | float | Matches data.conversation_replay.tool_call_latency_sec.std_dev in config | | --data.conversation_replay.tool_call_latency_sec.total_count | int | Matches data.conversation_replay.tool_call_latency_sec.total_count in config | | --data.conversation_replay.tool_call_latency_sec.type | Enum (normal, skew_normal, lognormal, uniform, poisson, fixed) | Matches data.conversation_replay.tool_call_latency_sec.type in config | | --data.conversation_replay.tool_call_latency_sec.variance | float | Matches data.conversation_replay.tool_call_latency_sec.variance in config | | --data.conversation_replay.tool_call_latency_sec.skew | float | Matches data.conversation_replay.tool_call_latency_sec.skew in config | | --data.conversation_replay.max_model_len | int | Maximum model context length in tokens | | --load.type | Enum (constant, poisson, trace_replay, concurrent, trace_session_replay) | Matches load.type in config | | --load.interval | float | Matches load.interval in config | | --load.stages | JSON | Matches load.stages in config | | --load.sweep.type | Enum (geometric, linear) | Matches load.sweep.type in config | | --load.sweep.num_requests | int | Matches load.sweep.num_requests in config | | --load.sweep.timeout | float | Matches load.sweep.timeout in config | | --load.sweep.num_stages | int | Matches load.sweep.num_stages in config | | --load.sweep.stage_duration | int | Matches load.sweep.stage_duration in config | | --load.sweep.saturation_percentile | float | Matches load.sweep.saturation_percentile in config | | --load.num_workers | int | Matches load.num_workers in config | | --load.worker_max_concurrency | int | Matches load.worker_max_concurrency in config | | --load.worker_max_tcp_connections | int | Matches load.worker_max_tcp_connections in config | | --load.trace.file | str | Matches load.trace.file in config | | --load.trace.format | Enum (AzurePublicDataset) | Matches load.trace.format in config | | --load.circuit_breakers | JSON | Matches load.circuit_breakers in config | | --load.request_timeout | float | Matches load.request_timeout in config | | --load.lora_traffic_split | JSON | Matches load.lora_traffic_split in config | | --load.base_seed | int | Matches load.base_seed in config | | --metrics.type | Enum (prometheus, default) | Matches metrics.type in config | | --metrics.prometheus.scrape_interval | int | Matches metrics.prometheus.scrape_interval in config | | --metrics.prometheus.url | string | Matches metrics.prometheus.url in config | | --metrics.prometheus.filters | JSON | Matches metrics.prometheus.filters in config | | --metrics.prometheus.google_managed | boolean | Matches metrics.prometheus.google_managed in config | | --report.request_lifecycle.summary | boolean | Matches report.request_lifecycle.summary in config | | --report.request_lifecycle.per_stage | boolean | Matches report.request_lifecycle.per_stage in config | | --report.request_lifecycle.per_request | boolean | Matches report.request_lifecycle.per_request in config | | --report.request_lifecycle.per_adapter | boolean | Matches report.request_lifecycle.per_adapter in config | | --report.request_lifecycle.per_adapter_stage | boolean | Matches report.request_lifecycle.per_adapter_stage in config | | --report.request_lifecycle.percentiles | JSON | Matches report.request_lifecycle.percentiles in config | | --report.prometheus.summary | boolean | Matches report.prometheus.summary in config | | --report.prometheus.per_stage | boolean | Matches report.prometheus.per_stage in config | | --report.session_lifecycle.summary | boolean | Matches report.session_lifecycle.summary in config | | --report.session_lifecycle.per_stage | boolean | Matches report.session_lifecycle.per_stage in config | | --report.session_lifecycle.per_session | boolean | Matches report.session_lifecycle.per_session in config | | --report.goodput.constraints | JSON | Matches report.goodput.constraints in config | | --storage.local_storage.path | str | Matches storage.local_storage.path in config | | --storage.local_storage.report_file_prefix | str | Matches storage.local_storage.report_file_prefix in config | | --storage.google_cloud_storage.path | str | Matches storage.google_cloud_storage.path in config | | --storage.google_cloud_storage.report_file_prefix | str | Matches storage.google_cloud_storage.report_file_prefix in config | | --storage.google_cloud_storage.bucket_name | str | Matches storage.google_cloud_storage.bucket_name in config | | --storage.simple_storage_service.path | str | Matches storage.simple_storage_service.path in config | | --storage.simple_storage_service.report_file_prefix | str | Matches storage.simple_storage_service.report_file_prefix in config | | --storage.simple_storage_service.bucket_name | str | Matches storage.simple_storage_service.bucket_name in config | | --storage.simple_storage_service.endpoint_url | str | Matches storage.simple_storage_service.endpoint_url in config | | --storage.simple_storage_service.region_name | str | Matches storage.simple_storage_service.region_name in config | | --storage.simple_storage_service.addressing_style | string | Matches storage.simple_storage_service.addressing_style in config | | --server.type | Enum (vllm, sglang, tgi, mock) | Matches server.type in config | | --server.model_name | str | Matches server.model_name in config | | --server.base_url | str | Matches server.base_url in config | | --server.ignore_eos | boolean | Matches server.ignore_eos in config | | --server.api_key | str | Matches server.api_key in config | | --server.cert_path | str | Matches server.cert_path in config | | --server.key_path | str | Matches server.key_path in config | | --tokenizer.pretrained_model_name_or_path | str | Matches tokenizer.pretrained_model_name_or_path in config | | --tokenizer.trust_remote_code | boolean | Matches tokenizer.trust_remote_code in config | | --tokenizer.token | str | Matches tokenizer.token in config | | --circuit_breakers | JSON | Matches circuit_breakers in config |