diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index 3071d53cd..fae065a93 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -18,6 +18,15 @@ pub enum AuthHeader { Bearer, /// Custom header name (e.g. `x-api-key` for Anthropic). Custom(&'static str), + /// Do not inject any auth header on outgoing requests. The upstream + /// is expected to authenticate itself — used when the configured + /// `default_base_url` (or operator-supplied base-URL override) points + /// at a translating bridge / proxy that holds operator-side + /// credentials in its own pod and ignores caller-supplied auth. + /// Currently used by the `aws-bedrock` profile, where `SigV4` signing + /// is deferred to a follow-up PR; today the only supported shape is + /// a bridge-fronted upstream. + None, } // --------------------------------------------------------------------------- @@ -69,6 +78,14 @@ const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; /// base-URL-override escape hatch path. const VERTEX_AI_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; +// `aws_bedrock_invoke_stream` (`/model/{id}/invoke-with-response-stream`) is +// deferred to a follow-up alongside protocol-aware AWS event-stream error +// handling: the shared streaming relay's truncation/timeout path injects +// SSE-formatted error frames, which would corrupt downstream Bedrock +// event-stream parsers. Until that lands, this profile advertises only +// the buffered `InvokeModel` shape. 
+const AWS_BEDROCK_PROTOCOLS: &[&str] = &["aws_bedrock_invoke"]; + static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "openai", default_base_url: "https://api.openai.com/v1", @@ -155,6 +172,38 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { passthrough_headers: &["x-model-id"], }; +// AWS Bedrock — registered as bridge-fronted (no router-side auth +// injection). Real AWS Bedrock requires `SigV4` signing of every request, +// which is deferred to a follow-up PR (see #1704 thread). Until then, +// operators point `BEDROCK_BASE_URL` at a translating bridge or +// Bedrock-compatible proxy that handles auth in its own pod. The router +// passes Bedrock `InvokeModel` requests through opaquely; the L7 pattern +// `/model/{modelId}/invoke` is wired up in +// `crates/openshell-sandbox/src/l7/inference.rs`. `InvokeModelWithResponseStream` +// is deferred to the same follow-up that adds protocol-aware error framing. +// +// Note: `default_base_url` is intentionally an empty string. Without +// `BEDROCK_BASE_URL` config, route resolution rejects the provider +// rather than silently forwarding prompts to real AWS Bedrock with +// `auth: None` (which would fail upstream and risks operator +// surprise). Once the `SigV4` follow-up lands, the default can revert +// to `https://bedrock-runtime.us-east-1.amazonaws.com`. +static AWS_BEDROCK_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "aws-bedrock", + default_base_url: "", + protocols: AWS_BEDROCK_PROTOCOLS, + // No single API key for Bedrock — `SigV4` takes four credentials + // (access key id, secret, session token, region) and signs requests + // rather than injecting a header. Until the `SigV4` follow-up lands + // the router-side auth shape is `None` and no credential lookup is + // required at route time. 
+ credential_key_names: &[], + base_url_config_keys: &["BEDROCK_BASE_URL"], + auth: AuthHeader::None, + default_headers: &[], + passthrough_headers: &[], +}; + /// Canonicalize an inference provider type string to a well-known identifier. /// /// Returns `Some(canonical_name)` for recognized inference providers, @@ -167,6 +216,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> { "openai" => Some("openai"), "anthropic" => Some("anthropic"), "nvidia" => Some("nvidia"), + "aws-bedrock" => Some("aws-bedrock"), "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => { Some("google-vertex-ai") } @@ -184,6 +234,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), "google-vertex-ai" => Some(&VERTEX_AI_PROFILE), + "aws-bedrock" => Some(&AWS_BEDROCK_PROFILE), _ => None, } } @@ -303,7 +354,34 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("aws-bedrock").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive + assert!(profile_for("AWS-Bedrock").is_some()); // case insensitive + } + + #[test] + fn aws_bedrock_uses_no_auth_header() { + let (auth, headers) = auth_for_provider_type("aws-bedrock"); + assert_eq!(auth, AuthHeader::None); + assert!(headers.is_empty()); + } + + #[test] + fn aws_bedrock_profile_has_no_credential_keys() { + let profile = profile_for("aws-bedrock").expect("profile registered"); + // No router-side credential lookup until the `SigV4` follow-up. 
+ assert!(profile.credential_key_names.is_empty()); + assert_eq!(profile.base_url_config_keys, &["BEDROCK_BASE_URL"]); + } + + #[test] + fn aws_bedrock_protocols_are_bedrock_specific() { + let profile = profile_for("aws-bedrock").expect("profile registered"); + assert!(profile.protocols.contains(&"aws_bedrock_invoke")); + // `aws_bedrock_invoke_stream` is deferred to the follow-up that adds + // protocol-aware AWS event-stream error framing; until then the + // profile advertises only the buffered `InvokeModel` shape. + assert!(!profile.protocols.contains(&"aws_bedrock_invoke_stream")); } #[test] diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 624ee0711..5dca23763 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -19,6 +19,7 @@ use std::sync::OnceLock; const PATH_TEMPLATE_CREDENTIAL_PLACEHOLDER: &str = "{credential}"; const BUILT_IN_PROFILE_YAMLS: &[&str] = &[ + include_str!("../../../providers/aws-bedrock.yaml"), include_str!("../../../providers/claude-code.yaml"), include_str!("../../../providers/codex.yaml"), include_str!("../../../providers/copilot.yaml"), diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 9eb63c88b..1572c7eca 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -199,6 +199,33 @@ fn prepare_backend_request( body: bytes::Bytes, stream_response: bool, ) -> Result<(reqwest::RequestBuilder, String), RouterError> { + // For AWS Bedrock routes the model id is encoded in the URL path + // (`/model/{modelId}/invoke[-with-response-stream]`), not in the + // JSON body. The caller's path can carry any model id; rewrite it + // to the operator-configured `route.model` so a sandbox cannot + // pick a different upstream model than what `inference set` + // configured. 
If the path is not a recognized Bedrock shape on a + // Bedrock route, reject the request rather than forwarding + // verbatim. + let rewritten_path: String; + let path = if route_is_bedrock(route) { + match rewrite_bedrock_path(route, path) { + Some(p) => { + rewritten_path = p; + rewritten_path.as_str() + } + None => { + return Err(RouterError::Internal(format!( + "AWS Bedrock route received unprocessable path '{path}' or invalid \ + route.model; expected /model//invoke and a model id with no \ + path separators, URL delimiters, percent escapes, traversal \ + segments, whitespace, or control characters" + ))); + } + } + } else { + path + }; let url = build_provider_url(route, &route.model, path, stream_response); let headers = sanitize_request_headers(route, headers); @@ -216,6 +243,13 @@ fn prepare_backend_request( AuthHeader::Custom(header_name) => { builder = builder.header(*header_name, &route.api_key); } + AuthHeader::None => { + // Bridge-fronted upstream: no router-side auth injection. + // The configured `endpoint` is expected to be a translating + // bridge / proxy whose own pod holds operator-side + // credentials. Used today by the `aws-bedrock` profile + // (SigV4 signing is a separate follow-up). + } } for (name, value) in &headers { builder = builder.header(name.as_str(), value.as_str()); @@ -252,6 +286,14 @@ fn prepare_backend_request( // in the body; strip it so Vertex AI does not reject the // request with "Extra inputs are not permitted". obj.remove("model"); + } else if route_is_bedrock(route) { + // AWS Bedrock InvokeModel encodes the model in the URL + // path; the request body is the raw provider-specific + // payload (e.g. an Anthropic Messages body for Claude + // models, a Mistral payload for Mistral models). The + // body must not be mutated — injecting a "model" field + // here would either be silently ignored or rejected as + // an unexpected key by the upstream / bridge. 
} else { obj.insert( "model".to_string(), @@ -775,6 +817,116 @@ fn build_backend_url(endpoint: &str, path: &str) -> String { format!("{base}{path}") } +/// Check whether a route targets an AWS Bedrock `InvokeModel` endpoint. +/// +/// Returns true when any of the route's protocols is one of the Bedrock +/// invocation protocols. Used to gate Bedrock-specific request shaping +/// (path-segment rewriting, skipped body-model injection) in +/// [`prepare_backend_request`]. +/// +/// `aws_bedrock_invoke_stream` is recognized for forward-compatibility +/// with the streaming follow-up but is not currently advertised by the +/// L7 pattern set. +fn route_is_bedrock(route: &ResolvedRoute) -> bool { + route + .protocols + .iter() + .any(|p| p == "aws_bedrock_invoke" || p == "aws_bedrock_invoke_stream") +} + +/// Parse a Bedrock invocation path into its `(model_id, action_suffix, query_tail)` +/// components. +/// +/// Recognized shape (caller's path on the way into the router): +/// - `/model/<model-id>/invoke[?<query>]` → action `/invoke` +/// +/// `<model-id>` must be non-empty and contain no `/`. The query tail +/// (including the leading `?`) is preserved so [`rewrite_bedrock_path`] +/// can restore it; the L7 matcher accepts queries, so silently dropping +/// them here would mutate the request shape between the matcher and +/// the upstream. Returns `None` when the path does not match — the +/// caller treats that as a malformed request and rejects rather than +/// forwarding verbatim. +/// +/// `InvokeModelWithResponseStream` (`/invoke-with-response-stream`) is +/// deferred until the streaming relay grows protocol-aware AWS +/// event-stream error termination; the L7 pattern set does not +/// advertise it today, so it cannot reach this parser. +fn parse_bedrock_invocation_path(path: &str) -> Option<(&str, &'static str, &str)> { + // Slice up to but not including `?`, then keep the `?`-prefixed + // tail so callers can re-attach it without reconstructing the + // delimiter. 
+ let (path_only, query_tail) = + path.find('?').map_or((path, ""), |idx| (&path[..idx], &path[idx..])); + let rest = path_only.strip_prefix("/model/")?; + let slash_at = rest.find('/')?; + if slash_at == 0 { + return None; + } + let model_id = &rest[..slash_at]; + let suffix = &rest[slash_at..]; + let action: &'static str = match suffix { + "/invoke" => "/invoke", + _ => return None, + }; + Some((model_id, action, query_tail)) +} + +/// Rewrite a Bedrock invocation path so the model segment is the +/// operator-configured `route.model` rather than whatever the caller +/// supplied. Returns the rewritten path on success, or `None` when the +/// inbound path is not a recognized Bedrock invocation shape or when +/// `route.model` is not a valid Bedrock model id. +/// +/// Why rewrite rather than reject: the inbound L7 pattern detector +/// already accepts only `/model/{x}/invoke` shapes for Bedrock routes, +/// so a caller-supplied model segment that differs from the +/// operator-configured one is the only case this function changes — +/// and changing it (vs. rejecting) lets sandbox code that hardcodes a +/// different model continue to work, while still guaranteeing the +/// operator's chosen model is what reaches the upstream. +/// +/// Defense-in-depth model-ID validation: the server-side resolver +/// (`openshell-server::inference::resolve_provider_route`) already +/// rejects malformed Bedrock model ids at route-save time, but the +/// router enforces the same contract before interpolating +/// `route.model` into a URL path segment. Values containing `/`, `\`, +/// `?`, `#`, `%`, traversal segments, whitespace, or control chars +/// are rejected so a stale or hand-edited route store cannot produce +/// ambiguous or malformed upstream paths. 
+fn rewrite_bedrock_path(route: &ResolvedRoute, path: &str) -> Option<String> { + if !is_valid_bedrock_model_id(&route.model) { + return None; + } + let (_caller_model, action, query_tail) = parse_bedrock_invocation_path(path)?; + Some(format!("/model/{}{}{}", route.model, action, query_tail)) +} + +/// Defense-in-depth predicate matching the server-side +/// `validate_aws_bedrock_model_id` contract — see that function for the +/// authoritative reasoning. Returns `true` when `value` is safe to +/// interpolate into a Bedrock URL path segment. The router uses this +/// before constructing an upstream path so a stale or out-of-band route +/// store cannot bypass the resolver's validation. +fn is_valid_bedrock_model_id(value: &str) -> bool { + if value.is_empty() || value != value.trim() { + return false; + } + if value.contains('/') || value.contains('\\') { + return false; + } + if value.chars().any(|c| matches!(c, '?' | '#' | '%')) { + return false; + } + if value.contains("..") { + return false; + } + if value.chars().any(|c| c.is_control() || c.is_whitespace()) { + return false; + } + true +} + /// Check whether a route targets a Vertex AI Anthropic rawPredict endpoint. 
/// /// The predicate is purely structural — it tests `model_in_path`, @@ -800,10 +952,13 @@ fn is_vertex_anthropic_rawpredict_route(route: &ResolvedRoute) -> bool { mod tests { use super::{ ValidationFailure, ValidationFailureKind, build_backend_url, build_provider_url, - verify_backend_endpoint, + parse_bedrock_invocation_path, prepare_backend_request, rewrite_bedrock_path, + route_is_bedrock, verify_backend_endpoint, }; + use crate::RouterError; use crate::config::{DEFAULT_ROUTE_TIMEOUT, ResolvedRoute}; use openshell_core::inference::AuthHeader; + use std::time::Duration; use wiremock::matchers::{body_partial_json, header, method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; @@ -1670,7 +1825,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1741,7 +1896,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1863,7 +2018,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1925,7 +2080,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -1989,7 +2144,7 @@ mod tests { ); let headers = vec![("content-type".to_string(), "application/json".to_string())]; - let (builder, _url) = super::prepare_backend_request( + let (builder, _url) = prepare_backend_request( &client, &route, "POST", @@ -2022,4 +2177,265 @@ mod tests { "Vertex Gemini 
route must still rewrite the model field, got: {received_body}" ); } + + // ============================================================ + // AWS Bedrock route shaping (path rewriting + body preservation) + // ============================================================ + + /// `parse_bedrock_invocation_path` rejects malformed paths. + #[test] + fn parse_bedrock_invocation_path_rejects_malformed() { + // Empty model id: `/model//invoke` + assert!(parse_bedrock_invocation_path("/model//invoke").is_none()); + // Multi-segment model id: `/model/a/b/invoke` + assert!(parse_bedrock_invocation_path("/model/a/b/invoke").is_none()); + // Unknown action: `/model/foo/converse` + assert!(parse_bedrock_invocation_path("/model/foo/converse").is_none()); + // Streaming variant is deferred until protocol-aware error + // framing exists; the parser must reject it the same way it + // rejects any other unknown action. + assert!(parse_bedrock_invocation_path("/model/foo/invoke-with-response-stream").is_none()); + // Wrong prefix: `/v1/messages` + assert!(parse_bedrock_invocation_path("/v1/messages").is_none()); + // Missing slash before action + assert!(parse_bedrock_invocation_path("/model/foo").is_none()); + } + + #[test] + fn parse_bedrock_invocation_path_accepts_invoke() { + let parsed = parse_bedrock_invocation_path( + "/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke", + ); + assert_eq!( + parsed, + Some(("anthropic.claude-3-5-sonnet-20241022-v2:0", "/invoke", "")) + ); + } + + /// Query strings on Bedrock invoke paths are preserved through the + /// rewrite so the matcher (which accepts queries) and the upstream + /// see the same shape. + #[test] + fn parse_bedrock_invocation_path_preserves_query_string() { + let parsed = + parse_bedrock_invocation_path("/model/anthropic.claude-opus-4-7/invoke?trace=1"); + assert_eq!( + parsed, + Some(("anthropic.claude-opus-4-7", "/invoke", "?trace=1")) + ); + } + + /// `route_is_bedrock` matches the Bedrock invocation protocol(s). 
+ /// `aws_bedrock_invoke_stream` is recognized for forward-compatibility + /// even though no L7 pattern advertises it today. + #[test] + fn route_is_bedrock_matches_invoke_protocols() { + let invoke_only = test_route( + "https://example.com", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + assert!(route_is_bedrock(&invoke_only)); + + let stream_forward_compat = test_route( + "https://example.com", + &["aws_bedrock_invoke_stream"], + AuthHeader::None, + ); + assert!(route_is_bedrock(&stream_forward_compat)); + + let openai = test_route( + "https://example.com", + &["openai_chat_completions"], + AuthHeader::Bearer, + ); + assert!(!route_is_bedrock(&openai)); + } + + /// `rewrite_bedrock_path` swaps caller's model segment for the + /// route-configured model and preserves any query string. + #[test] + fn rewrite_bedrock_path_substitutes_operator_model() { + let mut route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + route.model = "anthropic.claude-opus-4-7".to_string(); + + let rewritten = rewrite_bedrock_path(&route, "/model/some-other-model/invoke"); + assert_eq!( + rewritten, + Some("/model/anthropic.claude-opus-4-7/invoke".to_string()) + ); + + let rewritten_with_query = + rewrite_bedrock_path(&route, "/model/some-other-model/invoke?trace=1"); + assert_eq!( + rewritten_with_query, + Some("/model/anthropic.claude-opus-4-7/invoke?trace=1".to_string()) + ); + } + + #[test] + fn rewrite_bedrock_path_returns_none_for_non_bedrock_path() { + let route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + assert_eq!(rewrite_bedrock_path(&route, "/v1/messages"), None); + assert_eq!(rewrite_bedrock_path(&route, "/model//invoke"), None); + assert_eq!(rewrite_bedrock_path(&route, "/model/a/b/invoke"), None); + // Streaming variant is deferred at the L7 layer; the router + // must not produce an upstream path for it either. 
+ assert_eq!( + rewrite_bedrock_path(&route, "/model/x/invoke-with-response-stream"), + None + ); + } + + /// Defense-in-depth: `rewrite_bedrock_path` rejects route models + /// that would produce ambiguous or malformed upstream URL paths, + /// even if a malformed value somehow reached the router store. + #[test] + fn rewrite_bedrock_path_rejects_unsafe_route_model() { + let mut route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + + for unsafe_model in [ + "anthropic.claude/../../etc/passwd", + "anthropic.claude\\backslash", + "model?injected=1", + "model#fragment", + "percent%2fencoded", + "..", + " leading-space", + "trailing-space ", + "tab\there", + "newline\nhere", + "", + ] { + route.model = unsafe_model.to_string(); + assert!( + rewrite_bedrock_path(&route, "/model/foo/invoke").is_none(), + "rewrite_bedrock_path must reject unsafe route.model: {unsafe_model:?}" + ); + } + } + + /// End-to-end: an inbound Bedrock request that names a different + /// model in the path arrives at the upstream/bridge with the + /// operator's model, and the body is unchanged (no `"model"` + /// injection). + #[tokio::test] + async fn bedrock_route_rewrites_model_in_path_and_preserves_body() { + let mock_server = MockServer::start().await; + let mut route = test_route( + &mock_server.uri(), + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + route.model = "anthropic.claude-opus-4-7".to_string(); + + // The mock asserts the upstream sees the operator's model in + // the path, NOT the caller's model. + Mock::given(method("POST")) + .and(path("/model/anthropic.claude-opus-4-7/invoke")) + // Caller body has a "model" key; we expect it to pass + // through unchanged. The mock attaches no body matcher, so + // any body is accepted here; the assertion below pins the + // received body explicitly. 
+ .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({"ok": true}))) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(5)) + .build() + .expect("client"); + + // Caller-supplied body — we deliberately include a "model" + // field naming a DIFFERENT model than the operator's, to + // verify the router does not inject route.model on top of + // it. The body should pass through verbatim because Bedrock + // encodes the model in the path. + let caller_body = serde_json::json!({ + "model": "caller-supplied-model-name", + "messages": [{"role": "user", "content": "hi"}], + }); + + let (builder, url) = prepare_backend_request( + &client, + &route, + "POST", + "/model/some-other-model/invoke", + &[], + bytes::Bytes::from(caller_body.to_string()), + false, + ) + .expect("prepare should succeed"); + + // URL should target the operator's model, not the caller's. + assert!( + url.ends_with("/model/anthropic.claude-opus-4-7/invoke"), + "URL must use operator model, got: {url}" + ); + + let resp = builder.send().await.expect("send"); + assert_eq!(resp.status(), 200); + + // Inspect what wiremock actually received. + let received = mock_server.received_requests().await.expect("requests"); + assert_eq!(received.len(), 1); + let req = &received[0]; + let received_body: serde_json::Value = + serde_json::from_slice(&req.body).expect("json body"); + // Caller's model name should pass through (NOT replaced by + // route.model). This proves the body is untouched. + assert_eq!( + received_body.get("model").and_then(|v| v.as_str()), + Some("caller-supplied-model-name"), + "Bedrock route must NOT rewrite body model, got: {received_body}" + ); + assert!( + received_body.get("messages").is_some(), + "messages field should pass through unchanged" + ); + } + + /// Defense-in-depth: a Bedrock route receiving a non-Bedrock path + /// is rejected rather than forwarded. 
The L7 pattern detector + /// upstream of the router should never produce this combination, + /// but if it ever did, we must not silently forward. + #[test] + fn bedrock_route_rejects_non_bedrock_path() { + let client = reqwest::Client::new(); + let route = test_route( + "https://bedrock-bridge.example", + &["aws_bedrock_invoke"], + AuthHeader::None, + ); + let result = prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &[], + bytes::Bytes::from(r"{}"), + false, + ); + match result { + Err(RouterError::Internal(msg)) => { + assert!( + msg.contains("Bedrock") && msg.contains("/v1/messages"), + "error must name the offending path, got: {msg}" + ); + } + other => panic!("expected RouterError::Internal, got {other:?}"), + } + } } diff --git a/crates/openshell-sandbox/src/l7/inference.rs b/crates/openshell-sandbox/src/l7/inference.rs index ec789ef95..e06dffd32 100644 --- a/crates/openshell-sandbox/src/l7/inference.rs +++ b/crates/openshell-sandbox/src/l7/inference.rs @@ -60,7 +60,7 @@ impl InferenceApiPattern { } } -/// Default patterns for known inference APIs (`OpenAI`, Anthropic). +/// Default patterns for known inference APIs (`OpenAI`, Anthropic, AWS Bedrock). pub fn default_patterns() -> Vec<InferenceApiPattern> { vec![ InferenceApiPattern { @@ -114,10 +114,40 @@ pub fn default_patterns() -> Vec<InferenceApiPattern> { kind: "models_get".to_string(), framing: ResponseFraming::Buffered, }, + // AWS Bedrock InvokeModel. The `*` segment is the Bedrock model id + // (e.g. `anthropic.claude-opus-4-7`). + // + // InvokeModel returns ONE JSON object the client decodes whole — it + // must be served buffered with an accurate `Content-Length`, otherwise + // the streaming proxy's size-cap or idle-timeout failure mode would + // append an SSE error event to bytes the caller decodes as one JSON + // object, silently corrupting it. 
+ // + // `InvokeModelWithResponseStream` + // (`/model/{id}/invoke-with-response-stream`) is deferred to a + // follow-up: Bedrock streams use AWS event-stream framing, but the + // shared streaming relay's truncation/timeout/upstream-error path + // emits SSE-formatted error frames, which would corrupt downstream + // event-stream parsers. The follow-up adds protocol-aware error + // termination before re-introducing the streaming pattern. + InferenceApiPattern { + method: "POST".to_string(), + path_glob: "/model/*/invoke".to_string(), + protocol: "aws_bedrock_invoke".to_string(), + kind: "messages".to_string(), + framing: ResponseFraming::Buffered, + }, ] } /// Check if an HTTP request matches a known inference API pattern. +/// +/// Path globs support two wildcard shapes (one per pattern, not both): +/// - **Trailing `/*`**: `/v1/models/*` matches `/v1/models` and any +/// `/v1/models/<rest>` (one or many path segments). +/// - **Middle `/*/`**: `/model/*/invoke` matches `/model/<seg>/invoke` +/// for a single non-empty segment that contains no `/`. Used for +/// AWS Bedrock's `/model/{modelId}/invoke[-with-response-stream]`. pub fn detect_inference_pattern<'a>( method: &str, path: &str, @@ -137,6 +167,21 @@ pub fn detect_inference_pattern<'a>( .is_some_and(|suffix| suffix.starts_with('/')); } + if let Some((before, after)) = p.path_glob.split_once("/*/") { + let Some(rest) = path_only.strip_prefix(before) else { + return false; + }; + let Some(rest) = rest.strip_prefix('/') else { + return false; + }; + // rest must look like `<seg>/<after>` where `<seg>` is non-empty + // and contains no `/` (single path segment). + let Some(slash_at) = rest.find('/') else { + return false; + }; + return slash_at > 0 && rest[slash_at + 1..] 
== *after; + } + path_only == p.path_glob }) } @@ -531,7 +576,7 @@ mod tests { for pattern in &patterns { let expected_buffered = matches!( pattern.protocol.as_str(), - "model_discovery" | "openai_embeddings" + "model_discovery" | "openai_embeddings" | "aws_bedrock_invoke" ); assert_eq!( pattern.is_buffered(), @@ -543,6 +588,101 @@ mod tests { } } + #[test] + fn detect_aws_bedrock_invoke() { + let patterns = default_patterns(); + let result = + detect_inference_pattern("POST", "/model/anthropic.claude-opus-4-7/invoke", &patterns); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "aws_bedrock_invoke"); + assert_eq!(result.unwrap().kind, "messages"); + } + + /// `InvokeModelWithResponseStream` is intentionally NOT advertised by + /// the default pattern set today. The shared streaming relay's + /// truncation/timeout/upstream-error path emits SSE-formatted error + /// frames, which would corrupt the AWS event-stream framing Bedrock + /// streams use. The pattern is restored alongside protocol-aware + /// error termination in a follow-up; until then, intercepted + /// `/invoke-with-response-stream` requests fall through to the + /// non-inference path rather than being mis-routed through the + /// SSE-injecting relay. 
+ #[test] + fn aws_bedrock_invoke_stream_pattern_is_deferred() { + let patterns = default_patterns(); + assert!( + detect_inference_pattern( + "POST", + "/model/anthropic.claude-opus-4-7/invoke-with-response-stream", + &patterns, + ) + .is_none(), + "InvokeModelWithResponseStream must not be advertised until \ + protocol-aware AWS event-stream error framing exists" + ); + assert!( + !patterns + .iter() + .any(|p| p.protocol == "aws_bedrock_invoke_stream"), + "no pattern should declare protocol=aws_bedrock_invoke_stream" + ); + } + + #[test] + fn aws_bedrock_invoke_with_query_string() { + let patterns = default_patterns(); + let result = detect_inference_pattern("POST", "/model/foo.bar/invoke?trace=1", &patterns); + assert!(result.is_some()); + assert_eq!(result.unwrap().protocol, "aws_bedrock_invoke"); + } + + #[test] + fn aws_bedrock_rejects_empty_model_id() { + let patterns = default_patterns(); + // `/model//invoke` — empty wildcard segment is not a valid Bedrock id. + assert!(detect_inference_pattern("POST", "/model//invoke", &patterns).is_none()); + } + + #[test] + fn aws_bedrock_rejects_multi_segment_model_id() { + let patterns = default_patterns(); + // The `*` matches a single path segment only; multi-segment ids must + // not match (would be a path-traversal liability otherwise). + assert!(detect_inference_pattern("POST", "/model/foo/bar/invoke", &patterns).is_none()); + } + + #[test] + fn aws_bedrock_rejects_get() { + let patterns = default_patterns(); + assert!( + detect_inference_pattern("GET", "/model/anthropic.claude-opus-4-7/invoke", &patterns) + .is_none() + ); + } + + #[test] + fn aws_bedrock_rejects_unknown_action() { + let patterns = default_patterns(); + assert!(detect_inference_pattern("POST", "/model/foo/converse", &patterns).is_none()); + } + + /// `InvokeModel` returns one JSON object — must be served buffered. 
+ /// Sending it through the streaming proxy would risk truncation or an + /// appended SSE error event corrupting the JSON body the caller decodes. + #[test] + fn aws_bedrock_invoke_is_buffered() { + let patterns = default_patterns(); + let invoke = + detect_inference_pattern("POST", "/model/anthropic.claude-opus-4-7/invoke", &patterns) + .expect("InvokeModel pattern must match"); + assert_eq!(invoke.protocol, "aws_bedrock_invoke"); + assert!( + invoke.is_buffered(), + "InvokeModel must be Buffered (one JSON object, accurate Content-Length); \ + streaming would risk corrupting the response" + ); + } + #[test] fn parse_simple_post_request() { let body = b"{\"hello\":true}"; diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 4552fceae..10c10868e 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -1821,6 +1821,7 @@ mod tests { assert_eq!( ids, vec![ + "aws-bedrock", "claude-code", "codex", "copilot", diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 13496cd99..bf410b129 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -370,6 +370,52 @@ fn vertex_location_and_host(region: &str) -> (String, String) { (location, host) } +/// Reject Bedrock model ids that would produce ambiguous or malformed +/// upstream URL paths. +/// +/// AWS Bedrock encodes the model in `/model/<model-id>/invoke`, so the value +/// is interpolated directly into a URL path segment. Without +/// validation, a value containing `/`, `\`, percent escapes, query or +/// fragment delimiters, traversal segments, whitespace, or control +/// characters could break out of the path segment, smuggle a different +/// upstream route, or produce ambiguous/malformed paths upstream. 
+/// +/// Mirrors [`validate_vertex_model_id`] — Bedrock has the same exposure +/// for the same reason, and the contract is enforced again at the +/// router layer (`is_valid_bedrock_model_id`) as defense-in-depth. +fn validate_aws_bedrock_model_id(value: &str) -> Result<(), Status> { + let trimmed = value.trim(); + if trimmed.is_empty() { + return Err(Status::invalid_argument("model_id is required")); + } + if value != trimmed { + return Err(Status::invalid_argument(format!( + "AWS Bedrock model_id must not include leading or trailing whitespace: {value:?}" + ))); + } + if value.contains('/') || value.contains('\\') { + return Err(Status::invalid_argument(format!( + "AWS Bedrock model_id must not contain path separators: {value:?}" + ))); + } + if value.chars().any(|c| matches!(c, '?' | '#' | '%')) { + return Err(Status::invalid_argument(format!( + "AWS Bedrock model_id must not contain URL delimiters or percent escapes: {value:?}" + ))); + } + if value.contains("..") { + return Err(Status::invalid_argument(format!( + "AWS Bedrock model_id must not contain traversal segments: {value:?}" + ))); + } + if value.chars().any(|c| c.is_control() || c.is_whitespace()) { + return Err(Status::invalid_argument(format!( + "AWS Bedrock model_id must not contain whitespace or control characters: {value:?}" + ))); + } + Ok(()) +} + fn validate_vertex_model_id(value: &str) -> Result<(), Status> { let trimmed = value.trim(); if trimmed.is_empty() { @@ -620,26 +666,33 @@ fn resolve_provider_route( let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| { Status::invalid_argument(format!( "provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \ - (supported: openai, anthropic, nvidia, google-vertex-ai)", + (supported: openai, anthropic, nvidia, google-vertex-ai, aws-bedrock)", name = provider.object_name() )) })?; - let api_key = find_provider_api_key( - provider, - profile.credential_key_names, - if provider_type == 
"google-vertex-ai" { - CredentialLookup::PreferredOnly - } else { - CredentialLookup::PreferredThenAny - }, - ) - .ok_or_else(|| { - Status::invalid_argument(format!( - "provider '{name}' has no usable API key credential", - name = provider.object_name() - )) - })?; + // Profiles with `auth: None` are bridge-fronted — the upstream + // authenticates itself, so the router doesn't need a credential at + // route-resolution time. Today this is `aws-bedrock`. + let api_key = if matches!(profile.auth, openshell_core::inference::AuthHeader::None) { + String::new() + } else { + find_provider_api_key( + provider, + profile.credential_key_names, + if provider_type == "google-vertex-ai" { + CredentialLookup::PreferredOnly + } else { + CredentialLookup::PreferredThenAny + }, + ) + .ok_or_else(|| { + Status::invalid_argument(format!( + "provider '{name}' has no usable API key credential", + name = provider.object_name() + )) + })? + }; // Vertex AI requires a model-aware URL; delegate to specialised resolver. if provider_type == "google-vertex-ai" { @@ -656,6 +709,16 @@ fn resolve_provider_route( }); } + // AWS Bedrock encodes the model in the URL path + // (`/model/{modelId}/invoke`), so the model id is interpolated directly + // into a path segment by the router. Validate up front so the route + // store cannot hold a model id that would produce ambiguous or + // malformed upstream paths. Defense-in-depth: the router enforces + // the same contract again before constructing an upstream URL.
+ if provider_type == "aws-bedrock" { + validate_aws_bedrock_model_id(model_id)?; + } + let base_url = find_provider_config_value(provider, profile.base_url_config_keys) .unwrap_or_else(|| profile.default_base_url.to_string()) .trim() @@ -1059,6 +1122,203 @@ mod tests { assert_eq!(config.model_id, "gpt-4.1"); } + #[tokio::test] + async fn upsert_cluster_route_succeeds_for_aws_bedrock_with_bridge_url() { + // aws-bedrock is registered with `auth: AuthHeader::None` (the + // bridge-fronted shape) so route resolution does NOT require a + // real API key — but `provider create` still requires a + // non-empty credentials map at the gRPC layer, so operators + // pass a placeholder credential per the docs. The router + // ignores it on the outbound path. + // + // The other half of the contract is `BEDROCK_BASE_URL`: with + // `default_base_url: ""` in the core profile, providers + // without it fail route resolution rather than silently + // forwarding prompts to AWS Bedrock with no usable auth. This + // test pins down the success path. + let store = test_store().await; + + let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "provider-bedrock-bridge".to_string(), + name: "bedrock-bridge".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + r#type: "aws-bedrock".to_string(), + // Placeholder credential — the router ignores it because + // auth: None skips header injection. Mirrors the + // doc-recommended `--credential AWS_ACCESS_KEY_ID=unused-bridge-fronted-shape`. 
+ credentials: std::iter::once(( + "AWS_ACCESS_KEY_ID".to_string(), + "unused-bridge-fronted-shape".to_string(), + )) + .collect(), + config: std::iter::once(( + "BEDROCK_BASE_URL".to_string(), + "http://bedrock-bridge.demo.svc.cluster.local:8080".to_string(), + )) + .collect(), + credential_expires_at_ms: std::collections::HashMap::new(), + }; + store + .put_message(&provider) + .await + .expect("provider should persist"); + + let upserted = upsert_cluster_inference_route( + &store, + CLUSTER_INFERENCE_ROUTE_NAME, + "bedrock-bridge", + "anthropic.claude-3-5-sonnet-20241022-v2:0", + 0, + false, + ) + .await + .expect("upsert should succeed for aws-bedrock provider"); + + assert_eq!(upserted.route.object_name(), CLUSTER_INFERENCE_ROUTE_NAME); + let config = upserted.route.config.as_ref().expect("config"); + assert_eq!(config.provider_name, "bedrock-bridge"); + assert_eq!(config.model_id, "anthropic.claude-3-5-sonnet-20241022-v2:0"); + + // Verify the resolved route metadata reflects bridge-fronted + // auth (empty api_key + provider_type = "aws-bedrock"). Note + // the api_key is empty even though the provider has a + // credential — auth: None skips api-key lookup entirely. + let managed = resolve_route_by_name(&store, CLUSTER_INFERENCE_ROUTE_NAME) + .await + .expect("route should resolve") + .expect("managed route should exist"); + assert_eq!(managed.provider_type, "aws-bedrock"); + assert_eq!( + managed.base_url, + "http://bedrock-bridge.demo.svc.cluster.local:8080" + ); + assert_eq!(managed.api_key, ""); + } + + #[tokio::test] + async fn upsert_cluster_route_rejects_aws_bedrock_without_bedrock_base_url() { + // The companion to upsert_cluster_route_succeeds_for_aws_bedrock_with_bridge_url: + // an aws-bedrock provider without BEDROCK_BASE_URL must be + // rejected at route resolution. This pins down the safety + // contract johntmyers asked for — until the SigV4 follow-up + // lands, the router must NOT silently forward prompts to AWS + // with auth: None. 
+ // + // Mechanism: AWS_BEDROCK_PROFILE.default_base_url is "". When + // the provider has no BEDROCK_BASE_URL config, base_url + // resolves to empty, triggering the existing + // empty-base_url check in resolve_provider_route. + let store = test_store().await; + + let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "provider-bedrock-misconfigured".to_string(), + name: "bedrock-misconfigured".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + r#type: "aws-bedrock".to_string(), + credentials: std::iter::once(( + "AWS_ACCESS_KEY_ID".to_string(), + "unused-bridge-fronted-shape".to_string(), + )) + .collect(), + // Intentionally no BEDROCK_BASE_URL. + config: std::collections::HashMap::new(), + credential_expires_at_ms: std::collections::HashMap::new(), + }; + store + .put_message(&provider) + .await + .expect("provider should persist"); + + let err = upsert_cluster_inference_route( + &store, + CLUSTER_INFERENCE_ROUTE_NAME, + "bedrock-misconfigured", + "anthropic.claude-3-5-sonnet-20241022-v2:0", + 0, + false, + ) + .await + .expect_err("upsert should reject aws-bedrock provider without BEDROCK_BASE_URL"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("empty base_url"), + "error should name the missing base_url, got: {}", + err.message() + ); + } + + /// Bedrock route resolution must reject model ids that would + /// produce ambiguous or malformed upstream URL paths. The Vertex + /// suite has equivalent coverage; this is the Bedrock companion. 
+ #[tokio::test] + async fn upsert_cluster_route_rejects_aws_bedrock_unsafe_model_id() { + let store = test_store().await; + + let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "provider-bedrock-bridge".to_string(), + name: "bedrock-bridge".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + r#type: "aws-bedrock".to_string(), + credentials: std::collections::HashMap::new(), + config: std::iter::once(( + "BEDROCK_BASE_URL".to_string(), + "http://bedrock-bridge.demo.svc.cluster.local:8080".to_string(), + )) + .collect(), + credential_expires_at_ms: std::collections::HashMap::new(), + }; + store + .put_message(&provider) + .await + .expect("provider should persist"); + + for unsafe_model in [ + "anthropic.claude/../../etc/passwd", + "back\\slash-id", + "model?injected=1", + "model#fragment", + "percent%2fencoded", + "model..v2", + " leading-space", + "trailing-space ", + "tab\there", + "newline\nhere", + ] { + let err = upsert_cluster_inference_route( + &store, + CLUSTER_INFERENCE_ROUTE_NAME, + "bedrock-bridge", + unsafe_model, + 0, + false, + ) + .await + .expect_err(unsafe_model); + assert_eq!( + err.code(), + tonic::Code::InvalidArgument, + "{unsafe_model:?} should fail with InvalidArgument" + ); + assert!( + err.message().contains("AWS Bedrock model_id"), + "error must name AWS Bedrock model_id for {unsafe_model:?}, got: {}", + err.message() + ); + } + } + #[tokio::test] async fn resolve_managed_route_returns_none_when_missing() { let store = test_store().await; @@ -2496,6 +2756,79 @@ mod tests { ); } + /// Bedrock model ids appear as a URL path segment in + /// `/model/{modelId}/invoke`. Mirrors the Vertex validation suite. + #[test] + fn validate_aws_bedrock_model_id_accepts_well_formed_ids() { + // Real Bedrock model ids: provider-prefixed, dotted, hyphenated, + // possibly versioned with `:0` suffix.
+ validate_aws_bedrock_model_id("anthropic.claude-opus-4-7").expect("dotted id"); + validate_aws_bedrock_model_id("anthropic.claude-3-5-sonnet-20241022-v2:0") + .expect("versioned id"); + validate_aws_bedrock_model_id("meta.llama3-70b-instruct-v1:0").expect("meta id"); + validate_aws_bedrock_model_id("mistral.mixtral-8x7b-instruct-v0:1").expect("mistral id"); + } + + #[test] + fn validate_aws_bedrock_model_id_rejects_empty() { + let err = validate_aws_bedrock_model_id("").expect_err("empty must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!(err.message().contains("required")); + } + + #[test] + fn validate_aws_bedrock_model_id_rejects_path_separators() { + for value in ["foo/bar", "anthropic.claude/../passwd", "back\\slash"] { + let err = validate_aws_bedrock_model_id(value).expect_err(value); + assert!( + err.message().contains("path separators"), + "expected path-separator error for {value:?}, got: {}", + err.message() + ); + } + } + + #[test] + fn validate_aws_bedrock_model_id_rejects_url_delimiters() { + for value in ["model?injected=1", "model#fragment", "percent%2fencoded"] { + let err = validate_aws_bedrock_model_id(value).expect_err(value); + assert!( + err.message().contains("URL delimiters"), + "expected URL-delimiter error for {value:?}, got: {}", + err.message() + ); + } + } + + #[test] + fn validate_aws_bedrock_model_id_rejects_traversal() { + let err = validate_aws_bedrock_model_id("model..v2") + .expect_err("double-dot traversal must be rejected"); + assert!( + err.message().contains("traversal"), + "expected path traversal error, got: {}", + err.message() + ); + } + + #[test] + fn validate_aws_bedrock_model_id_rejects_whitespace_and_control() { + for value in [ + " leading", + "trailing ", + "in middle", + "tab\tin", + "newline\nin", + ] { + let err = validate_aws_bedrock_model_id(value).expect_err(value); + assert!( + err.message().contains("whitespace") || err.message().contains("control"), + "expected 
whitespace/control error for {value:?}, got: {}", + err.message() + ); + } + } + #[test] fn effective_route_name_defaults_empty_to_inference_local() { assert_eq!( diff --git a/docs/sandboxes/inference-routing.mdx b/docs/sandboxes/inference-routing.mdx index 0a4e9d726..0a9280008 100644 --- a/docs/sandboxes/inference-routing.mdx +++ b/docs/sandboxes/inference-routing.mdx @@ -24,9 +24,9 @@ If code calls an external inference host directly, OpenShell evaluates that traf | Property | Detail | |---|---| | Credentials | No sandbox API keys needed. Credentials come from the configured provider record. The router strips caller-supplied `Authorization` before forwarding the request. | -| Header forwarding | `inference.local` forwards only a per-provider header allowlist. OpenAI routes allow `openai-organization` and `x-model-id`. Anthropic routes allow `anthropic-version` and `anthropic-beta`. Vertex Claude rawPredict routes strip `anthropic-beta` and do not forward `anthropic-version` as a header because the router injects `anthropic_version` into the Vertex request body. NVIDIA routes allow `x-model-id`. All other caller headers are stripped. | +| Header forwarding | `inference.local` forwards only a per-provider header allowlist. OpenAI routes allow `openai-organization` and `x-model-id`. Anthropic routes allow `anthropic-version` and `anthropic-beta`. Vertex Claude rawPredict routes strip `anthropic-beta` and do not forward `anthropic-version` as a header because the router injects `anthropic_version` into the Vertex request body. NVIDIA routes allow `x-model-id`. AWS Bedrock routes have no passthrough headers today. All other caller headers are stripped. | | Configuration | One provider and one model define sandbox inference for the active gateway. Every sandbox on that gateway sees the same `inference.local` backend. | -| Provider support | NVIDIA, Anthropic, Google Vertex AI, and any OpenAI-compatible provider all work through the same endpoint. 
Vertex routes Claude models through `/v1/messages` and non-Anthropic models through `/v1/chat/completions`. The gateway resolves the upstream Vertex host from the provider config, including regional, global, and supported multi-region endpoints. | +| Provider support | NVIDIA, Anthropic, Google Vertex AI, AWS Bedrock (via a translating bridge — direct AWS with SigV4 signing is a separate follow-up), and any OpenAI-compatible provider all work through the same endpoint. Vertex routes Claude models through `/v1/messages` and non-Anthropic models through `/v1/chat/completions`. The gateway resolves the upstream Vertex host from the provider config, including regional, global, and supported multi-region endpoints. | | Streaming reliability | The router tolerates idle gaps of up to 120 seconds between streamed chunks so long reasoning responses are not cut off mid-stream. | | Hot refresh | OpenShell picks up provider credential changes and inference updates without recreating sandboxes. Changes propagate within about 5 seconds by default. | @@ -54,6 +54,22 @@ Supported request patterns depend on the provider configured for `inference.loca |---|---|---| | Messages | `POST` | `/v1/messages` | + + + + +| Pattern | Method | Path | +|---|---|---| +| InvokeModel | `POST` | `/model/{modelId}/invoke` | + +The `{modelId}` segment is constrained to a single non-empty path segment to avoid path-traversal liabilities. `/model//invoke` and `/model/a/b/invoke` both no-match. + + +Today the `aws-bedrock` provider type is bridge-fronted only. The router does not inject any auth header on outbound requests; the configured `BEDROCK_BASE_URL` is expected to point at a translating bridge or Bedrock-compatible proxy whose own pod holds operator-side credentials. SigV4 signing for direct AWS Bedrock is deferred to a follow-up release. + +`InvokeModelWithResponseStream` is intentionally not advertised yet. 
The streaming path emits AWS event-stream framing, which our protocol-aware error path does not yet model; surfacing it without that work risks shipping responses the sandbox cannot interpret on failure. It will land alongside the streaming-error work in a follow-up. + + @@ -148,6 +164,35 @@ openshell provider create --name anthropic-prod --type anthropic --from-existing This reads `ANTHROPIC_API_KEY` from your environment. + + + + +```shell +openshell provider create \ + --name bedrock-bridge \ + --type aws-bedrock \ + --credential AWS_ACCESS_KEY_ID=unused-bridge-fronted-shape \ + --config BEDROCK_BASE_URL=http://your-bedrock-bridge.your-ns.svc.cluster.local:8080 +``` + +Then set the inference route, passing `--no-verify` because the validation probe does not yet support Bedrock protocols: + +```shell +openshell inference set \ + --provider bedrock-bridge \ + --model anthropic.claude-3-5-sonnet-20241022-v2:0 \ + --no-verify +``` + +**Why a placeholder credential?** `provider create` requires a non-empty `credentials` map even when the upstream auth scheme is `AuthHeader::None` — `aws-bedrock` falls into that bucket today because the router never injects a credential header on outbound requests; the bridge holds operator-side auth in its own pod. Any non-empty string value satisfies the structural requirement; `unused-bridge-fronted-shape` makes the intent obvious in `openshell provider get` output. The same pattern applies to any standalone-router profile that registers `AuthHeader::None`. When the SigV4 follow-up lands and the router begins signing requests itself, this becomes a real key. + +**About the bridge-fronted shape.** The router does not inject any auth header on outbound requests. Point `BEDROCK_BASE_URL` at a translating bridge or Bedrock-compatible proxy that handles authentication in its own pod. The bridge is expected to accept Bedrock InvokeModel requests on the patterns listed above and forward to the operator's real upstream. 
+ +**About `--no-verify`.** The default validation probe does not yet recognize the `aws_bedrock_invoke` protocol, so without `--no-verify` the `inference set` call would fail before it could mint a route. The first sandbox round-trip is the real verification today. + +**For direct AWS Bedrock**, wait for a future release that adds the SigV4 router-side signer; direct AWS endpoints are not supported today. Until then, supply `BEDROCK_BASE_URL` when you create the provider — the core profile sets `default_base_url: ""`, so route resolution rejects providers without it rather than silently forwarding prompts to AWS with no usable auth. + diff --git a/docs/sandboxes/manage-providers.mdx b/docs/sandboxes/manage-providers.mdx index a6b9654d0..bc522e2ce 100644 --- a/docs/sandboxes/manage-providers.mdx +++ b/docs/sandboxes/manage-providers.mdx @@ -247,6 +247,7 @@ The following provider types are supported. | Type | Environment Variables Injected | Typical Use | |---|---|---| | `anthropic` | `ANTHROPIC_API_KEY` | Anthropic API | +| `aws-bedrock` | `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, `AWS_REGION` | AWS Bedrock InvokeModel via a translating bridge. Today the router does not inject any auth header; the configured `BEDROCK_BASE_URL` upstream is expected to handle auth itself. Refer to [Inference Routing](/sandboxes/inference-routing). | | `claude` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | Claude Code, Anthropic API | | `codex` | `OPENAI_API_KEY` | OpenAI Codex | | `copilot` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | GitHub Copilot CLI | @@ -273,6 +274,7 @@ The following providers have been tested with `inference.local`.
Any provider th | Provider | Name | Type | Base URL | API Key Variable | |---|---|---|---|---| +| AWS Bedrock (via bridge) | `bedrock-bridge` | `aws-bedrock` | Operator-supplied `BEDROCK_BASE_URL` | None at router level (bridge holds creds) | | NVIDIA API Catalog | `nvidia-prod` | `nvidia` | `https://integrate.api.nvidia.com/v1` | `NVIDIA_API_KEY` | | Anthropic | `anthropic-prod` | `anthropic` | `https://api.anthropic.com` | `ANTHROPIC_API_KEY` | | Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` | diff --git a/providers/aws-bedrock.yaml b/providers/aws-bedrock.yaml new file mode 100644 index 000000000..d00967343 --- /dev/null +++ b/providers/aws-bedrock.yaml @@ -0,0 +1,58 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: aws-bedrock +display_name: AWS Bedrock +description: | + Anthropic + Mistral + Llama models served via the AWS Bedrock InvokeModel API. + + This profile supports a bridge-fronted shape only: operators point + `BEDROCK_BASE_URL` at a translating bridge or Bedrock-compatible + proxy that handles AWS auth in its own pod, and the bridge — not + this profile — is responsible for declaring its own egress endpoint + and binary attribution. As a result the profile is intentionally + non-egress-granting: `endpoints` and `binaries` are empty so that + selecting `aws-bedrock` does not implicitly punch a hole to AWS for + any sandbox. + + Direct AWS Bedrock with router-side SigV4 signing is a separate + follow-up; until that lands the AWS_* credentials below are + declarative schema only — none are required, none are + auto-discovered, and the router does not consume them. The SigV4 + follow-up will populate `endpoints` and `binaries` once the router + can sign outbound requests itself. 
+category: inference +inference_capable: true +credentials: + # Declarative-only until the SigV4 follow-up lands. None of these are + # required for the bridge-fronted shape; the router does not inject + # them on outbound requests. + - name: aws_access_key_id + description: AWS access key id (used by the SigV4 signer follow-up; unused today) + env_vars: [AWS_ACCESS_KEY_ID] + required: false + - name: aws_secret_access_key + description: AWS secret access key (used by the SigV4 signer follow-up; unused today) + env_vars: [AWS_SECRET_ACCESS_KEY] + required: false + - name: aws_session_token + description: Optional session token for temporary credentials (STS, IAM Roles for Service Accounts) + env_vars: [AWS_SESSION_TOKEN] + required: false + - name: aws_region + description: AWS region (used by the SigV4 signer follow-up; unused today) + env_vars: [AWS_REGION, AWS_DEFAULT_REGION] + required: false +discovery: + # Bridge-fronted routing intentionally does not consume AWS + # credentials, so `--from-existing` does not scan for them today. + # The SigV4 follow-up will repopulate this list. + credentials: [] +# Endpoints and binaries are intentionally empty: this profile does +# not grant direct egress to AWS Bedrock. Operators expose their own +# bridge service (and its egress endpoint / binary attribution) via +# their cluster network policies. The SigV4 follow-up will add the +# Bedrock runtime endpoint and Claude binaries once router-side +# signing exists. +endpoints: [] +binaries: []