Skip to content

Commit c071198

Browse files
authored
[router] add centralized configuration module for sgl-router (sgl-project#7588)
1 parent d7374d7 commit c071198

File tree

5 files changed

+916
-60
lines changed

5 files changed

+916
-60
lines changed

sgl-router/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ metrics-exporter-prometheus = "0.17.0"
3636
# Added for request tracing
3737
uuid = { version = "1.10", features = ["v4", "serde"] }
3838
thiserror = "2.0.12"
39+
url = "2.5.4"
3940

4041
[dev-dependencies]
4142
criterion = { version = "0.5", features = ["html_reports"] }

sgl-router/src/config/mod.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
pub mod types;
2+
pub mod validation;
3+
4+
pub use types::*;
5+
pub use validation::*;
6+
7+
/// Configuration errors
8+
#[derive(Debug, thiserror::Error)]
9+
pub enum ConfigError {
10+
#[error("Validation failed: {reason}")]
11+
ValidationFailed { reason: String },
12+
13+
#[error("Invalid value for field '{field}': {value} - {reason}")]
14+
InvalidValue {
15+
field: String,
16+
value: String,
17+
reason: String,
18+
},
19+
20+
#[error("Incompatible configuration: {reason}")]
21+
IncompatibleConfig { reason: String },
22+
23+
#[error("Missing required field: {field}")]
24+
MissingRequired { field: String },
25+
}
26+
27+
/// Result type for configuration operations
28+
pub type ConfigResult<T> = Result<T, ConfigError>;

sgl-router/src/config/types.rs

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
use super::{ConfigError, ConfigResult};
2+
use serde::{Deserialize, Serialize};
3+
use std::collections::HashMap;
4+
5+
/// Main router configuration
6+
#[derive(Debug, Clone, Serialize, Deserialize)]
7+
pub struct RouterConfig {
8+
/// Routing mode configuration
9+
pub mode: RoutingMode,
10+
/// Policy configuration
11+
pub policy: PolicyConfig,
12+
/// Server host address
13+
pub host: String,
14+
/// Server port
15+
pub port: u16,
16+
/// Maximum payload size in bytes
17+
pub max_payload_size: usize,
18+
/// Request timeout in seconds
19+
pub request_timeout_secs: u64,
20+
/// Worker startup timeout in seconds
21+
pub worker_startup_timeout_secs: u64,
22+
/// Worker health check interval in seconds
23+
pub worker_startup_check_interval_secs: u64,
24+
/// Service discovery configuration (optional)
25+
pub discovery: Option<DiscoveryConfig>,
26+
/// Metrics configuration (optional)
27+
pub metrics: Option<MetricsConfig>,
28+
/// Log directory (None = stdout only)
29+
pub log_dir: Option<String>,
30+
/// Verbose logging
31+
pub verbose: bool,
32+
}
33+
34+
/// Routing mode configuration
35+
#[derive(Debug, Clone, Serialize, Deserialize)]
36+
#[serde(tag = "type")]
37+
pub enum RoutingMode {
38+
#[serde(rename = "regular")]
39+
Regular {
40+
/// List of worker URLs
41+
worker_urls: Vec<String>,
42+
},
43+
#[serde(rename = "prefill_decode")]
44+
PrefillDecode {
45+
/// Prefill worker URLs with optional bootstrap ports
46+
prefill_urls: Vec<(String, Option<u16>)>,
47+
/// Decode worker URLs
48+
decode_urls: Vec<String>,
49+
},
50+
}
51+
52+
impl RoutingMode {
53+
pub fn is_pd_mode(&self) -> bool {
54+
matches!(self, RoutingMode::PrefillDecode { .. })
55+
}
56+
57+
pub fn worker_count(&self) -> usize {
58+
match self {
59+
RoutingMode::Regular { worker_urls } => worker_urls.len(),
60+
RoutingMode::PrefillDecode {
61+
prefill_urls,
62+
decode_urls,
63+
} => prefill_urls.len() + decode_urls.len(),
64+
}
65+
}
66+
}
67+
68+
/// Policy configuration for routing
69+
#[derive(Debug, Clone, Serialize, Deserialize)]
70+
#[serde(tag = "type")]
71+
pub enum PolicyConfig {
72+
#[serde(rename = "random")]
73+
Random,
74+
75+
#[serde(rename = "round_robin")]
76+
RoundRobin,
77+
78+
#[serde(rename = "cache_aware")]
79+
CacheAware {
80+
/// Minimum prefix match ratio to use cache-based routing
81+
cache_threshold: f32,
82+
/// Absolute load difference threshold for load balancing
83+
balance_abs_threshold: usize,
84+
/// Relative load ratio threshold for load balancing
85+
balance_rel_threshold: f32,
86+
/// Interval between cache eviction cycles (seconds)
87+
eviction_interval_secs: u64,
88+
/// Maximum cache tree size per tenant
89+
max_tree_size: usize,
90+
},
91+
92+
#[serde(rename = "power_of_two")]
93+
PowerOfTwo {
94+
/// Interval for load monitoring (seconds)
95+
load_check_interval_secs: u64,
96+
},
97+
}
98+
99+
impl PolicyConfig {
100+
pub fn name(&self) -> &'static str {
101+
match self {
102+
PolicyConfig::Random => "random",
103+
PolicyConfig::RoundRobin => "round_robin",
104+
PolicyConfig::CacheAware { .. } => "cache_aware",
105+
PolicyConfig::PowerOfTwo { .. } => "power_of_two",
106+
}
107+
}
108+
}
109+
110+
/// Service discovery configuration
111+
#[derive(Debug, Clone, Serialize, Deserialize)]
112+
pub struct DiscoveryConfig {
113+
/// Enable service discovery
114+
pub enabled: bool,
115+
/// Kubernetes namespace (None = all namespaces)
116+
pub namespace: Option<String>,
117+
/// Service discovery port
118+
pub port: u16,
119+
/// Check interval for service discovery
120+
pub check_interval_secs: u64,
121+
/// Regular mode selector
122+
pub selector: HashMap<String, String>,
123+
/// PD mode prefill selector
124+
pub prefill_selector: HashMap<String, String>,
125+
/// PD mode decode selector
126+
pub decode_selector: HashMap<String, String>,
127+
/// Bootstrap port annotation key
128+
pub bootstrap_port_annotation: String,
129+
}
130+
131+
impl Default for DiscoveryConfig {
132+
fn default() -> Self {
133+
Self {
134+
enabled: false,
135+
namespace: None,
136+
port: 8000,
137+
check_interval_secs: 60,
138+
selector: HashMap::new(),
139+
prefill_selector: HashMap::new(),
140+
decode_selector: HashMap::new(),
141+
bootstrap_port_annotation: "sglang.ai/bootstrap-port".to_string(),
142+
}
143+
}
144+
}
145+
146+
/// Metrics configuration
147+
#[derive(Debug, Clone, Serialize, Deserialize)]
148+
pub struct MetricsConfig {
149+
/// Prometheus metrics port
150+
pub port: u16,
151+
/// Prometheus metrics host
152+
pub host: String,
153+
}
154+
155+
impl Default for MetricsConfig {
156+
fn default() -> Self {
157+
Self {
158+
port: 29000,
159+
host: "127.0.0.1".to_string(),
160+
}
161+
}
162+
}
163+
164+
impl Default for RouterConfig {
165+
fn default() -> Self {
166+
Self {
167+
mode: RoutingMode::Regular {
168+
worker_urls: vec![],
169+
},
170+
policy: PolicyConfig::Random,
171+
host: "127.0.0.1".to_string(),
172+
port: 3001,
173+
max_payload_size: 268_435_456, // 256MB
174+
request_timeout_secs: 600,
175+
worker_startup_timeout_secs: 300,
176+
worker_startup_check_interval_secs: 10,
177+
discovery: None,
178+
metrics: None,
179+
log_dir: None,
180+
verbose: false,
181+
}
182+
}
183+
}
184+
185+
impl RouterConfig {
186+
/// Create a new configuration with mode and policy
187+
pub fn new(mode: RoutingMode, policy: PolicyConfig) -> Self {
188+
Self {
189+
mode,
190+
policy,
191+
..Default::default()
192+
}
193+
}
194+
195+
/// Validate the configuration
196+
pub fn validate(&self) -> ConfigResult<()> {
197+
crate::config::validation::ConfigValidator::validate(self)
198+
}
199+
200+
/// Get the routing mode type as a string
201+
pub fn mode_type(&self) -> &'static str {
202+
match self.mode {
203+
RoutingMode::Regular { .. } => "regular",
204+
RoutingMode::PrefillDecode { .. } => "prefill_decode",
205+
}
206+
}
207+
208+
/// Check if service discovery is enabled
209+
pub fn has_service_discovery(&self) -> bool {
210+
self.discovery.as_ref().map_or(false, |d| d.enabled)
211+
}
212+
213+
/// Check if metrics are enabled
214+
pub fn has_metrics(&self) -> bool {
215+
self.metrics.is_some()
216+
}
217+
218+
/// Convert to routing PolicyConfig for internal use
219+
pub fn to_routing_policy_config(&self) -> ConfigResult<crate::router::PolicyConfig> {
220+
match (&self.mode, &self.policy) {
221+
(
222+
RoutingMode::PrefillDecode {
223+
prefill_urls,
224+
decode_urls,
225+
},
226+
policy,
227+
) => {
228+
// Map policy to PDSelectionPolicy
229+
let selection_policy = match policy {
230+
PolicyConfig::Random => crate::pd_types::PDSelectionPolicy::Random,
231+
PolicyConfig::PowerOfTwo { .. } => {
232+
crate::pd_types::PDSelectionPolicy::PowerOfTwo
233+
}
234+
PolicyConfig::CacheAware {
235+
cache_threshold,
236+
balance_abs_threshold,
237+
balance_rel_threshold,
238+
..
239+
} => crate::pd_types::PDSelectionPolicy::CacheAware {
240+
cache_threshold: *cache_threshold,
241+
balance_abs_threshold: *balance_abs_threshold,
242+
balance_rel_threshold: *balance_rel_threshold,
243+
},
244+
PolicyConfig::RoundRobin => {
245+
return Err(ConfigError::IncompatibleConfig {
246+
reason: "RoundRobin policy is not supported in PD disaggregated mode"
247+
.to_string(),
248+
});
249+
}
250+
};
251+
252+
Ok(crate::router::PolicyConfig::PrefillDecodeConfig {
253+
selection_policy,
254+
prefill_urls: prefill_urls.clone(),
255+
decode_urls: decode_urls.clone(),
256+
timeout_secs: self.worker_startup_timeout_secs,
257+
interval_secs: self.worker_startup_check_interval_secs,
258+
})
259+
}
260+
(RoutingMode::Regular { .. }, PolicyConfig::Random) => {
261+
Ok(crate::router::PolicyConfig::RandomConfig {
262+
timeout_secs: self.worker_startup_timeout_secs,
263+
interval_secs: self.worker_startup_check_interval_secs,
264+
})
265+
}
266+
(RoutingMode::Regular { .. }, PolicyConfig::RoundRobin) => {
267+
Ok(crate::router::PolicyConfig::RoundRobinConfig {
268+
timeout_secs: self.worker_startup_timeout_secs,
269+
interval_secs: self.worker_startup_check_interval_secs,
270+
})
271+
}
272+
(
273+
RoutingMode::Regular { .. },
274+
PolicyConfig::CacheAware {
275+
cache_threshold,
276+
balance_abs_threshold,
277+
balance_rel_threshold,
278+
eviction_interval_secs,
279+
max_tree_size,
280+
},
281+
) => Ok(crate::router::PolicyConfig::CacheAwareConfig {
282+
cache_threshold: *cache_threshold,
283+
balance_abs_threshold: *balance_abs_threshold,
284+
balance_rel_threshold: *balance_rel_threshold,
285+
eviction_interval_secs: *eviction_interval_secs,
286+
max_tree_size: *max_tree_size,
287+
timeout_secs: self.worker_startup_timeout_secs,
288+
interval_secs: self.worker_startup_check_interval_secs,
289+
}),
290+
(RoutingMode::Regular { .. }, PolicyConfig::PowerOfTwo { .. }) => {
291+
Err(ConfigError::IncompatibleConfig {
292+
reason: "PowerOfTwo policy is only supported in PD disaggregated mode"
293+
.to_string(),
294+
})
295+
}
296+
}
297+
}
298+
}

0 commit comments

Comments
 (0)