Skip to content

Commit

Permalink
balancer: add prometheus metrics (#1109)
Browse files Browse the repository at this point in the history
* prometheus metrics

fixes #1094
in progress

* finish metrics endpoint

* add metrics to balancer fly config

* fix lints

* fix lints

---------

Co-authored-by: Carson McManus <carson.mcmanus1@gmail.com>
  • Loading branch information
cjrkoa and dyc3 authored Nov 11, 2023
1 parent 2037207 commit 0b88d27
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 1 deletion.
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ once_cell = "1.17.1"
ott-common = { path = "crates/ott-common" }
ott-balancer-protocol = { path = "crates/ott-balancer-protocol" }
pin-project = "1.0.12"
prometheus = "0.13.3"
rand = "0.8.5"
reqwest = { version = "0.11.17", features = ["json", "stream", "rustls-tls"] }
serde = { version = "1", features = ["derive", "rc"] }
Expand Down
1 change: 1 addition & 0 deletions crates/ott-balancer-bin/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ ott-balancer-protocol.workspace = true
route-recognizer = "0.3.1"
once_cell.workspace = true
pin-project.workspace = true
prometheus.workspace = true
trust-dns-resolver = { version = "0.22.0", features = ["system-config"] }
39 changes: 38 additions & 1 deletion crates/ott-balancer-bin/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use once_cell::sync::Lazy;
use ott_balancer_protocol::monolith::{RoomMetadata, Visibility};
use ott_balancer_protocol::RoomName;
use ott_common::websocket::{is_websocket_upgrade, upgrade};
use prometheus::{register_int_gauge, Encoder, IntGauge, TextEncoder};
use reqwest::Url;
use route_recognizer::Router;
use tokio::sync::RwLock;
Expand Down Expand Up @@ -52,6 +53,8 @@ impl Service<Request<IncomingBody>> for BalancerService {
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;

fn call(&self, req: Request<hyper::body::Incoming>) -> Self::Future {
COUNTER_HTTP_REQUESTS.inc();

fn mk_response(s: String) -> anyhow::Result<Response<Full<Bytes>>, hyper::Error> {
Ok(Response::builder().body(Full::new(Bytes::from(s))).unwrap())
}
Expand Down Expand Up @@ -83,7 +86,23 @@ impl Service<Request<IncomingBody>> for BalancerService {
.join("\n");
mk_response(rendered)
}
"metrics" => mk_response("TODO: prometheus metrics".to_owned()),
"metrics" => {
let bytes = match gather_metrics() {
Ok(bytes) => bytes,
Err(e) => {
error!("error gathering metrics: {}", e);
return Ok(Response::builder()
.status(StatusCode::INTERNAL_SERVER_ERROR)
.body(Full::new(Bytes::from(format!(
"error gathering metrics: {}",
e
))))
.unwrap());
}
};

Ok(Response::builder().body(Full::new(bytes)).unwrap())
}
"room" => {
let Some(room_name) = route.params().find("room_name") else {
return Ok(not_found());
Expand All @@ -97,9 +116,11 @@ impl Service<Request<IncomingBody>> for BalancerService {
// Spawn a task to handle the websocket connection.
let _ = tokio::task::Builder::new().name("client connection").spawn(
async move {
GUAGE_CLIENTS.inc();
if let Err(e) = client_entry(room_name, websocket, link).await {
error!("Error in websocket connection: {}", e);
}
GUAGE_CLIENTS.dec();
},
);

Expand Down Expand Up @@ -244,6 +265,14 @@ async fn list_rooms(ctx: Arc<RwLock<BalancerContext>>) -> anyhow::Result<Respons
Ok(builder.body(Full::new(body.into())).unwrap())
}

fn gather_metrics() -> anyhow::Result<Bytes> {
let mut buffer = vec![];
let encoder = TextEncoder::new();
let metric_families = prometheus::gather();
encoder.encode(&metric_families, &mut buffer)?;
Ok(Bytes::from(buffer))
}

#[cfg(test)]
mod test {
use super::*;
Expand Down Expand Up @@ -298,3 +327,11 @@ mod test {
}
}
}

static GUAGE_CLIENTS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!("balancer_clients", "Number of connected websocket clients").unwrap()
});

static COUNTER_HTTP_REQUESTS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!("balancer_http_requests", "Number of HTTP requests received").unwrap()
});
4 changes: 4 additions & 0 deletions deploy/fly.staging.balancer.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ interval = "30s"
method = "GET"
timeout = "10s"
path = "/api/status"

[metrics]
port = 8081
path = "/api/status/metrics"

0 comments on commit 0b88d27

Please sign in to comment.