在 NextJs 中使用 Prometheus 暴露指标

时之世 发布于 2025-05-02 1268 次阅读 预计阅读时间: 4 分钟 最后更新于 2025-05-02 988 字 无~


我们都知道,NextJs 接入 Prometheus 后默认只能看到整体 (总路由) 的指标,无法看到某个具体路由的指标,例如:/api/auth/home

这种情况,就需要我们自定义指标了

一. 安装依赖

npm install prom-client

二. 开放默认指标

需要我们编写/api/metrics 开放指标接口

import type { NextApiRequest, NextApiResponse } from 'next'
import { collectDefaultMetrics, register } from 'prom-client'

// Collect the default metrics (e.g. CPU, memory) at most once per process.
// This module can be evaluated more than once (for example on a dev-server
// hot reload) while the prom-client registry keeps its state; a second
// collectDefaultMetrics() call would then fail because the default metric
// names are already registered. The flag lives on globalThis so that it
// survives re-evaluation of this module.
const metricsGlobal = globalThis as typeof globalThis & {
    __promDefaultMetricsStarted?: boolean
}
if (!metricsGlobal.__promDefaultMetricsStarted) {
    collectDefaultMetrics({ register })
    // Set only after a successful call so a failed attempt can be retried.
    metricsGlobal.__promDefaultMetricsStarted = true
}

/**
 * API route that exposes the contents of the prom-client `register`.
 *
 * Sets the `Content-Type` header from `register.contentType`, then sends the
 * text produced by `register.metrics()` as the response body.
 *
 * @param req - Incoming request (not inspected).
 * @param res - Response used to send the metrics text.
 */
export default async function handler(
    req: NextApiRequest,
    res: NextApiResponse
) {
    const { contentType } = register
    res.setHeader('Content-Type', contentType)

    const exposition = await register.metrics()
    res.send(exposition)
}

三. 编写自定义指标方法

// Custom business metric (example: HTTP request count and duration)
import { Histogram, register } from 'prom-client'

const HTTP_REQUEST_DURATION_METRIC = 'http_request_duration_seconds'

// Look the metric up first: if this module is evaluated again in the same
// process, constructing a second Histogram with the same name would throw
// because the name is already present in the registry.
const registeredMetric = register.getSingleMetric(HTTP_REQUEST_DURATION_METRIC)

/**
 * Histogram of HTTP request durations in seconds, labelled by `method`,
 * `route` and `status`, with buckets at 0.1, 0.5, 1, 2 and 5 seconds.
 *
 * The identifier says "Microseconds" but the metric name, help text and
 * buckets are all in seconds; the name is kept so existing imports still work.
 */
const httpRequestDurationMicroseconds: Histogram<string> =
    registeredMetric instanceof Histogram
        ? registeredMetric
        : new Histogram({
              name: HTTP_REQUEST_DURATION_METRIC,
              help: 'HTTP 请求耗时 (秒)',
              labelNames: ['method', 'route', 'status'],
              buckets: [0.1, 0.5, 1, 2, 5]
          })
export { httpRequestDurationMicroseconds }

四. 在 middleware.ts(中间件) 记录自定义指标

从 Next@15 开始,中间件可以运行在 Node.js 运行时 (Node.js runtime) 中;而在 Next@14 及更早的版本中,中间件只能运行在边缘运行时 (Edge runtime) 中。prom-client 依赖只有 Node.js 环境才提供的模块,所以不同版本需要使用不同的方法

在 Next@15 中

修改中间件的运行时 (runtime)

// Middleware configuration object exported for Next.js.
// NOTE(review): Next.js is documented to read this export statically, so the
// values should stay plain literals (no variables or computed values) — confirm
// against the Next.js version in use.
export const config = {
    // Run the middleware on the Node.js runtime; prom-client depends on
    // modules that only exist in a Node.js environment.
    // NOTE(review): some Next 15 releases may additionally require an
    // experimental flag in next.config for this to take effect — confirm.
    runtime: 'nodejs',
    // Path patterns listed for the middleware; `:path*` is a wildcard segment
    // suffix, so nested paths under each prefix are presumably included.
    matcher: [
        '/home/:path*',
        '/medicine/:path*',
        '/stock/:path*',
        '/login/:path*',
        '/api/:path*',
        '/auth/:path*',
        '/log/:path*',
        '/notification/:path*'
    ]
}

直接在 middleware 中间件 (runtime:nodejs) 中写

/**
 * Middleware that records one observation in the
 * `httpRequestDurationMicroseconds` histogram per request and then lets the
 * request continue.
 *
 * The measured interval spans only the `NextResponse.next()` call, and the
 * `status` label is read from the response object that call returns.
 * NOTE(review): this interval likely does not include the time spent in the
 * downstream page or API handler — confirm before relying on it as latency.
 *
 * @param req - Incoming request; its method and pathname become label values.
 * @returns The response produced by `NextResponse.next()`.
 */
export async function middleware(req: NextRequest) {
    const startedAt = Date.now()
    const response = NextResponse.next()
    const elapsedSeconds = (Date.now() - startedAt) / 1000 // ms -> s

    const labelValues = {
        method: req.method,
        route: req.nextUrl.pathname,
        status: response.status.toString()
    }
    httpRequestDurationMicroseconds.labels(labelValues).observe(elapsedSeconds)

    return response
}

在 Next@14 及以下的版本中

创建接口执行记录自定义指标的操作

// 接口 /api/httpHistogram
import type { NextApiRequest, NextApiResponse } from 'next'

import { httpRequestDurationMicroseconds } from '@/tools/httpRequestDurationMicroseconds'
import ResponseService from '@/tools/res'

/**
 * API route `/api/httpHistogram`: records one observation in the
 * `httpRequestDurationMicroseconds` histogram from the request body.
 *
 * Expected body fields:
 * - `method`, `route`, `status`: label values (string or number)
 * - `duration`: the value to observe, a finite number
 *
 * A missing or malformed body is answered with HTTP 400 instead of letting
 * the destructuring or the histogram's `observe` call throw.
 *
 * @param req - Incoming request whose body carries the fields above.
 * @param res - Response used to report success or a validation error.
 */
export default async function handler(
    req: NextApiRequest,
    res: NextApiResponse
) {
    // Treat the body as untrusted: it may be absent or not an object at all.
    const rawBody: unknown = req.body
    const payload: Record<string, unknown> =
        typeof rawBody === 'object' && rawBody !== null
            ? (rawBody as Record<string, unknown>)
            : {}
    const { method, route, status, duration } = payload

    const isLabelValue = (value: unknown): value is string | number =>
        typeof value === 'string' || typeof value === 'number'

    if (
        !isLabelValue(method) ||
        !isLabelValue(route) ||
        !isLabelValue(status) ||
        typeof duration !== 'number' ||
        !Number.isFinite(duration)
    ) {
        return res.status(400).json({
            message:
                'Invalid payload: method, route and status must be strings or numbers, and duration must be a finite number'
        })
    }

    httpRequestDurationMicroseconds
        .labels({ method, route, status })
        .observe(duration)
    return ResponseService.success(res,'Successfully recorded HTTP request duration',200)
}

middleware 中间件执行请求

// Report the request to /api/httpHistogram, except when the request targets
// one of the two metrics endpoints themselves:
//   /api/httpHistogram - records the custom metric
//   /api/metrics       - exposes the metrics
// (presumably so that the reporting call does not trigger another report).
if (!['/api/httpHistogram', '/api/metrics'].includes(req.nextUrl.pathname)) {
    const observation = {
        method: req.method,
        route: req.nextUrl.pathname,
        status: res.status.toString(),
        duration
    }
    await MiddleAxios({
        url: url + '/api/httpHistogram',
        map: 'post',
        data: observation
    })
}

完成

可以在/api/metrics看到开放的指标

# HELP process_cpu_user_seconds_total Total user CPU time spent in seconds.
# TYPE process_cpu_user_seconds_total counter
process_cpu_user_seconds_total 0.922

# HELP process_cpu_system_seconds_total Total system CPU time spent in seconds.
# TYPE process_cpu_system_seconds_total counter
process_cpu_system_seconds_total 1.25

# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 2.172

# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1746197011
...

# HELP nodejs_version_info Node.js version info.
# TYPE nodejs_version_info gauge
nodejs_version_info{version="v21.0.0",major="21",minor="0",patch="0"} 1

# HELP nodejs_gc_duration_seconds Garbage collection duration by kind, one of major, minor, incremental or weakcb.
# TYPE nodejs_gc_duration_seconds histogram
nodejs_gc_duration_seconds_bucket{le="0.001",kind="incremental"} 2
nodejs_gc_duration_seconds_bucket{le="0.01",kind="incremental"} 2
nodejs_gc_duration_seconds_bucket{le="0.1",kind="incremental"} 2
...

//查看接口后会出现自定义指标
# HELP http_request_duration_seconds HTTP 请求耗时 (秒)
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.1",method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_bucket{le="0.5",method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_bucket{le="1",method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_bucket{le="2",method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_bucket{le="5",method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_bucket{le="+Inf",method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_sum{method="GET",route="/api/jwt",status="200"} 0.008
http_request_duration_seconds_count{method="GET",route="/api/jwt",status="200"} 7
http_request_duration_seconds_bucket{le="0.1",method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_bucket{le="0.5",method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_bucket{le="1",method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_bucket{le="2",method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_bucket{le="5",method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_bucket{le="+Inf",method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_sum{method="GET",route="/api/redis",status="200"} 0.009000000000000001
http_request_duration_seconds_count{method="GET",route="/api/redis",status="200"} 7
http_request_duration_seconds_bucket{le="0.1",method="GET",route="/medicine/medicine",status="200"} 0
http_request_duration_seconds_bucket{le="0.5",method="GET",route="/medicine/medicine",status="200"} 0