在上文我们介绍了三种获取进程cpu使用率的方法,本文介绍使用openresty来获取所有nginx worker的cpu使用率,然后提供一个接口来输出cpu使用率。由于收集cpu使用率需要获取两次,两次之间需要等待一些时间,为了保证此接口的性能,决定不采用接口实时统计,采用后台定时统计,然后接口查询其数据就行。
所有步骤思路为:
http {
[...]
lua_shared_dict dict 10m;
init_worker_by_lua_block {
-- Publish this worker's pid in the shared dict, keyed by its worker id, so
-- that the CPU sampler can look up every worker's /proc/<pid>/stat entry.
local worker_pid = ngx.worker.pid()
local worker_id = ngx.worker.id()
-- NOTE(review): ngx.worker.id() is documented to return nil in some process
-- types / old nginx versions; there is nothing to register in that case.
if worker_id ~= nil then
    local ok, err = ngx.shared.dict:set(worker_id, worker_pid)
    if not ok then
        -- Without this entry the sampler cannot account for this worker.
        ngx.log(ngx.ERR, "failed to register pid of worker ", worker_id,
                ": ", err)
    end
end
-- CPU usage sampler ---------------------------------------------------------

-- Number of logical CPUs. /proc/cpuinfo is parsed on first use and the
-- result is reused by later sampling rounds.
local cpu_core_count

--- Read from a file and close it again.
-- @param path file to read
-- @param fmt  format passed to file:read ("*a" for everything, "*l" for one line)
-- @return the data that was read
-- Raises an error when the file cannot be opened or read.
local function read_proc_file(path, fmt)
    local fp, err = io.open(path, "r")
    if not fp then
        error("failed to open " .. path .. ": " .. tostring(err))
    end
    local data = fp:read(fmt)
    fp:close()
    if not data then
        error("failed to read " .. path)
    end
    return data
end

--- Sum utime + stime (clock ticks) over all nginx workers.
-- Worker pids are looked up in the shared dict under keys 0 .. count-1.
-- Raises an error when a pid is missing or a stat file cannot be parsed, so
-- that a partial sample is never mixed with a complete one.
local function sample_workers_cpu_ticks()
    local total = 0
    for i = 0, ngx.worker.count() - 1 do
        local pid = ngx.shared.dict:get(i)
        if not pid then
            error("pid of worker " .. i .. " is not registered")
        end
        local path = "/proc/" .. pid .. "/stat"
        local data = read_proc_file(path, "*a")
        -- Skip the first 13 space-separated fields; the next two are utime
        -- and stime. This assumes the process name (field 2) has no spaces.
        local res, err = ngx.re.match(data, "(.*? ){13}(.*?) (.*?) ", "jio")
        if not res then
            error("failed to parse " .. path .. ": " .. tostring(err))
        end
        local utime, stime = tonumber(res[2]), tonumber(res[3])
        if not utime or not stime then
            error("unexpected content in " .. path)
        end
        total = total + utime + stime
    end
    return total
end

--- Sum the system-wide CPU time (clock ticks) from the first line of /proc/stat.
-- Only the first 8 counters (user .. steal) are added: per proc(5) the
-- trailing guest/guest_nice counters are already included in user/nice, so
-- adding them again would overstate the total.
local function sample_total_cpu_ticks()
    local cpu_line = read_proc_file("/proc/stat", "*l")
    local iterator, err = ngx.re.gmatch(cpu_line, "(\\d+)", "jo")
    if not iterator then
        error("failed to parse /proc/stat: " .. tostring(err))
    end
    local total, seen = 0, 0
    while seen < 8 do
        local m, iter_err = iterator()
        if iter_err then
            error("failed to parse /proc/stat: " .. tostring(iter_err))
        end
        if not m then
            break
        end
        total = total + tonumber(m[0])
        seen = seen + 1
    end
    return total
end

--- Return the number of logical CPUs listed in /proc/cpuinfo (cached).
local function get_cpu_core_count()
    if cpu_core_count then
        return cpu_core_count
    end
    local data = read_proc_file("/proc/cpuinfo", "*a")
    -- Anchored and case-sensitive on purpose: a bare case-insensitive
    -- "processor" also matches "model name : ... Processor" lines and
    -- inflates the count.
    local iterator, err = ngx.re.gmatch(data, "^processor\\b", "jom")
    if not iterator then
        error("failed to parse /proc/cpuinfo: " .. tostring(err))
    end
    local count = 0
    while true do
        local m, iter_err = iterator()
        if iter_err then
            error("failed to parse /proc/cpuinfo: " .. tostring(iter_err))
        end
        if not m then
            break
        end
        count = count + 1
    end
    if count == 0 then
        error("no processor entries found in /proc/cpuinfo")
    end
    cpu_core_count = count
    return count
end

--- Measure the combined CPU usage of all nginx workers and publish it.
-- Takes two samples 0.5 s apart and stores the usage as an integer string
-- under the shared dict key "nginx_workers_cpu_time". The value is a
-- percentage of a single core (100 means one fully busy core).
-- @param premature timer "premature" flag; nothing is done when it is true
-- Raises an error when sampling fails; the caller is expected to pcall it.
local function count_cpu_usage(premature)
    if premature then
        return
    end

    -- First sample.
    local workers_before = sample_workers_cpu_ticks()
    local total_before = sample_total_cpu_ticks()

    -- Second sample.
    ngx.sleep(0.5)
    local workers_after = sample_workers_cpu_ticks()
    local total_after = sample_total_cpu_ticks()

    local workers_delta = workers_after - workers_before
    local total_delta = total_after - total_before
    -- No elapsed ticks would mean a division by zero. A negative worker delta
    -- can happen when a worker was replaced by a new pid between the two
    -- samples. Keep the previously published value in both cases.
    if total_delta <= 0 or workers_delta < 0 then
        return
    end

    local cpu_core = get_cpu_core_count()
    local nginx_workers_cpu_time = (workers_delta / total_delta) * 100 * cpu_core
    nginx_workers_cpu_time = string.format("%d", nginx_workers_cpu_time)
    local ok, err = ngx.shared.dict:set("nginx_workers_cpu_time",
                                        nginx_workers_cpu_time)
    if not ok then
        error("failed to store nginx_workers_cpu_time: " .. tostring(err))
    end
end
--- Start the background job that refreshes the CPU usage figure.
-- The job re-arms itself with ngx.timer.at after every run, so the delay is
-- measured from the end of one run to the start of the next. It stops once
-- the timer fires prematurely.
local function count_cpu_usage_timed_job()
    -- Seconds to wait between two runs.
    local delay = 2
    local count
    count = function(premature)
        if premature then
            return
        end
        -- A failing run must not stop the job: log the error and keep going.
        local ok, err = pcall(count_cpu_usage, premature)
        if not ok then
            ngx.log(ngx.ERR, "count cpu usage error:", err)
        end
        local timer_ok, timer_err = ngx.timer.at(delay, count)
        if not timer_ok then
            ngx.log(ngx.ERR, "failed to re-arm cpu usage timer: ", timer_err)
        end
    end
    local ok, err = ngx.timer.at(delay, count)
    if not ok then
        ngx.log(ngx.ERR, "failed to create cpu usage timer: ", err)
    end
end
-- Start the periodic job on worker 0 only. The job already samples every
-- worker and publishes a single shared value, so running it in each worker
-- would only repeat the same /proc reads and dict writes.
if ngx.worker.id() == 0 then
    count_cpu_usage_timed_job()
end
}
[...]
}
location /cpu {
    content_by_lua_block {
        -- Report the value stored in the shared dict under
        -- "nginx_workers_cpu_time"; nothing is measured during the request.
        local nginx_workers_cpu_time = ngx.shared.dict:get("nginx_workers_cpu_time")
        ngx.header.content_type = 'text/plain'
        if nginx_workers_cpu_time == nil then
            -- No value has been stored yet (or it could not be read).
            ngx.status = ngx.HTTP_SERVICE_UNAVAILABLE
            ngx.say("nginx_workers_cpu_time not available yet")
            return
        end
        ngx.say(nginx_workers_cpu_time)
    }
}