背景
之前我们统计域名状态码、平均响应时间和流量的方法是:在每台机器上添加一个定时脚本,把每个域名最近一分钟的访问日志提取到临时文件,然后由zabbix对这个一分钟日志临时文件作相关统计。这套方法一直运行良好,但最近发现某台服务器负载突然增高,使用iotop查看后发现,获取最近一分钟日志的脚本占用的IO特别高,停止这个定时任务之后负载恢复正常。于是我们打算使用nginx lua来替换目前的方法。新方法具有统计时占用资源少、结果实时的特点。
方法介绍
使用nginx lua统计网站相关数据的方法为(我们以统计www.centos.bz 404状态码为例):
记录过程:
- 1、定义了一个共享词典access,获取当前时间戳,获取当前域名,如www.centos.bz;
- 2、我们定义用来存储状态码的词典key为,www.centos.bz-404-当前时间戳;
- 3、将key(www.centos.bz-404-当前时间戳)的值自增1;
- 4、循环2,3步。
查询过程:
提供一个接口,把最近一分钟内每一秒对应的key(形如www.centos.bz-404-时间戳,时间戳从约60秒前逐秒取到当前时间戳)的值累加起来,返回结果。
方法实现
nginx.conf设置
http {
    [...]
    # Shared memory zone (10 MB) visible to every worker; the Lua scripts
    # below keep their per-second counters in it.
    lua_shared_dict access 10m;
    # Run the statistics recorder in the log phase of every request.
    # The file name must match the script saved as log_access.lua.
    log_by_lua_file conf/log_access.lua;
    server {
        [...]
        # Query endpoint that aggregates the counters of the last minute.
        # NOTE(review): this endpoint is not access-restricted here; consider
        # limiting it to the monitoring hosts.
        location /domain_status {
            default_type text/plain;
            content_by_lua_file "conf/domain_status.lua";
        }
        [...]
    }
    [...]
}
log_access.lua
-- log_access.lua
-- Executed via log_by_lua_file for every request. Accumulates, per host and
-- per second, four counters in the shared dict "access":
--   <host>-<status>-<ts>     number of responses with that status code
--   <host>-flow-<ts>         sum of body bytes sent
--   <host>-reqt-<ts>         sum of request times (seconds)
--   <host>-total_req-<ts>    number of requests
-- Every key expires after `expire_time` seconds.

local access = ngx.shared.access
local host = ngx.var.host
local status = ngx.var.status
-- ngx.var.* values are strings; convert them so they can be passed to incr().
local body_bytes_sent = tonumber(ngx.var.body_bytes_sent) or 0
local request_time = tonumber(ngx.var.request_time) or 0
-- Epoch seconds from nginx's cached clock; os.date("%s") depends on a
-- non-portable strftime extension.
local timestamp = ngx.time()
local expire_time = 70

local status_key = table.concat({host, "-", status, "-", timestamp})
local flow_key = table.concat({host, "-flow-", timestamp})
local req_time_key = table.concat({host, "-reqt-", timestamp})
local total_req_key = table.concat({host, "-total_req-", timestamp})

-- Atomically add `delta` to the counter stored under `key`.
-- A separate get() followed by set() is a read-modify-write that loses
-- updates when several workers handle requests concurrently; incr() performs
-- the addition inside the shared dict itself.
local function bump(key, delta)
    local newval, err = access:incr(key, delta)
    if not newval and err == "not found" then
        -- First hit for this key: create it with the TTL. add() is a no-op if
        -- another worker created the key in the meantime, so the following
        -- incr() is correct either way.
        access:add(key, 0, expire_time)
        newval, err = access:incr(key, delta)
    end
    if not newval then
        ngx.log(ngx.ERR, "failed to update counter ", key, ": ", err)
    end
end

-- count total req
bump(total_req_key, 1)

-- count status
bump(status_key, 1)

-- count flow
bump(flow_key, body_bytes_sent)

-- count request time
bump(req_time_key, request_time)
domain_status.lua
-- domain_status.lua
-- Content handler for /domain_status. Aggregates the per-second counters
-- written by the log-phase script over the most recent minute.
--
-- Query arguments:
--   count   "status" | "flow" | "reqt"
--   host    domain name whose counters are read
--   status  HTTP status code (required when count=status)
--
-- Response body (text):
--   count=status  number of responses with the given status
--   count=flow    total body bytes sent
--   count=reqt    average request time (0 when there were no requests)

local access = ngx.shared.access
local args = ngx.req.get_uri_args()
local count = args["count"]
local host = args["host"]
local status = args["status"]

-- Read the clock once so both ends of the window come from the same instant.
local now = ngx.time()
-- Number of one-second buckets to sum. Iterating from now-60 to now
-- inclusive would cover 61 buckets, so the window starts at now-59.
local window = 60

-- Send `msg` as the response body and finish the request.
local function reply_and_exit(msg)
    ngx.print(msg)
    return ngx.exit(ngx.HTTP_OK)
end

-- get_uri_args() yields a table for repeated arguments and `true` for
-- arguments without a value; neither can be used to build a key, so anything
-- that is not a string is treated as missing.
if type(host) ~= "string" then
    return reply_and_exit("host arg not found.")
end

if count == "status" and type(status) ~= "string" then
    return reply_and_exit("status arg not found.")
end

if not (count == "status" or count == "flow" or count == "reqt") then
    return reply_and_exit("count arg invalid.")
end

-- Sum the values stored under prefix .. <second> for every second in the window.
local function sum_window(prefix)
    local total = 0
    for second_num = now - window + 1, now do
        total = total + (access:get(prefix .. second_num) or 0)
    end
    return total
end

if count == "status" then
    ngx.print(sum_window(table.concat({host, "-", status, "-"})))
elseif count == "flow" then
    ngx.print(sum_window(host .. "-flow-"))
elseif count == "reqt" then
    local req_total = sum_window(host .. "-total_req-")
    local reqt_total = sum_window(host .. "-reqt-")
    -- Kept local: assigning an undeclared name would create a global.
    local reqt_avg = 0
    if req_total ~= 0 then
        reqt_avg = reqt_total / req_total
    end
    ngx.print(reqt_avg)
end
使用说明
1、获取域名状态码
如请求www.centos.bz一分钟内404状态码数量
请求接口http://$host/domain_status?count=status&host=www.centos.bz&status=404
2、获取域名流量
请求接口http://$host/domain_status?count=flow&host=www.centos.bz
3、获取域名一分钟内平均响应时间
请求接口http://$host/domain_status?count=reqt&host=www.centos.bz