Skip to main content

ELK日志告警

1、ES单节点安装

初始化配置

echo 262144 > /proc/sys/vm/max_map_count
sysctl -w vm.max_map_count=262144
sysctl -p

创建持久目录

mkdir -p   /mydata/elasticsearch/config
mkdir -p /mydata/elasticsearch/data
mkdir -p /mydata/elasticsearch/plugins
chmod 777 /mydata/elasticsearch/ -R

修改配置

echo "http.host: 0.0.0.0" >> /mydata/elasticsearch/config/elasticsearch.yml

服务启动


docker run --name elasticsearch -p 9200:9200 -p 9300:9300 \
-e "discovery.type=single-node" -e ES_JAVA_OPTS="-Xms64m -Xmx128m" \
-v /mydata/elasticsearch/config/:/usr/share/elasticsearch/config/ \
-v /mydata/elasticsearch/data:/usr/share/elasticsearch/data \
-v /mydata/elasticsearch/plugins:/usr/share/elasticsearch/plugins -d elasticsearch:7.6.2

2、Elsticalert 告警

2.1、elsticalert手动安装方式

yum -y install wget openssl openssl-devel gcc gcc-c++
yum -y install python3 git python3-devel
pip3 install --upgrade pip
yum -y install python-pip python-devel


pip3 install elastalert

2.2.3 config.yaml 配置文件

[root@110 elastalert-master]# cat config.yaml
# 用来加载rule的目录,默认是example_rules
rules_folder: rules
# 用来设置定时向elasticsearch发送请求,也就是告警执行的频率
run_every:
seconds: 15
# 用来设置请求里时间字段的范围
buffer_time:
seconds: 15
es_host: 10.51.8.112
es_port: 9200
writeback_index: elastalert_status
writeback_alias: elastalert_alerts
# 失败重试的时间限制
alert_time_limit:
days: 2

rules规则文件,rules中可以添加多个规则文件,但是每个规则

[root@110 elastalert-master]# cat rules/error_alert_rule.yaml 
es_host: 10.51.8.112
es_port: 9200
name: ErrorAlertRule_elk
type: frequency
index: nginx_access*
num_events: 1
timeframe:
minutes: 5
filter:
- query_string:
query: "tags:error_alert"
alert:
- "elastalert_modules.dingtalk_alert.DingTalkAlerter"
- "email"
email:
- "409003604@qq.com"
smtp_host: smtp.qq.com
smtp_port: 25
smtp_auth_file: /opt/elastalert-master/email_auth.yaml
from_addr: 409003604@qq.com
dingtalk_webhook: "https://oapi.dingtalk.com/robot/send?access_token=1fa27134e7f6cd127f70ec34093a86cc4b833f421743581c561d8c3245ca25f7"
dingtalk_msgtype: "text"
alert_text: "
【告警项】 Error_keywords_alert_elk \n
【告警时间】 {} \n
【message】 \n
{}\n
【日志路劲】 {} \n
【服务器】 {}
"
alert_text_type: alert_text_only
alert_text_args:
- "@timestamp"
- message
- log.file.path
- agent.hostname
match_enhancements:
- "elastalert.enhancements.TimeEnhancement"
dingtalk_isAtAll: true

docker push zhanglaiqiang/elastalert:v1.0

2.2、通过容器部署方式

创建dockerfiele文件

[root@110 elk]# cat Dockerfile  
FROM python:3.6.15
ADD elastalert-master.tar.gz /opt/
RUN cd /opt/elastalert-master && \
python3 -m pip install --upgrade pip && \
pip install "setuptools>=11.3" -i https://mirrors.aliyun.com/pypi/simple/ && \
pip install "setuptools_rust" -i https://mirrors.aliyun.com/pypi/simple/ && \
pip3 install "elasticsearch>=7.0.0,<8.0.0" &&\
pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ && \
python3 setup.py install && \
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone
WORKDIR /opt/elastalert-master
CMD ["python", "-m", "elastalert.elastalert","--config", "config.yaml"]
docker pull  zhanglaiqiang/elastalert:v1.0
#这个镜像已经包含了所有的配置文件

再本地部署的方式

mkdir  /data/project/
docker run --rm --name elastalert-to-dingding zhanglaiqiang/elastalert:v1.0
cd /data/project/
docker cp elastalert-to-dingding:/opt/elastalert-master .
#删除elastalert-to-dingding ctrl -C
docker run -d -v ./elastalert-master:/opt/elastalert-master --name elastalert-to-dingding registry.cn-hangzhou.aliyuncs.com/zlq_registry/elk-dingtalk:v1.2

**如下配置没使用,只作介绍 **web

2.3、清理elastalert相关状态信息

需要清理es中中索引的文档数据,方式如下:


清理 elastalert_status_status
POST elastalert_status_status/_delete_by_query
{
"query": {
"match_all": {}
}
}

清理elastalert_status
POST elastalert_status/_delete_by_query
{
"query": {
"match_all": {}
}
}

清理elastalert_status_silence
POST elastalert_status_silence/_delete_by_query
{
"query": {
"match_all": {}
}
}

3、kibana安装

版本kibana-7.6.2
配置文件
[root@demo112 kibana-7.6.2-linux-x86_64]# cat config/kibana.yml | grep -Ev '#|^$'
server.port: 5601
server.host: "10.51.8.112"
elasticsearch.hosts: ["http://10.51.8.112:9200"]

启动
[root@demo112 kibana-7.6.2-linux-x86_64]# ./bin/kibana --allow-root
访问地址: x.x.x.x:5601

4、Esheader

esheader download https://github.com/qax-os/ElasticHD/releases/download/1.4/elasticHD_linux_amd64.zip

yum install xdg-utils exec ./ElasticHD -p 0.0.0.0:9800

测试

curl -X POST "http://elastic:passwd@127.0.0.1:9200/test-alert/test" -H 'Content-Type: application/json' -d '{"@timestamp": "'$(date --iso-8601=seconds)'", "field": "value"}'



{"_index":"test-alert","_type":"test","_id":"9y2NJYoB8Mxxn1vtZDO2","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}[root@demo112 elastalert]#

es跟其他组件版本对应 https://blog.csdn.net/vtopqx/article/details/105410814

ElasticsearchKibanaX-PackBeats^*Logstash^*ES-Hadoop (jar)*APM ServerApp Search
5.0.x5.0.x5.0.x1.3.x-5.6.x2.4.x-5.6.x5.0.x-5.6.x
5.1.x5.1.x5.1.x1.3.x-5.6.x2.4.x-5.6.x5.0.x-5.6.x
5.2.x5.2.x5.2.x1.3.x-5.6.x2.4.x-5.6.x5.0.x-5.6.x
5.3.x5.3.x5.3.x1.3.x-5.6.x2.4.x-5.6.x5.0.x-5.6.x
5.4.x5.4.x5.4.x1.3.x-5.6.x2.4.x-5.6.x5.0.x-5.6.x
5.5.x5.5.x5.5.x1.3.x-5.6.x2.4.x-5.6.x5.0.x-5.6.x
5.6.x5.6.x5.6.x1.3.x-6.0.x2.4.x-6.0.x5.0.x-6.0.x
6.0.x6.0.x6.0.x5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x
6.1.x6.1.x6.1.x5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x
6.2.x6.2.x6.2.x5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
6.3.x6.3.xN/A**5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
6.4.x6.4.xN/A**5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
6.5.x6.5.xN/A**5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
6.6.x6.6.xN/A**5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
6.7.x6.7.xN/A**5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
6.8.x6.8.xN/A**5.6.x-6.8.x5.6.x-6.8.x6.0.x-6.8.x6.2.x-6.8.x
7.0.x7.0.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***
7.1.x7.1.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***
7.2.x7.2.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***7.2.x
7.3.x7.3.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***7.3.x
7.4.x7.4.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***7.4.x
7.5.x7.5.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***7.5.x
7.6.x7.6.xN/A**6.8.x-7.6.x6.8.x-7.6.x6.8.x-7.6.x7.0.x-7.6.x***7.6.x

参考文档: https://blog.csdn.net/qq_49296785/article/details/108657758

nginx日志收集整理 https://www.cpweb.top/1233

logstash 配置文件检查 logstash -f xxx.conf -t

set from=409003604@qq.com set smtp=smtp.qq.com set smtp-auth-user=409003604@qq.com set smtp-auth-password=xxxxxxxxxxxxxxxx #授权码 set smtp-auth=login set ssl-verify=ignore

邮箱告警配置 elastalert

全局启动-规则文件都在rules目录下
/usr/bin/python3 -m elastalert.elastalert --verbose --config config.yaml
elastalert --config /opt/soft/elastalert/config.yaml --rule /opt/soft/elastalert/example_rules/nginx_frequency.yaml --verbose

name: prod存在"ERROR","Exception"关键字日志,请登陆kibana及时查看,地址:http://152.32.142.164:5601/
type: frequency
index: erp-cod-prod_zooqeer-service-log*
#在一个时间范围内出现这么多与查询匹配的文档时发出警报
num_events: 1
#5分钟发一次
timeframe:
minutes: 5
filter:

- query:
query_string:
query: "ERROR"
query_string:
query: "Exception"
#只需要的字段 https://elastalert.readthedocs.io/en/latest/ruletypes.html#include
include: ["method", "url_path", "url_args", "status", "request_time"]
alert:
"elastalert_modules.dingtalk_alert.DingTalkAlerter"

#dingtalk_webhook: "https://oapi.dingtalk.com/robot/send?access_token=11dd40a2482d889c45a2c16ee3cefea5c1fa7368f8841fdf98bf95d571c98dc9"

dingtalk_webhook: "https://oapi.dingtalk.com/robot/send?access_token=3c7a159c14cf39f0b8e9bd26806d3a96831e4b843c2d5123ccc5eeb232520f11"
dingtalk_msgtype: "text"
#钉钉智能机器人的关键字
content: elk

5、filebeat安装

以上的配置都是基于logstash来收集日志,接下来,调整为使用filebeat来收集,发送给logstash,然后通过logstash再发送给ES

下载filebeat 和es 想对应的版本

从官网下载对应的版本7.6.2

下载地址:https://www.elastic.co/cn/downloads/past-releases#filebeat

[root@demo112 filebeat]# ls -l
total 76696
drwxr-x--- 3 root root 60 Aug 29 17:24 data
-rw-r--r-- 1 root root 500235 Mar 26 2020 fields.yml
-rwxr-xr-x 1 root root 77562560 Mar 26 2020 filebeat
-rw-r--r-- 1 root root 89359 Mar 26 2020 filebeat.reference.yml
-rw------- 1 root root 913 Aug 29 17:04 filebeat.yml
-rw------- 1 root root 8341 Aug 29 16:59 filebeat.yml.bak
drwxr-xr-x 3 root root 15 Mar 26 2020 kibana
-rw-r--r-- 1 root root 13675 Mar 26 2020 LICENSE.txt
drwxr-xr-x 39 root root 4096 Mar 26 2020 module
drwxr-xr-x 2 root root 4096 Mar 26 2020 modules.d
-rw-r--r-- 1 root root 328580 Mar 26 2020 NOTICE.txt
-rw-r--r-- 1 root root 802 Mar 26 2020 README.md

修改配置

[root@demo112 filebeat]# cat filebeat.yml
filebeat.inputs:
- type: log
enabled: true
paths:
- /var/log/nginx/*.log
multiline:
pattern: (^\|[0-9]{4}-[0-9]{2}-[0-9]{2} |^[0-9]{2}\:[0-9]{2}\:[0-9]{2}\.[0-9]+ [A-Z]+|^\|TID)
negate: true
match: after
exclude_files: ['nacos.*.log','^info.log$','^error.log$']
exclude_lines: ['api/ping\|', 'api/health\|']
scan_frequency: 30s
ignore_older: 24h
close_inactive: 3h
close_removed: true
clean_inactive: 48h
clean_removed: true


filebeat.config.modules:
path: ${path.config}/modules.d/*.yml
reload.enabled: false
setup.template.settings:
index.number_of_shards: 1

output.logstash:
hosts: ["10.51.8.112:5044"]

processors:
- add_fields:
target: agent
fields:
server_name: "demo112-prod"
- add_fields:
target: agent
fields:
env_name: prod
- drop_fields:
fields: ["agent.id","agent.ephemeral_id"]

max_procs: 1

启动方式

filebeat -e -c myfilebeat.yml

修改logstash的配置

[root@demo112 config]# cat nginx.conf
input {
beats {
port => 5044
}
}

filter {
# grok {
# match => { "message" => "\|%{TIMESTAMP_ISO8601:timestamp}\|%{WORD:loglevel}\|%{WORD:application}\|%{GREEDYDATA:error_message}" }
# }
grok {
match => {
"message" => [
"^\|%{TIMESTAMP_ISO8601:logdate}\|%{LOGLEVEL:log_level}\s?\|%{USERNAME:app_name}\|%{URIPATH:request_uri}\|%{URIPROTO:http_method}\|%{BASE16NUM:request_id}\|%{DATA:request_app_name}\|%{GREEDYDATA:ms_describe}"
]
}
}
##gateway网关告警处理
if [app_name] == "gateway-next-prod" {

if [message] =~ "ERROR" and [message] !~ /(405|产品不在服务期内|应用信息不存在| \n
刷新用户的热量消耗信息 运营活动的健步走 失败| \n
health-cloud-yxtx-web)/ {
mutate {
add_tag => ["error_alert"]
}
}

}
}


output {
elasticsearch {
hosts => ["http://10.51.8.112:9200"]
index => "nginx_access-%{+YYYY.MM.dd}"
}
stdout { codec => rubydebug }
}

注意:

 "^\|%{TIMESTAMP_ISO8601:logdate}\|%{LOGLEVEL:log_level}\s?\|%{USERNAME:app_name}\|%{URIPATH:request_uri}\|%{URIPROTO:http_method}\|%{BASE16NUM:request_id}\|%{DATA:request_app_name}\|%{GREEDYDATA:ms_describe}"
这部分内容是匹配日志格式, 其中%{DATA:request_app_name}用来可以匹配空格字符,%{GREEDYDATA:ms_describe}用来配置剩余所有的日志内容。

测试日志格式

[root@demo112 nginx]# cat test.log
|2023-08-29 19:04:31.550|ERROR |gateway-next-prod|/food-match03/meal/v3/api/recipes/detail|POST|338bdfaea1fe408992c1421a888ef391|606ffc2e837fee6eca5c1ed0|接口调用记录 mq 消息发送:{"appId":"606ffc2e837fee6eca5c1ed0","bizType":"020501","developerId":"606ffc2e837fee6eca5c1ed1","interfaceCode":"020501","interfaceUrl":"/food-match/meal/v3/api/recipes/detail?bizType=020501","invokeDate":"2023-08-24","invokeStatus":"1","invokeTime":"2023-08-24 16:39:31","sn":"338bdfaea1fe408992c142","timeLength":147}|

告警优化 https://blog.csdn.net/lihaipeng0417/article/details/118340478

##6、header部署


sudo docker run -d \
--name=elasticsearch-head \
--restart=always \
-p 9500:9100 \
registry.cn-hangzhou.aliyuncs.com/zlq_registry/elasticsearch-head:5-alpine