Elastic Application Performance Management(APM) 是基于 Elastic Stack 构建的应用性能监控系统

背景

目前我们的日志系统已经投入生产使用,为日常开发提供了一个方便排查问题的途径。
但是,有些难题并不浮于表面,下一步需要做的,便是找出这些埋于底层的性能及效率问题。
这正是 Application Performance Management(APM)出现的目的。

目标

  • 收集有关请求的响应时间、数据库查询、外部 HTTP 请求等的详细性能信息
  • 跟踪应用程序处理请求期间发生的其他缓慢操作
  • 自动收集未处理的错误和异常,错误主要基于堆栈跟踪进行分组,因此可以识别出现的新错误,并密切关注特定错误发生的次数。

价值

依靠 APM,过滤出不合格的接口,根据具体情况加以改进,最终达到满足"三高"(高并发、高性能、高可用)的理想状态。
我们可以对接口的所有信息进行追溯:现在我们已经有了 trace-id,可以拿"不合格的接口"的 trace-id 反向到日志里查找原因。

APM server

APM Server 是用 Go 编写的开源应用程序,通常运行在专用服务器上,默认监听端口 8200 ,并通过 JSON HTTP API 从 agent 接收数据,然后根据该数据创建文档并将其存储在 Elasticsearch 中。

APM server的安装

  • docker-compose
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# docker-compose definition for a single APM Server container.
version: '3'
services:
  apm-server:
    image: docker.elastic.co/apm/apm-server:7.14.0
    container_name: apm-server
    # These names refer to services in the same compose file; they are not
    # shown in this snippet and must be defined alongside apm-server.
    depends_on:
      - elasticsearch
      - kibana
    ports:
      # host:container — the port agents send their data to.
      - 8200:8200
    volumes:
      # host path : container path
      # TLS certificates referenced from apm-server.yml.
      - /opt/elk/ssl/certs/apm:/usr/share/apm-server/certs
      - /etc/apm-server/data:/usr/share/apm-server/data
      - /var/log/apm-server:/usr/share/apm-server/logs
      # Server configuration file, mounted from the host.
      - /etc/apm-server/apm-server.yml:/usr/share/apm-server/apm-server.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Address and port the APM Server listens on (all interfaces, port 8200).
apm-server.host: "0.0.0.0:8200"
# Turns on the server's RUM setting (presumably Real User Monitoring for
# browser agents — confirm this is actually needed).
apm-server.rum.enabled: true
# Elasticsearch output: where the documents built from agent data are stored.
output.elasticsearch.hosts: ["10.0.2.15:9200"]
# NOTE(review): credentials are kept in plain text in this file.
output.elasticsearch.username: "elastic"
output.elasticsearch.password: "xiaowu"
output.elasticsearch.protocol: https
# Paths below are inside the container; they sit under the
# /usr/share/apm-server/certs directory mounted by the docker-compose file.
output.elasticsearch.ssl.key: "/usr/share/apm-server/certs/es-node1.key"
output.elasticsearch.ssl.certificate: "/usr/share/apm-server/certs/es-node1.crt"
output.elasticsearch.ssl.certificate_authorities: "/usr/share/apm-server/certs/ca.crt"
# Connection from the APM Server to Kibana.
apm-server.kibana.enabled: true
apm-server.kibana.host: "10.0.2.15:5601"
# NOTE(review): same plain-text credentials as the Elasticsearch output.
apm-server.kibana.username: "elastic"
apm-server.kibana.password: "xiaowu"
apm-server.kibana.protocol: https
apm-server.kibana.ssl.enabled: true
apm-server.kibana.ssl.certificate_authorities: ["/usr/share/apm-server/certs/ca.crt"]

APM agent

APM agent 是使用与服务相同的语言编写的开源库,可以像安装其他库一样将它们安装到服务中,agent 将检测服务的代码并在运行时收集性能数据和错误,这些数据缓冲一小段时间并发送到 APM server。

Nodejs代码样例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// 更多配置参数详解参考 https://www.elastic.co/guide/en/apm/agent/nodejs/current/configuration.html
const elastic = require('elastic-apm-node');

module.exports = class Apm {
constructor(opts) {
this.opts = opts
}
start(){
const self = this
elastic.start({
active: process.env.NODE_ENV !== 'local',
environment: process.env.NODE_ENV || 'development',
// 开发环境调试留用
logLevel: self.opts.logLevel || "off",
usePathAsTransactionName: true,
serviceName: self.opts.appName,
// 开发环境调试留用
serverUrl: self.opts.serverUrl || 'http://xxx:8200',
})
}
}


// Example bootstrap: build the wrapper with this service's name and the APM
// Server address, then start the agent.
// NOTE(review): `Apm` and `sysCfg` are not defined in this snippet —
// presumably they are required from elsewhere in the project; confirm.
const apmOptions = {
  appName: sysCfg.name,
  serverUrl: 'http://192.168.56.103:8200',
};
new Apm(apmOptions).start();
  • 可以统计接口的详细信息
  • 抓取的全部指标数据
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    {
    "agent" : {
    "name" : "nodejs",
    "version" : "3.20.0"
    },
    "process" : {
    "args" : [
    """D:\Program Files\nodejs\node.exe""",
    """D:\project\node-project\test\bin\run"""
    ],
    "pid" : 3776,
    "title" : "npm",
    "ppid" : 9500
    },
    "source" : {
    "ip" : "127.0.0.1"
    },
    "processor" : {
    "name" : "transaction",
    "event" : "transaction"
    },
    "url" : {
    "path" : "/xiaowu/400",
    "original" : "/xiaowu/400",
    "scheme" : "http",
    "port" : 23000,
    "domain" : "localhost",
    "full" : "http://localhost:23000/xiaowu/400"
    },
    "observer" : {
    "hostname" : "6f216d8db8c6",
    "id" : "2c6a2ceb-ee23-4622-b8b7-8df5c44d0075",
    "type" : "apm-server",
    "ephemeral_id" : "a9e8ecc2-802e-48b3-98dd-3854ed6b18c9",
    "version" : "7.14.0",
    "version_major" : 7
    },
    "trace" : {
    "id" : "b37ea440f81892107c8a665d1681f874"
    },
    "@timestamp" : "2021-08-27T09:13:04.762Z",
    "ecs" : {
    "version" : "1.10.0"
    },
    "service" : {
    "node" : {
    "name" : "DESKTOP-6RD9D8B"
    },
    "environment" : "development",
    "name" : "sapi_carmodel",
    "runtime" : {
    "name" : "node",
    "version" : "12.22.1"
    },
    "language" : {
    "name" : "javascript"
    },
    "version" : "0.2.0"
    },
    "host" : {
    "hostname" : "DESKTOP-6RD9D8B",
    "os" : {
    "platform" : "win32"
    },
    "ip" : "192.168.56.1",
    "name" : "DESKTOP-6RD9D8B",
    "architecture" : "x64"
    },
    "http" : {
    "request" : {
    "headers" : {
    "Connection" : [
    "keep-alive"
    ],
    "User-Agent" : [
    "Apache-HttpClient/4.5.10 (Java/11)"
    ],
    "Host" : [
    "localhost:23000"
    ]
    },
    "method" : "GET",
    "socket" : {
    "encrypted" : false,
    "remote_address" : "127.0.0.1"
    }
    },
    "response" : {
    "headers" : {
    "Keep-Alive" : [
    "timeout=5"
    ],
    "Trace-Id" : [
    "-"
    ],
    "Connection" : [
    "keep-alive"
    ],
    "Content-Length" : [
    "9"
    ],
    "Date" : [
    "Fri, 27 Aug 2021 09:13:04 GMT"
    ],
    "Content-Type" : [
    "text/plain; charset=utf-8"
    ]
    },
    "status_code" : 404
    },
    "version" : "1.1"
    },
    "client" : {
    "ip" : "127.0.0.1"
    },
    "event" : {
    "ingested" : "2021-08-27T09:13:06.567410455Z",
    "outcome" : "success"
    },
    "transaction" : {
    "duration" : {
    "us" : 3448
    },
    "result" : "HTTP 4xx",
    "name" : "GET /xiaowu/400",
    "span_count" : {
    "started" : 0
    },
    "id" : "84271adf07e76a62",
    "type" : "request",
    "sampled" : true
    },
    "user_agent" : {
    "original" : "Apache-HttpClient/4.5.10 (Java/11)",
    "name" : "Apache-HttpClient",
    "device" : {
    "name" : "Other"
    },
    "version" : "4.5.10"
    },
    "timestamp" : {
    "us" : 1630055584762002
    }
    }