[关闭]
@llplmlyd 2021-06-28T17:19:55.000000Z 字数 3651 阅读 643

TiDB监控参数

数据库-TiDB


进程运行数量情况

  1. # TiDB
  2. count(probe_success{tidb_cluster="$tidb_cluster", group="tidb"} == 1)
  3. # PD
  4. count(probe_success{tidb_cluster="$tidb_cluster", group="pd"} == 1)
  5. # TiKV
  6. count(probe_success{tidb_cluster="$tidb_cluster", group="tikv"} == 1)
  7. # TiFlash
  8. count(probe_success{tidb_cluster="$tidb_cluster", group="tiflash"} == 1)
  9. # Pump
  10. count(probe_success{tidb_cluster="$tidb_cluster", group="pump"} == 1)

Store Health Info

  1. ## 整个集群的磁盘使用率
  2. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance",type="storage_size"}) / sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance",type="storage_capacity"})
  3. ## 失联的store数量
  4. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="store_disconnected_count"})
  5. ## 不健康的store数量
  6. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="store_unhealth_count"})
  7. ## 存储空间较低的store数量
  8. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="store_low_space_count"})
  9. ## 进程停止的store数量
  10. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="store_down_count"})
  11. ## 正在下线的store数量
  12. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="store_offline_count"})
  13. ## 已经下线的store数量
  14. sum(pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="store_tombstone_count"})

Region info

  1. # 当前集群的 Region 总量(按 leader_count 统计:每个 Region 只有一个 leader,因此该值与副本数无关)
  2. pd_cluster_status{tidb_cluster="$tidb_cluster", instance="$instance", type="leader_count"}
  3. ## region health
  4. pd_regions_status{tidb_cluster="$tidb_cluster", instance="$instance"}
  5. sum(pd_regions_status{tidb_cluster="$tidb_cluster"}) by (instance, type)

PD info

  1. ## PD 启动时间
  2. (time() - process_start_time_seconds{tidb_cluster="$tidb_cluster",job=~".*pd.*"})
  3. ## 创建的调度
  4. sum(delta(pd_schedule_operators_count{tidb_cluster="$tidb_cluster", instance="$instance", event="create"}[1m])) by (type)
  5. ## 完成的调度
  6. sum(delta(pd_schedule_operators_count{tidb_cluster="$tidb_cluster", instance="$instance", event="finish"}[1m])) by (type)

TiKV info

  1. ## TiKV leader info
  2. sum(tikv_raftstore_region_count{tidb_cluster="$tidb_cluster", type="leader"}) by (instance)
  3. ## TiKV region info
  4. sum(tikv_raftstore_region_count{tidb_cluster="$tidb_cluster", type="region"}) by (instance)
  5. ## TiKV 内存使用量(进程常驻内存,单位:字节)
  6. avg(process_resident_memory_bytes{tidb_cluster="$tidb_cluster", instance=~"$instance"}) by (instance)
  7. ## TiKV Server is Busy
  8. sum(rate(tikv_scheduler_too_busy_total{instance=~"$instance"}[1m])) by (instance)
  9. sum(rate(tikv_channel_full_total{instance=~"$instance"}[1m])) by (instance, type)
  10. sum(rate(tikv_coprocessor_request_error{instance=~"$instance", type='full'}[1m])) by (instance)
  11. avg(tikv_engine_write_stall{instance=~"$instance", type="write_stall_percentile99"}) by (instance)
  12. ## TiKV 待 compaction 的堆积字节数,反映写入压力情况
  13. sum(tikv_engine_pending_compaction_bytes{tidb_cluster="$tidb_cluster", instance=~"$instance", db="$db"}) by (cf)

TiDB info

  1. ## TiDB 内存使用量(进程常驻内存,单位:字节)
  2. process_resident_memory_bytes{tidb_cluster="$tidb_cluster", job="tidb"}
  3. ## TiDB运行时间
  4. (time() - process_start_time_seconds{tidb_cluster="$tidb_cluster", job="tidb"})
  5. ## TiDB 各个节点的连接数
  6. tidb_server_connections{tidb_cluster="$tidb_cluster"}
  7. ## 集群总的连接数
  8. sum(tidb_server_connections{tidb_cluster="$tidb_cluster"})
  9. ## TiDB 查询延迟 P99(不含内部 SQL)
  10. histogram_quantile(0.99, sum(rate(tidb_server_handle_query_duration_seconds_bucket{tidb_cluster="$tidb_cluster", sql_type!="internal"}[1m])) by (le))
  11. ## TiDB 查询延迟 P95(不含内部 SQL)
  12. histogram_quantile(0.95, sum(rate(tidb_server_handle_query_duration_seconds_bucket{tidb_cluster="$tidb_cluster", sql_type!="internal"}[1m])) by (le))
  13. ## 集群QPS
  14. # 按照类型
  15. sum(rate(tidb_executor_statement_total{tidb_cluster="$tidb_cluster"}[1m])) by (type)
  16. # 总的
  17. sum(rate(tidb_executor_statement_total{tidb_cluster="$tidb_cluster"}[1m]))
  18. ## 集群 TPS(按事务类型 type 和事务模式 txn_mode 分组)
  19. sum(rate(tidb_session_transaction_duration_seconds_count{tidb_cluster="$tidb_cluster"}[1m])) by (type, txn_mode)
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注