使用 Prometheus + Grafana 来监控 Mac Mini 的风扇与温度

其实程序部分也没什么复杂的,就当是个笔记吧~

上次给 Mac Mini 安装了 Ubuntu,然后因为学习 Rust,就用 Rust 写了一个 RESTful 的控制风扇的服务;这次就是记录一下使用 Prometheus,Grafana 与 Golang,写个导出 Mac Mini 风扇与温度监控信息到 Prometheus 的坑吧~

(这里我的 Mac Mini 的 IP 地址是 10.0.1.45,Docker 部署的 Prometheus + Grafana 的 Mac 是 10.0.1.46,下面某些配置或者访问的 URL 自行改一下 IP 地址~)

首先就是直接拿 Docker 部署一下 Prometheus + Grafana,这里暂时没有什么好说的。目录结构是

.
└── metrics
    ├── configs
    │   └── prometheus
    │       └── prometheus.yml
    ├── data
    │   └── grafana
    └── docker-compose.yml

data/grafana 是一个空的目录,在下面 docker-compose 设置中会映射给 Grafana(^O^)

docker-compose.yml 如下

# Compose file defining two services: Prometheus ("prom") and Grafana.
version: '3'
services:
  # Prometheus server, published on host port 9090.
  prom:
    image: prom/prometheus
    ports:
      - "9090:9090"
    volumes:
      # Mount the local prometheus.yml at /etc/prometheus/prometheus.yml inside the container.
      - ./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml

  # Grafana, published on host port 3000.
  grafana:
    image: grafana/grafana
    ports:
      - "3000:3000"
    volumes:
      # Bind-mount the (initially empty) ./data/grafana directory to /var/lib/grafana;
      # presumably this is where Grafana keeps its state -- confirm against the image docs.
      - ./data/grafana:/var/lib/grafana
    environment:
      # Password for Grafana's admin user; change it as needed.
      - GF_SECURITY_ADMIN_PASSWORD=secret

上面的 GF_SECURITY_ADMIN_PASSWORD=secret 则是设置了 Grafana 的 admin 用户的密码为 secret,可以根据需要更改一下~

然后是 Prometheus 的配置文件,./configs/prometheus/prometheus.yml,每 5 秒从我的 Mac Mini 上 pull 一次

# global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.

scrape_configs:
  # Job that pulls fan and temperature metrics from the exporter on the Mac Mini.
  - job_name: 'Mac Mini Fan and Temp'
    # Per-job override of the global 15s interval: scrape every 5 seconds.
    scrape_interval: 5s
    metrics_path: '/metrics'
    static_configs:
      # host:port of the exporter (10.0.1.45 is the Mac Mini, 2276 the exporter's default port).
      - targets: ['10.0.1.45:2276']

接着就可以先启动起来

docker-compose up -d

Mac Mini 上对应的服务的话,则是使用了 Go 语言来写。Go 语言里的 Prometheus Client 则是选择了 github.com/prometheus/client_golang/prometheus,可以使用如下命令安装

mkdir -p breezin-prom
cd breezin-prom
go get github.com/prometheus/client_golang/prometheus

然后就是本体 breezin-prom/main.go 了~

package main

import (
    "flag"
    "fmt"
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
    "time"
)

var (
    // addr is the address the HTTP server listens on. It is set by the
    // -listen-address command-line flag and defaults to ":2276".
    addr = flag.String("listen-address", ":2276", "The address to listen on for HTTP requests.")
    // smc is the sysfs directory of the applesmc platform device on Ubuntu
    // Linux; all fan and temperature files are looked up relative to it.
    smc  = "/sys/devices/platform/applesmc.768"
)

// check_panic aborts the program with a panic when given a non-nil error.
// A nil error is a no-op; otherwise the error itself is used as the panic value.
func check_panic(e error) {
    if e == nil {
        return
    }
    panic(e)
}

// get_smc discovers the SMC entries of one kind below the `smc` directory.
//
// `of` is the entry prefix: "fan" for fans, "temp" for temperature sensors.
// Every file matching `<smc>/<of>*_label` whose base name has the exact form
// `<of><digits>_label` contributes one map to the result:
//
//  [
//     {
//         "name": ...,   // "<of><digits>", e.g. "fan1" or "temp1"
//         "help": ...,   // content of the label file, whitespace-trimmed
//     }
//  ]
//
// Label files that cannot be read, or whose name does not have the expected
// form, are reported on stderr and skipped. The function panics (through
// check_panic) if filepath.Glob reports an error.
func get_smc(of string) *[]map[string]string {
    result := make([]map[string]string, 0)

    // build pattern
    // e.g
    //  /sys/devices/platform/applesmc.768/fan*_label
    //  /sys/devices/platform/applesmc.768/temp*_label
    pattern := smc + "/" + of + "*_label"
    paths, err := filepath.Glob(pattern)
    check_panic(err)

    // Regex for extracting the entry name. The prefix is quoted so that it is
    // always matched literally, even if it contains regex metacharacters.
    name_re := regexp.MustCompile("^(" + regexp.QuoteMeta(of) + "\\d+)_label$")
    for _, path := range paths {
        // read description
        data, err := ioutil.ReadFile(path)
        if err != nil {
            // include the cause so that permission problems etc. can be diagnosed
            fmt.Fprintf(os.Stderr, "[ERROR] cannot read: %s: %v\n", path, err)
            continue
        }

        // extract name
        // e.g
        //  /sys/devices/platform/applesmc.768/fan1_label => [fan1_label fan1]
        //  /sys/devices/platform/applesmc.768/temp1_label => [temp1_label temp1]
        groups := name_re.FindStringSubmatch(filepath.Base(path))
        if len(groups) != 2 {
            fmt.Fprintf(os.Stderr, "[ERROR] cannot extract name from `%s`\n", path)
            continue
        }

        result = append(result, map[string]string{
            "name": groups[1],
            "help": strings.TrimSpace(string(data)),
        })
    }

    return &result
}

// get_fans lists every fan exposed by the SMC.
// It is shorthand for get_smc with the "fan" prefix.
func get_fans() *[]map[string]string {
    fans := get_smc("fan")
    return fans
}

// get_temps lists every temperature sensor exposed by the SMC.
//
// It starts from get_smc("temp") and, for each sensor whose "help" value is
// a known four-character sensor key, replaces that value with a
// human-readable description. Unknown keys are left untouched.
func get_temps() *[]map[string]string {
    // sensor key => human-readable description
    // https://superuser.com/questions/553197/interpreting-sensor-names
    sensor_names := map[string]string{
        "TCXC": "PECI CPU",
        "TCXc": "PECI CPU",
        "TC0P": "CPU 1 Proximity",
        "TC0H": "CPU 1 Heatsink",
        "TC0D": "CPU 1 Package",
        "TC0E": "CPU 1",
        "TC0F": "CPU 1",
        "TC1C": "CPU Core 1",
        "TC2C": "CPU Core 2",
        "TC3C": "CPU Core 3",
        "TC4C": "CPU Core 4",
        "TC5C": "CPU Core 5",
        "TC6C": "CPU Core 6",
        "TC7C": "CPU Core 7",
        "TC8C": "CPU Core 8",
        "TCAH": "CPU 1 Heatsink Alt.",
        "TCAD": "CPU 1 Package Alt.",
        "TC1P": "CPU 2 Proximity",
        "TC1H": "CPU 2 Heatsink",
        "TC1D": "CPU 2 Package",
        "TC1E": "CPU 2",
        "TC1F": "CPU 2",
        "TCBH": "CPU 2 Heatsink Alt.",
        "TCBD": "CPU 2 Package Alt.",
        "TCSC": "PECI SA",
        "TCSc": "PECI SA",
        "TCSA": "PECI SA",
        "TCGC": "PECI GPU",
        "TCGc": "PECI GPU",
        "TG0P": "GPU Proximity",
        "TG0D": "GPU Die",
        "TG1D": "GPU Die",
        "TG0H": "GPU Heatsink",
        "TG1H": "GPU Heatsink",
        "Ts0S": "Memory Proximity",
        "TM0P": "Mem Bank A1",
        "TM1P": "Mem Bank A2",
        "TM8P": "Mem Bank B1",
        "TM9P": "Mem Bank B2",
        "TM0S": "Mem Module A1",
        "TM1S": "Mem Module A2",
        "TM8S": "Mem Module B1",
        "TM9S": "Mem Module B2",
        "TN0D": "Northbridge Die",
        "TN0P": "Northbridge Proximity 1",
        "TN1P": "Northbridge Proximity 2",
        "TN0C": "MCH Die",
        "TN0H": "MCH Heatsink",
        "TP0D": "PCH Die",
        "TPCD": "PCH Die",
        "TP0P": "PCH Proximity",
        "TA0P": "Airflow 1",
        "TA1P": "Airflow 2",
        "Th0H": "Heatpipe 1",
        "Th1H": "Heatpipe 2",
        "Th2H": "Heatpipe 3",
        "Tm0P": "Mainboard Proximity",
        "Ts0P": "Palm Rest",
        "Tb0P": "BLC Proximity",
        "TL0P": "LCD Proximity",
        "TW0P": "Airport Proximity",
        "TH0P": "HDD Bay 1",
        "TH1P": "HDD Bay 2",
        "TH2P": "HDD Bay 3",
        "TH3P": "HDD Bay 4",
        "TO0P": "Optical Drive",
        "TB0T": "Battery TS_MAX",
        "TB1T": "Battery 1",
        "TB2T": "Battery 2",
        "TB3T": "Battery",
        "Tp0P": "Power Supply 1",
        "Tp0C": "Power Supply 1 Alt.",
        "Tp1P": "Power Supply 2",
        "Tp1C": "Power Supply 2 Alt.",
        "Tp2P": "Power Supply 3",
        "Tp3P": "Power Supply 4",
        "Tp4P": "Power Supply 5",
        "Tp5P": "Power Supply 6",
        "TS0C": "Expansion Slots",
        "TA0S": "PCI Slot 1 Pos 1",
        "TA1S": "PCI Slot 1 Pos 2",
        "TA2S": "PCI Slot 2 Pos 1",
        "TA3S": "PCI Slot 2 Pos 2",
    }

    sensors := get_smc("temp")
    for _, sensor := range *sensors {
        // each element is a map, so assigning through `sensor` updates the
        // entry stored in the slice
        if readable, known := sensor_names[sensor["help"]]; known {
            sensor["help"] = readable
        }
    }

    return sensors
}

// postprocessing converts a raw readout parsed from an SMC file into the
// value that is stored in the gauge (for example identity, or a scaling).
type postprocessing func(float64) float64

// update_metrics keeps one labelled gauge in sync with a numeric SMC file.
//
// It samples once immediately and then once per `ticks`, forever; it never
// returns and is meant to be run in its own goroutine.
//
//   gauge     gauge vector to update
//   desc      label value selecting the gauge inside the vector
//   smc_path  file to read; its trimmed content is parsed as a float
//   ticks     interval between two samples
//   post      applied to the parsed readout before it is stored
//
// If the file cannot be read, the error is logged to stderr and the gauge is
// left as it was. If the content is not a valid float, the error is logged
// and the gauge is set to 0.
func update_metrics(gauge *prometheus.GaugeVec, desc string, smc_path string, ticks time.Duration, post postprocessing) {
    sample := func() {
        // read data from given smc path
        data, err := ioutil.ReadFile(smc_path)
        if err != nil {
            // include the cause so that the failure can be diagnosed
            fmt.Fprintf(os.Stderr, "[ERROR] cannot read while updating: %s: %v\n", smc_path, err)
            return
        }

        // try to parse the readout as float
        readout, err := strconv.ParseFloat(strings.TrimSpace(string(data)), 64)
        if err != nil {
            fmt.Fprintf(os.Stderr, "[ERROR] cannot convert `%s` to float\n", data)
            gauge.WithLabelValues(desc).Set(0)
            return
        }

        // set value for gauge with given description and postprocessed value
        gauge.WithLabelValues(desc).Set(post(readout))
    }

    // Sample right away: the first tick only fires after a full interval,
    // which would otherwise leave the gauge without a value until then.
    sample()
    for range time.Tick(ticks) {
        sample()
    }
}

// Add fan readings
func add_fan(name string, property string, help string) {
    var gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
        Name: name + "_" + property,
        Help: help,
    }, []string{"desc"})
    prometheus.MustRegister(gauge)

    // build corresponding smc paths
    var smc_path_builder strings.Builder
    fmt.Fprintf(&smc_path_builder, "%s/%s_%s", smc, name, property)
    smc_path := smc_path_builder.String()

    go update_metrics(gauge, help, smc_path, 5*time.Second, func(metrics float64) float64 { return metrics })
}

// Add temp sensor readings
func add_temp(name string, help string) {
    var gauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
        Name: name,
        Help: help,
    }, []string{"desc"})
    prometheus.MustRegister(gauge)

    var smc_path_builder strings.Builder
    fmt.Fprintf(&smc_path_builder, "%s/%s_input", smc, name)
    smc_path := smc_path_builder.String()

    go update_metrics(gauge, help, smc_path, 5*time.Second, func(metrics float64) float64 { return metrics / 1000.0 })
}

// main parses the command-line flags, registers a gauge for each property of
// every fan and for every temperature sensor found, and then serves the
// registered metrics on "/metrics" at the configured listen address.
// It only returns (through log.Fatal) if the HTTP server fails.
func main() {
    flag.Parse()

    // add_fan and add_temp only register a gauge and start their own updater
    // goroutine, so they return quickly. Calling them synchronously ensures
    // that every collector is registered before the server accepts its first
    // scrape, instead of racing registration against server start-up.
    for _, fan := range *get_fans() {
        add_fan(fan["name"], "input", "RPM Readout")
        add_fan(fan["name"], "output", "RPM Requested")
        add_fan(fan["name"], "min", "Min RPM")
        add_fan(fan["name"], "max", "Max RPM")
    }

    for _, temp := range *get_temps() {
        add_temp(temp["name"], temp["help"])
    }

    http.Handle("/metrics", promhttp.Handler())
    log.Fatal(http.ListenAndServe(*addr, nil))
}

编译的时候可以使用 go build -ldflags "-s" 来 strip 掉 debug symbols,减小一点最后二进制的体积,然后把它放到 /usr/local/bin 下,再写个 systemd service,就算差不多准备完成啦

# Install the compiled binary into /usr/local/bin.
sudo mv breezin-prom /usr/local/bin
# Write the systemd unit file; piping into `sudo tee` performs the write as root.
cat <<EOF | sudo tee /etc/systemd/system/breezin-prom.service
[Unit]
Description=Export SMC Fan and Temp Sensor Data to Prometheus
After=network.target

[Service]
User=root
ExecStart=/usr/local/bin/breezin-prom -listen-address 0.0.0.0:2276
RestartSec=2
Restart=always

[Install]
WantedBy=multi-user.target
EOF
# Enable the service at boot, then start it right away.
sudo systemctl enable breezin-prom
sudo service breezin-prom start

现在就可以去 Grafana 上配置啦~我的 Grafana 地址是 http://10.0.1.46:3000,用户名 admin,密码 secret

登录上去之后需要先添加数据源「Data Source」,这里我们选择 Prometheus

然后配置连接的页面的话,这里我们也没有别的安全设置(因为只在自家内网使用),就直接写上 Prometheus 的 URL http://10.0.1.46:9090(Prometheus 和 Grafana 一样部署在 10.0.1.46 这台 Mac 上),然后「Save & Test」应该就没问题了

下一步就是添加「Dashboard」了,把想要展示出来的用 PromQL 写上就行,比如下面只简单地写了 fan1_input

在一个 Panel 里也可以同时展示多个不同的 Metrics,如下图显示了 3 个不同的温度传感器的读出数据

保存完 Panel 和 Dashboard 之后,就可以从主页直接点过去看数据了

声明: 本文为0xBBC原创, 转载注明出处喵~

Leave a Reply

Your email address will not be published. Required fields are marked *