Skip to main content
pb-ext includes comprehensive system monitoring that collects and displays real-time metrics for CPU usage, memory consumption, disk space, network activity, and Go runtime statistics.

Key Features

  • CPU metrics — per-core usage and average utilization
  • Memory tracking — total, used, free, and cache/buffer breakdown
  • Disk monitoring — space usage and read/write statistics
  • Network stats — interface information, connections, and throughput
  • Runtime metrics — goroutines, heap allocation, GC statistics
  • Temperature sensors — CPU, disk, and system temperatures (if available)
  • Request tracking — rate, duration, and status code distribution
  • Smart caching — 2-second refresh interval to reduce overhead

System Metrics Collection

From core/monitoring/stats.go:47:
// CollectSystemStats gathers a snapshot of host, CPU, memory, disk,
// temperature, process, Go runtime, and network statistics.
//
// Results are cached on the shared collector: a call made less than
// StatsRefreshInterval after the previous collection returns the cached
// snapshot instead of collecting again. The returned pointer is the same one
// held by the cache, so callers should treat it as read-only.
//
// Collection is best-effort: an error from an individual collector is
// discarded and the fields it would have filled keep their zero values. The
// returned error is always nil.
func CollectSystemStats(ctx context.Context, startTime time.Time) (*SystemStats, error) {
    // Fast path: serve the cached snapshot while it is still fresh. The fields
    // are copied out under the read lock so the lock is released before any
    // further work.
    collector.mu.RLock()
    cached, collectedAt := collector.cachedStats, collector.lastCollected
    collector.mu.RUnlock()
    if cached != nil && time.Since(collectedAt) < StatsRefreshInterval {
        return cached, nil
    }

    stats := &SystemStats{
        StartTime:  startTime,
        UptimeSecs: int64(time.Since(startTime).Seconds()),
    }

    // Collect host information
    if hostInfo, err := host.InfoWithContext(ctx); err == nil {
        stats.Hostname = hostInfo.Hostname
        stats.Platform = hostInfo.Platform
        stats.OS = hostInfo.OS
        stats.KernelVersion = hostInfo.KernelVersion
    }

    // Collector errors below are deliberately ignored: each collector returns
    // a usable zero value on failure, so one failing metric does not prevent
    // the others from being reported.
    stats.CPUInfo, _ = CollectCPUInfoWithContext(ctx)
    stats.MemoryInfo, _ = CollectMemoryInfoWithContext(ctx)

    diskInfo, _ := CollectDiskInfoWithContext(ctx)
    stats.DiskTotal = diskInfo.Total
    stats.DiskUsed = diskInfo.Used
    stats.DiskFree = diskInfo.Free

    tempInfo, _ := CollectTemperatureInfoWithContext(ctx)
    stats.HasTempData = tempInfo.HasTempData

    procInfo, _ := CollectProcessInfoWithContext(ctx)
    stats.ProcessStats = procInfo

    stats.RuntimeStats = CollectRuntimeStats()

    netInfo, _ := CollectNetworkInfoWithContext(ctx)
    stats.NetworkInterfaces = netInfo.Interfaces
    stats.NetworkConnections = netInfo.ConnectionCount
    stats.NetworkBytesSent = netInfo.TotalBytesSent
    stats.NetworkBytesRecv = netInfo.TotalBytesRecv

    // Publish the new snapshot under the write lock; readers on the fast path
    // hold the read lock, so an unlocked write here would be a data race.
    collector.mu.Lock()
    collector.cachedStats = stats
    collector.lastCollected = time.Now()
    collector.mu.Unlock()

    return stats, nil
}

SystemStats Structure

From core/monitoring/stats.go:18:
// SystemStats is the aggregated snapshot produced by CollectSystemStats and
// serialized to JSON using the field tags below.
type SystemStats struct {
    // Host identification, copied from the host info lookup.
    Hostname           string             `json:"hostname"`
    Platform           string             `json:"platform"`
    OS                 string             `json:"os"`
    KernelVersion      string             `json:"kernel_version"`
    // One entry per CPU core reported by the CPU collector.
    CPUInfo            []CPUInfo          `json:"cpu_info"`
    MemoryInfo         MemoryInfo         `json:"memory_info"`
    // Disk totals copied from the disk collector's result.
    DiskTotal          uint64             `json:"disk_total"`
    DiskUsed           uint64             `json:"disk_used"`
    DiskFree           uint64             `json:"disk_free"`
    RuntimeStats       RuntimeStats       `json:"runtime_stats"`
    ProcessStats       ProcessInfo        `json:"process_stats"`
    // StartTime is the value passed in by the caller; UptimeSecs is the whole
    // number of seconds elapsed since StartTime at collection time.
    StartTime          time.Time          `json:"start_time"`
    UptimeSecs         int64              `json:"uptime_secs"`
    // HasTempData mirrors TemperatureInfo.HasTempData.
    HasTempData        bool               `json:"has_temp_data"`
    // Network fields are copied from the network collector's result.
    NetworkInterfaces  []NetworkInterface `json:"network_interfaces"`
    NetworkConnections int                `json:"network_connections"`
    NetworkBytesSent   uint64             `json:"network_bytes_sent"`
    NetworkBytesRecv   uint64             `json:"network_bytes_recv"`
}

CPU Monitoring

From core/monitoring/cpu.go:
// CPUInfo holds the usage reading for a single CPU core.
type CPUInfo struct {
    // Core is the zero-based index of the core in the collected usage list.
    Core      int     `json:"core"`
    // Usage is the usage percentage reported for this core.
    Usage     float64 `json:"usage"`
    // ModelName is the CPU model string, or "Unknown" when it is unavailable.
    ModelName string  `json:"model_name"`
}

// CollectCPUInfoWithContext returns one CPUInfo per core, holding that core's
// usage percentage sampled over CPUCollectionInterval.
//
// A failure to read the usage percentages is returned as a system error with
// a nil slice. The model name lookup is best-effort: when it yields nothing,
// every entry reports the model as "Unknown".
func CollectCPUInfoWithContext(ctx context.Context) ([]CPUInfo, error) {
    usage, err := cpu.PercentWithContext(ctx, CPUCollectionInterval, true)
    if err != nil {
        return nil, NewSystemError("cpu_collection", "failed to get CPU usage", err)
    }

    // Only the first entry's model name is used; it is applied to every core.
    model := "Unknown"
    if details, _ := cpu.InfoWithContext(ctx); len(details) > 0 {
        model = details[0].ModelName
    }

    cores := make([]CPUInfo, 0, len(usage))
    for idx := range usage {
        cores = append(cores, CPUInfo{Core: idx, Usage: usage[idx], ModelName: model})
    }
    return cores, nil
}

Memory Monitoring

From core/monitoring/memory.go:
// MemoryInfo is the memory snapshot returned by CollectMemoryInfoWithContext.
// Every field is copied from the same-named field of the virtual memory
// statistics.
type MemoryInfo struct {
    Total       uint64  `json:"total"`
    Used        uint64  `json:"used"`
    Free        uint64  `json:"free"`
    // UsedPercent is the used share of memory as reported by the source.
    UsedPercent float64 `json:"used_percent"`
    Cached      uint64  `json:"cached"`
    Buffers     uint64  `json:"buffers"`
}

// CollectMemoryInfoWithContext reads the virtual memory statistics and copies
// the total, used, free, used-percent, cached, and buffer figures into a
// MemoryInfo. On failure it returns the zero MemoryInfo and a system error.
func CollectMemoryInfoWithContext(ctx context.Context) (MemoryInfo, error) {
    var out MemoryInfo

    vm, err := mem.VirtualMemoryWithContext(ctx)
    if err != nil {
        return out, NewSystemError("memory_collection", "failed to get memory info", err)
    }

    out.Total = vm.Total
    out.Used = vm.Used
    out.Free = vm.Free
    out.UsedPercent = vm.UsedPercent
    out.Cached = vm.Cached
    out.Buffers = vm.Buffers
    return out, nil
}

Disk Monitoring

From core/monitoring/disk.go:
// DiskInfo is the disk usage snapshot returned by CollectDiskInfoWithContext.
// Every field is copied from the same-named field of the usage lookup.
type DiskInfo struct {
    Total       uint64  `json:"total"`
    Used        uint64  `json:"used"`
    Free        uint64  `json:"free"`
    UsedPercent float64 `json:"used_percent"`
}

// CollectDiskInfoWithContext reports disk usage for the "/" path. On failure
// it returns the zero DiskInfo and a system error.
func CollectDiskInfoWithContext(ctx context.Context) (DiskInfo, error) {
    var out DiskInfo

    root, err := disk.UsageWithContext(ctx, "/")
    if err != nil {
        return out, NewSystemError("disk_collection", "failed to get disk usage", err)
    }

    out.Total = root.Total
    out.Used = root.Used
    out.Free = root.Free
    out.UsedPercent = root.UsedPercent
    return out, nil
}

Network Monitoring

From core/monitoring/network.go:
// NetworkInterface holds the I/O counters of one network interface, copied
// from the per-interface counters read by CollectNetworkInfoWithContext.
type NetworkInterface struct {
    Name         string `json:"name"`
    BytesSent    uint64 `json:"bytes_sent"`
    BytesRecv    uint64 `json:"bytes_recv"`
    PacketsSent  uint64 `json:"packets_sent"`
    PacketsRecv  uint64 `json:"packets_recv"`
    // Error and drop counters, split by direction (in = receive, out = send).
    ErrorsIn     uint64 `json:"errors_in"`
    ErrorsOut    uint64 `json:"errors_out"`
    DropsIn      uint64 `json:"drops_in"`
    DropsOut     uint64 `json:"drops_out"`
}

// NetworkInfo is the network snapshot returned by
// CollectNetworkInfoWithContext.
type NetworkInfo struct {
    // Interfaces has one entry per interface counter set that was read.
    Interfaces      []NetworkInterface `json:"interfaces"`
    // ConnectionCount is the number of connections returned by the
    // connection listing (0 when that listing fails).
    ConnectionCount int                `json:"connection_count"`
    // TotalBytesSent and TotalBytesRecv are sums over all Interfaces.
    TotalBytesSent  uint64             `json:"total_bytes_sent"`
    TotalBytesRecv  uint64             `json:"total_bytes_recv"`
}

// CollectNetworkInfoWithContext reads the per-interface I/O counters, sums the
// sent and received byte counts across all interfaces, and counts the
// connections returned for the "all" kind.
//
// A failure to read the I/O counters is returned as a system error with the
// zero NetworkInfo. A failure to list connections is ignored and leaves
// ConnectionCount at 0.
func CollectNetworkInfoWithContext(ctx context.Context) (NetworkInfo, error) {
    counters, err := net.IOCountersWithContext(ctx, true)
    if err != nil {
        return NetworkInfo{}, NewSystemError("network_collection", "failed to get network info", err)
    }

    info := NetworkInfo{
        Interfaces: make([]NetworkInterface, 0, len(counters)),
    }
    for i := range counters {
        c := &counters[i]
        info.Interfaces = append(info.Interfaces, NetworkInterface{
            Name:        c.Name,
            BytesSent:   c.BytesSent,
            BytesRecv:   c.BytesRecv,
            PacketsSent: c.PacketsSent,
            PacketsRecv: c.PacketsRecv,
            ErrorsIn:    c.Errin,
            ErrorsOut:   c.Errout,
            DropsIn:     c.Dropin,
            DropsOut:    c.Dropout,
        })
        info.TotalBytesSent += c.BytesSent
        info.TotalBytesRecv += c.BytesRecv
    }

    // Best-effort: a nil result simply yields a count of zero.
    conns, _ := net.ConnectionsWithContext(ctx, "all")
    info.ConnectionCount = len(conns)

    return info, nil
}

Runtime Monitoring

From core/monitoring/runtime.go:
// RuntimeStats is a snapshot of Go runtime figures: goroutine and CPU counts,
// the Go version string, and a selection of runtime.MemStats fields.
type RuntimeStats struct {
    NumGoroutines int    `json:"num_goroutines"`
    NumCPU        int    `json:"num_cpu"`
    GoVersion     string `json:"go_version"`

    // The fields below are copied from the same-named runtime.MemStats fields.
    Alloc        uint64 `json:"alloc"`
    TotalAlloc   uint64 `json:"total_alloc"`
    Sys          uint64 `json:"sys"`
    NumGC        uint32 `json:"num_gc"`
    LastGC       int64  `json:"last_gc"` // MemStats.LastGC converted to int64
    HeapAlloc    uint64 `json:"heap_alloc"`
    HeapSys      uint64 `json:"heap_sys"`
    HeapInuse    uint64 `json:"heap_inuse"`
    HeapReleased uint64 `json:"heap_released"`
}

// CollectRuntimeStats reads the current memory statistics from the Go runtime
// and returns them together with the goroutine count, CPU count, and Go
// version.
func CollectRuntimeStats() RuntimeStats {
    var snapshot runtime.MemStats
    runtime.ReadMemStats(&snapshot)

    var out RuntimeStats
    out.NumGoroutines = runtime.NumGoroutine()
    out.NumCPU = runtime.NumCPU()
    out.GoVersion = runtime.Version()

    out.Alloc = snapshot.Alloc
    out.TotalAlloc = snapshot.TotalAlloc
    out.Sys = snapshot.Sys
    out.NumGC = snapshot.NumGC
    out.LastGC = int64(snapshot.LastGC)
    out.HeapAlloc = snapshot.HeapAlloc
    out.HeapSys = snapshot.HeapSys
    out.HeapInuse = snapshot.HeapInuse
    out.HeapReleased = snapshot.HeapReleased
    return out
}

Request Tracking

From core/monitoring/requests.go:
// RequestMetrics describes a single handled request. TrackRequest reads only
// Duration and Timestamp; the remaining fields are carried for other
// consumers.
type RequestMetrics struct {
    Path          string
    Method        string
    StatusCode    int
    Duration      time.Duration
    Timestamp     time.Time
    UserAgent     string
    ContentLength int64
    RemoteAddr    string
}

// RequestStats accumulates request counters. It must not be copied after
// first use because it contains atomic values; use it through a pointer.
type RequestStats struct {
    // TotalRequests is updated with sync/atomic operations by TrackRequest;
    // concurrent readers should use atomic.LoadUint64.
    TotalRequests      uint64
    ActiveConnections  uint64
    AverageRequestTime atomic.Int64 // in nanoseconds
    LastRequestTime    atomic.Int64 // unix timestamp
    StartTime          time.Time
}

// TrackRequest records one request: it increments TotalRequests, stores the
// request's Unix timestamp in LastRequestTime, and folds the request duration
// into AverageRequestTime.
//
// The average is a rolling one: each new sample is averaged with the previous
// value. The first sample seeds the average directly, so it is not halved
// against the initial zero.
func (rs *RequestStats) TrackRequest(metrics RequestMetrics) {
    // Atomic increment: the counter may be bumped from concurrent handlers.
    total := atomic.AddUint64(&rs.TotalRequests, 1)
    rs.LastRequestTime.Store(metrics.Timestamp.Unix())

    sample := int64(metrics.Duration)
    // Compare-and-swap loop so a concurrent update between the load and the
    // store is retried instead of silently overwritten.
    for {
        current := rs.AverageRequestTime.Load()
        next := sample
        if total > 1 {
            next = (current + sample) / 2
        }
        if rs.AverageRequestTime.CompareAndSwap(current, next) {
            return
        }
    }
}

// GetRequestRate returns the mean number of requests per second since
// StartTime. It returns 0 when no time has elapsed or when StartTime lies in
// the future.
func (rs *RequestStats) GetRequestRate() float64 {
    uptime := time.Since(rs.StartTime).Seconds()
    if uptime <= 0 {
        return 0
    }
    return float64(atomic.LoadUint64(&rs.TotalRequests)) / uptime
}

Server Stats Access

From core/server/server.go:
// Server bundles the PocketBase instance with the server statistics and
// analytics handles. Further fields are elided in this excerpt.
type Server struct {
    pb         *pocketbase.PocketBase
    stats      *ServerStats // returned by Stats
    analytics  *analytics.Analytics
    // ...
}

// Stats returns the server statistics. The pointer held by the Server is
// returned as-is (no copy is made).
func (s *Server) Stats() *ServerStats {
    return s.stats
}

// ServerStats holds server-wide request counters. The counters are typed
// atomics, so they are read and written through their Load/Store/Add methods.
type ServerStats struct {
    TotalRequests      atomic.Uint64
    ActiveConnections  atomic.Uint64
    AverageRequestTime atomic.Int64  // in nanoseconds
    LastRequestTime    atomic.Int64  // unix timestamp
    StartTime          time.Time
}

Temperature Monitoring

From core/monitoring/temperature.go:
// TemperatureInfo is the result of CollectTemperatureInfoWithContext.
type TemperatureInfo struct {
    // HasTempData is true only when at least one sensor reading was returned.
    HasTempData bool     `json:"has_temp_data"`
    // Temps holds the formatted readings; it is omitted from JSON when empty.
    Temps       []string `json:"temps,omitempty"`
}

// CollectTemperatureInfoWithContext reads the host temperature sensors and
// returns their formatted readings.
//
// Missing sensor support is not an error: when no readings are available the
// result has HasTempData set to false and the returned error is nil.
func CollectTemperatureInfoWithContext(ctx context.Context) (TemperatureInfo, error) {
    // The sensor lookup can return usable readings together with a non-nil
    // error that only carries warnings about individual sensors. The error is
    // therefore ignored and the decision is based on whether any readings
    // came back, so one unreadable sensor does not hide the others.
    temps, _ := host.SensorsTemperaturesWithContext(ctx)
    if len(temps) == 0 {
        return TemperatureInfo{HasTempData: false}, nil
    }

    return TemperatureInfo{
        HasTempData: true,
        Temps:       formatTemps(temps),
    }, nil
}

// IsDiskTemp reports whether sensorKey contains "nvme", "ssd", or "hdd",
// ignoring case.
func IsDiskTemp(sensorKey string) bool {
    key := strings.ToLower(sensorKey)
    for _, marker := range []string{"nvme", "ssd", "hdd"} {
        if strings.Contains(key, marker) {
            return true
        }
    }
    return false
}

// IsSystemTemp reports whether sensorKey contains "cpu" or "core", ignoring
// case.
func IsSystemTemp(sensorKey string) bool {
    key := strings.ToLower(sensorKey)
    switch {
    case strings.Contains(key, "cpu"):
        return true
    case strings.Contains(key, "core"):
        return true
    default:
        return false
    }
}

Smart Caching

From core/monitoring/stats.go:13:
// StatsRefreshInterval is the minimum time that must pass after a collection
// before CollectSystemStats collects again instead of returning its cache.
const StatsRefreshInterval = 2 * time.Second

// statsCollector holds the cached result of the most recent system stats
// collection.
type statsCollector struct {
    // mu is read-locked by CollectSystemStats while it checks the two fields
    // below for a fresh cached snapshot.
    mu            sync.RWMutex
    // lastCollected is the time the cached snapshot was stored.
    lastCollected time.Time
    // cachedStats is the most recent snapshot, or nil before the first
    // collection.
    cachedStats   *SystemStats
}
Metrics are cached for 2 seconds to reduce CPU overhead from frequent collection.

Usage Example

import (
    "context"
    "log"
    "time"

    "github.com/magooney-loon/pb-ext/core/monitoring"
)

// healthCheckHandler collects a system stats snapshot with a 5-second timeout
// and logs average CPU usage, memory usage in GB, and the goroutine count.
func healthCheckHandler(srv *server.Server) {
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    stats, err := monitoring.CollectSystemStats(ctx, srv.Stats().StartTime)
    if err != nil {
        // Log and continue: a snapshot may still have been returned.
        log.Printf("Some metrics failed: %v", err)
    }
    if stats == nil {
        // Nothing to report; avoid dereferencing a nil snapshot below.
        return
    }

    log.Printf("CPU Usage: %.2f%%", avgCPUUsage(stats.CPUInfo))
    log.Printf("Memory: %.2f GB / %.2f GB",
        float64(stats.MemoryInfo.Used)/1024/1024/1024,
        float64(stats.MemoryInfo.Total)/1024/1024/1024)
    log.Printf("Goroutines: %d", stats.RuntimeStats.NumGoroutines)
}

Dashboard Integration

View real-time metrics in the pb-ext dashboard at /_/_:
  • System Health — CPU, memory, disk usage gauges
  • Network Activity — bytes sent/received, active connections
  • Runtime Metrics — goroutines, heap allocation, GC stats
  • Request Statistics — request rate, average response time
  • Temperature Sensors — CPU, disk, and system temperatures (if available)
  • Uptime — server start time and duration

Error Handling

From core/monitoring/errors.go:
// MonitoringError describes a failed monitoring operation.
type MonitoringError struct {
    Op      string // Operation that failed
    Type    string // Error type (system, timeout, etc.)
    Message string // Human-readable message
    Err     error  // Underlying error
}

// NewSystemError builds a MonitoringError of type "system_error" for the
// operation op, carrying message and the underlying error err.
func NewSystemError(op, message string, err error) *MonitoringError {
    e := new(MonitoringError)
    e.Op = op
    e.Type = "system_error"
    e.Message = message
    e.Err = err
    return e
}

// NewTimeoutError builds a MonitoringError of type "timeout" for the operation
// op, carrying message. It has no underlying error.
func NewTimeoutError(op, message string) *MonitoringError {
    e := new(MonitoringError)
    e.Op = op
    e.Type = "timeout"
    e.Message = message
    return e
}

Best Practices

  1. Use context timeouts (5 seconds recommended) to prevent hangs
  2. Handle partial results — an individual collector may fail while the others succeed; CollectSystemStats then leaves that collector's fields at their zero values
  3. Leverage caching — calls made within 2 seconds of the last collection return the cached snapshot, so polling more often does not yield fresher data
  4. Monitor goroutine count — watch for leaks
  5. Track request rate — use for auto-scaling decisions
  6. Set up alerts — trigger notifications on high CPU/memory usage
  7. Check temperature data — not all systems provide sensor information

Performance Impact

  • CPU overhead: ~0.5% per collection (cached)
  • Memory overhead: ~50KB for cached stats
  • Collection time: 10-50ms (with caching: <1ms)
  • Goroutines: +1 for analytics session cleanup