Compare commits
3 Commits
Author | SHA1 | Date | |
---|---|---|---|
44ecd2d49c | |||
8958845f65 | |||
fe6ca819f4 |
@ -40,3 +40,9 @@ Sending POST on /reload :
|
|||||||
curl -XPOST http://my-nodegopher-host:8080/reload
|
curl -XPOST http://my-nodegopher-host:8080/reload
|
||||||
{"message":"configuration successfully reloaded"}
|
{"message":"configuration successfully reloaded"}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Reloading a badly formatted configuration will produce an error and keep the old configuration running.
|
||||||
|
```
|
||||||
|
% curl -XPOST 127.1:8080/reload
|
||||||
|
{"error":"Unable to load new configuration, keeping old one. See logs."}
|
||||||
|
```
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# Formatting metrics in main & secondarystat. Supported: "english", "french", "german", "ukrainian", "chinese", "arabic"
|
# Formatting metrics in main & secondarystat. Supported: "english", "french", "german", "ukrainian", "chinese", "arabic". Default is english.
|
||||||
language: 'english'
|
language: 'english'
|
||||||
|
|
||||||
# datasource describes a way to get prometheus metrics.
|
# datasource describes a way to get prometheus metrics.
|
||||||
@ -7,51 +7,44 @@ language: 'english'
|
|||||||
# - address: the address of prometheus.
|
# - address: the address of prometheus.
|
||||||
# - query: prometheus query. Same as typed in prometheus graph page.
|
# - query: prometheus query. Same as typed in prometheus graph page.
|
||||||
# - type: type of query. "query" will get instant value, "query_range" will get all samples for the grafana period. Result will be averaged.
|
# - type: type of query. "query" will get instant value, "query_range" will get all samples for the grafana period. Result will be averaged.
|
||||||
# - timeout: query timeout in seconds.
|
# - timeout: query timeout in seconds. Default is 10.
|
||||||
datasources:
|
datasources:
|
||||||
- name: prom_samples_per_sec
|
- name: prom_samples_per_sec
|
||||||
type: query
|
type: query
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'rate(prometheus_tsdb_head_samples_appended_total{type="float"}[10m])'
|
query: 'rate(prometheus_tsdb_head_samples_appended_total{type="float"}[10m])'
|
||||||
timeout: 10
|
timeout: 15
|
||||||
- name: node_cpu_metric
|
- name: node_cpu_metric
|
||||||
# Simple query, return an instant metric
|
# Simple query, return an instant metric
|
||||||
type: query
|
type: query
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'sum(rate(node_cpu_seconds_total{instance="router01.local.lan:9100",job="node",mode!~"idle"}[30s]))*100'
|
query: 'sum(rate(node_cpu_seconds_total{instance="router01.local.lan:9100",job="node",mode!~"idle"}[30s]))*100'
|
||||||
timeout: 10
|
|
||||||
- name: node_cpu_metric_over_80
|
- name: node_cpu_metric_over_80
|
||||||
type: query
|
type: query
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
# Return 1 if cpu rate > 80%
|
# Return 1 if cpu rate > 80%
|
||||||
query: '(sum(rate(node_cpu_seconds_total{instance="router01.local.lan:9100",job="node",mode!~"idle"}[30s]))*100) > bool 80'
|
query: '(sum(rate(node_cpu_seconds_total{instance="router01.local.lan:9100",job="node",mode!~"idle"}[30s]))*100) > bool 80'
|
||||||
timeout: 10
|
|
||||||
- name: router01_net_down_rate
|
- name: router01_net_down_rate
|
||||||
# Range query. Return all metrics from a time range. Result will be averaged from these metrics. Time range will be provided by Grafana.
|
# Range query. Return all metrics from a time range. Result will be averaged from these metrics. Time range will be provided by Grafana.
|
||||||
type: query_range
|
type: query_range
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'rate(node_network_receive_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])'
|
query: 'rate(node_network_receive_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])'
|
||||||
timeout: 10
|
|
||||||
- name: router01_net_up_rate
|
- name: router01_net_up_rate
|
||||||
type: query_range
|
type: query_range
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'rate(node_network_transmit_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])'
|
query: 'rate(node_network_transmit_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])'
|
||||||
timeout: 10
|
|
||||||
- name: router01_lan_down_rate
|
- name: router01_lan_down_rate
|
||||||
type: query_range
|
type: query_range
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'rate(node_network_receive_bytes_total{device="ix3", instance="router01.local.lan:9100", job="node"}[30s])'
|
query: 'rate(node_network_receive_bytes_total{device="ix3", instance="router01.local.lan:9100", job="node"}[30s])'
|
||||||
timeout: 10
|
|
||||||
- name: router01_lan_up_rate
|
- name: router01_lan_up_rate
|
||||||
type: query_range
|
type: query_range
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'rate(node_network_transmit_bytes_total{device="ix3", instance="router01.local.lan:9100", job="node"}[30s])'
|
query: 'rate(node_network_transmit_bytes_total{device="ix3", instance="router01.local.lan:9100", job="node"}[30s])'
|
||||||
timeout: 10
|
|
||||||
- name: router01_net_down_rate_perten
|
- name: router01_net_down_rate_perten
|
||||||
type: query
|
type: query
|
||||||
address: 'http://prometheus.local.lan:9090'
|
address: 'http://prometheus.local.lan:9090'
|
||||||
query: 'rate(node_network_receive_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])/62500000*10'
|
query: 'rate(node_network_receive_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])/62500000*10'
|
||||||
timeout: 10
|
|
||||||
|
|
||||||
# graphs identifies context for a nodegraph. You can have many contexts, and your grafana query will mention this context name.
|
# graphs identifies context for a nodegraph. You can have many contexts, and your grafana query will mention this context name.
|
||||||
# For this example named "internet", grafana URL will be :
|
# For this example named "internet", grafana URL will be :
|
||||||
|
130
main.go
130
main.go
@ -34,7 +34,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
gVersion = "0.2.3"
|
gVersion = "0.2.5"
|
||||||
|
// Default datasource timeout is 10 seconds
|
||||||
|
gDefaultDSTimeout = 10
|
||||||
)
|
)
|
||||||
|
|
||||||
type PromDataSourceConfig struct {
|
type PromDataSourceConfig struct {
|
||||||
@ -120,7 +122,7 @@ func (d *PromDataSourceConfig) GetData(timeRange *MyRange) (float64, error) {
|
|||||||
result, warnings, err = v1api.QueryRange(ctx, d.Query, rng, v1.WithTimeout(time.Duration(d.Timeout)*time.Second))
|
result, warnings, err = v1api.QueryRange(ctx, d.Query, rng, v1.WithTimeout(time.Duration(d.Timeout)*time.Second))
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("DataSourceConfig.GetData: Error querying Prometheus: %v\n", err)
|
log.Errorf("DataSourceConfig.GetData: Error querying Prometheus: %v. Query is: %s\n", err, d.Query)
|
||||||
return 0.0, err
|
return 0.0, err
|
||||||
}
|
}
|
||||||
if len(warnings) > 0 {
|
if len(warnings) > 0 {
|
||||||
@ -327,7 +329,7 @@ func getGraph(name string) (Graph, error) {
|
|||||||
return Graph{}, fmt.Errorf("Graph not found: %s", name)
|
return Graph{}, fmt.Errorf("Graph not found: %s", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
func initRoutes(r *gin.Engine) {
|
func initRoutes(r *gin.Engine, confFile string) {
|
||||||
r.GET("/ping", func(c *gin.Context) {
|
r.GET("/ping", func(c *gin.Context) {
|
||||||
c.JSON(http.StatusOK, gin.H{
|
c.JSON(http.StatusOK, gin.H{
|
||||||
"message": "pong",
|
"message": "pong",
|
||||||
@ -336,7 +338,10 @@ func initRoutes(r *gin.Engine) {
|
|||||||
|
|
||||||
// An endpoint to force read of configuration file
|
// An endpoint to force read of configuration file
|
||||||
r.POST("/reload", func(c *gin.Context) {
|
r.POST("/reload", func(c *gin.Context) {
|
||||||
reloadConfigFile()
|
if err := reloadConfigFile(confFile); err != nil {
|
||||||
|
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
c.JSON(http.StatusOK, gin.H{
|
c.JSON(http.StatusOK, gin.H{
|
||||||
"message": "configuration successfully reloaded",
|
"message": "configuration successfully reloaded",
|
||||||
})
|
})
|
||||||
@ -425,18 +430,76 @@ func toggleDebug() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func reloadConfigFile() {
|
// Deep copy src Node into a new memory space
|
||||||
// First reread config file
|
func newNodeClone(src *Node) *Node {
|
||||||
if err := viper.ReadInConfig(); err != nil {
|
return &Node{
|
||||||
if _, ok := err.(viper.ConfigFileNotFoundError); ok {
|
Name: src.Name,
|
||||||
log.Fatalf("config file not found")
|
Id: src.Id,
|
||||||
os.Exit(1)
|
Title: src.Title,
|
||||||
} else {
|
Subtitle: src.Subtitle,
|
||||||
log.Fatalf("unknown error looking for config file: %v", err)
|
MainStat: src.MainStat,
|
||||||
os.Exit(1)
|
MainStatQuery: src.MainStatQuery,
|
||||||
|
MainStatFormat: src.MainStatFormat,
|
||||||
|
SecondaryStat: src.SecondaryStat,
|
||||||
|
SecondaryStatQuery: src.SecondaryStatQuery,
|
||||||
|
SecondaryStatFormat: src.SecondaryStatFormat,
|
||||||
|
Color: src.Color,
|
||||||
|
Icon: src.Icon,
|
||||||
|
NodeRadius: src.NodeRadius,
|
||||||
|
Highlighted: src.Highlighted,
|
||||||
|
HighlightedQuery: src.HighlightedQuery,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Deep copy src Edge into a new memory space
|
||||||
|
func newEdgeClone(src *Edge) *Edge {
|
||||||
|
return &Edge{
|
||||||
|
Id: src.Id,
|
||||||
|
Source: src.Source,
|
||||||
|
Target: src.Target,
|
||||||
|
MainStat: src.MainStat,
|
||||||
|
MainStatQuery: src.MainStatQuery,
|
||||||
|
MainStatFormat: src.MainStatFormat,
|
||||||
|
SecondaryStat: src.SecondaryStat,
|
||||||
|
SecondaryStatQuery: src.SecondaryStatQuery,
|
||||||
|
SecondaryStatFormat: src.SecondaryStatFormat,
|
||||||
|
Color: src.Color,
|
||||||
|
Thickness: src.Thickness,
|
||||||
|
ThicknessQuery: src.ThicknessQuery,
|
||||||
|
Highlighted: src.Highlighted,
|
||||||
|
HighlightedQuery: src.HighlightedQuery,
|
||||||
|
StrokeDashArray: src.StrokeDashArray,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function assumes we already have a running configuration.
|
||||||
|
func reloadConfigFile(confFile string) error {
|
||||||
|
oldConfigRestored := false
|
||||||
|
// We need to keep this config, in case the new one is broken
|
||||||
|
fname := fmt.Sprintf("/tmp/nodegopher.%d.yaml", os.Getpid())
|
||||||
|
if err := viper.WriteConfigAs(fname); err != nil {
|
||||||
|
log.Errorf("Unable to save current running config to %s, wont reload configuration.\n", fname)
|
||||||
|
return fmt.Errorf("Unable to save current configuration, configuration not reloaded. See logs.")
|
||||||
|
}
|
||||||
|
defer os.Remove(fname)
|
||||||
|
|
||||||
|
// Reread config file
|
||||||
|
if oldErr := viper.ReadInConfig(); oldErr != nil {
|
||||||
|
if _, ok := oldErr.(viper.ConfigFileNotFoundError); ok {
|
||||||
|
log.Errorf("config file not found")
|
||||||
|
} else {
|
||||||
|
log.Errorf("unknown error looking for config file: %v", oldErr)
|
||||||
|
}
|
||||||
|
// Restore old configuration and notify.
|
||||||
|
log.Debugf("Fallback on previous configuration.\n")
|
||||||
|
viper.SetConfigFile(fname)
|
||||||
|
if err := viper.ReadInConfig(); err != nil {
|
||||||
|
log.Fatalf("Unable to restore configuration, and new is invalid. fix it now.\n")
|
||||||
|
}
|
||||||
|
viper.SetConfigFile(confFile)
|
||||||
|
oldConfigRestored = true
|
||||||
|
}
|
||||||
|
|
||||||
switch viper.Get("language").(string) {
|
switch viper.Get("language").(string) {
|
||||||
case "english":
|
case "english":
|
||||||
gPrinter = message.NewPrinter(language.English)
|
gPrinter = message.NewPrinter(language.English)
|
||||||
@ -451,7 +514,7 @@ func reloadConfigFile() {
|
|||||||
case "chinese":
|
case "chinese":
|
||||||
gPrinter = message.NewPrinter(language.Chinese)
|
gPrinter = message.NewPrinter(language.Chinese)
|
||||||
default:
|
default:
|
||||||
log.Errorf("Language not implented: %s. Fallback to english\n", viper.Get("language").(string))
|
log.Errorf("Language not implemented: %s. Fallback to english\n", viper.Get("language").(string))
|
||||||
gPrinter = message.NewPrinter(language.English)
|
gPrinter = message.NewPrinter(language.English)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -459,6 +522,7 @@ func reloadConfigFile() {
|
|||||||
gCfgMutex.Lock()
|
gCfgMutex.Lock()
|
||||||
defer gCfgMutex.Unlock()
|
defer gCfgMutex.Unlock()
|
||||||
|
|
||||||
|
// We need to keep this config, incase the new one is b0rken
|
||||||
for _, g := range gGraphs {
|
for _, g := range gGraphs {
|
||||||
g.Nodes = nil
|
g.Nodes = nil
|
||||||
g.Edges = nil
|
g.Edges = nil
|
||||||
@ -478,34 +542,40 @@ func reloadConfigFile() {
|
|||||||
Edges []Edge `yaml:"edges"`
|
Edges []Edge `yaml:"edges"`
|
||||||
}{}
|
}{}
|
||||||
yaml.Unmarshal(yd, &tmp)
|
yaml.Unmarshal(yd, &tmp)
|
||||||
var graphNodes []Item
|
|
||||||
var graphEdges []Item
|
|
||||||
for _, n := range tmp.Nodes {
|
|
||||||
graphNodes = append(graphNodes, &n)
|
|
||||||
}
|
|
||||||
for _, e := range tmp.Edges {
|
|
||||||
graphEdges = append(graphEdges, &e)
|
|
||||||
}
|
|
||||||
|
|
||||||
graph := Graph{
|
graph := Graph{
|
||||||
Name: tmp.Name,
|
Name: tmp.Name,
|
||||||
Nodes: graphNodes,
|
}
|
||||||
Edges: graphEdges,
|
for _, n := range tmp.Nodes {
|
||||||
|
// Deep copy Node so garbage collecting tmp won't pull the carpet under our feet
|
||||||
|
graph.Nodes = append(graph.Nodes, newNodeClone(&n))
|
||||||
|
}
|
||||||
|
for _, e := range tmp.Edges {
|
||||||
|
// Deep copy Edge
|
||||||
|
graph.Edges = append(graph.Edges, newEdgeClone(&e))
|
||||||
}
|
}
|
||||||
gGraphs = append(gGraphs, graph)
|
gGraphs = append(gGraphs, graph)
|
||||||
}
|
}
|
||||||
|
|
||||||
if viper.Get("datasources") == nil {
|
if viper.Get("datasources") == nil {
|
||||||
log.Printf("no datasources found, data will be static")
|
log.Warningf("no datasources found, data will be static")
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
dss := viper.Get("datasources").([]interface{})
|
dss := viper.Get("datasources").([]interface{})
|
||||||
for _, d := range dss {
|
for _, d := range dss {
|
||||||
yd, _ := yaml.Marshal(d)
|
yd, _ := yaml.Marshal(d)
|
||||||
var ds PromDataSourceConfig
|
var ds PromDataSourceConfig
|
||||||
yaml.Unmarshal(yd, &ds)
|
yaml.Unmarshal(yd, &ds)
|
||||||
|
// Set default Values
|
||||||
|
if ds.Timeout == 0 {
|
||||||
|
ds.Timeout = gDefaultDSTimeout
|
||||||
|
}
|
||||||
gDataSources = append(gDataSources, ds)
|
gDataSources = append(gDataSources, ds)
|
||||||
}
|
}
|
||||||
|
if oldConfigRestored {
|
||||||
|
return fmt.Errorf("Unable to load new configuration, keeping old one. See logs.")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@ -558,7 +628,7 @@ func main() {
|
|||||||
// FIXME: Watch config changes. Does not work on FreeBSD. TODO: Test with linux
|
// FIXME: Watch config changes. Does not work on FreeBSD. TODO: Test with linux
|
||||||
viper.OnConfigChange(func(e fsnotify.Event) {
|
viper.OnConfigChange(func(e fsnotify.Event) {
|
||||||
log.Printf("Config file changed, reloading data\n")
|
log.Printf("Config file changed, reloading data\n")
|
||||||
reloadConfigFile()
|
reloadConfigFile(confFile)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Lets reload config on SIGHUP
|
// Lets reload config on SIGHUP
|
||||||
@ -568,11 +638,11 @@ func main() {
|
|||||||
for {
|
for {
|
||||||
_ = <- sigs
|
_ = <- sigs
|
||||||
log.Infof("SIGHUP received, reloading configuration\n")
|
log.Infof("SIGHUP received, reloading configuration\n")
|
||||||
reloadConfigFile()
|
reloadConfigFile(confFile)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
reloadConfigFile()
|
reloadConfigFile(confFile)
|
||||||
|
|
||||||
// Capture variable name. There should be only one variable. Space is tolerated before and after name.
|
// Capture variable name. There should be only one variable. Space is tolerated before and after name.
|
||||||
gDSVarCompRegex = regexp.MustCompile(`^\{\{(?:\ )?([a-zA-Z0-9\-_]+)(?:\ )?\}\}$`)
|
gDSVarCompRegex = regexp.MustCompile(`^\{\{(?:\ )?([a-zA-Z0-9\-_]+)(?:\ )?\}\}$`)
|
||||||
@ -583,6 +653,6 @@ func main() {
|
|||||||
log.Printf("Starting NodeGopher v.%s\n", gVersion)
|
log.Printf("Starting NodeGopher v.%s\n", gVersion)
|
||||||
|
|
||||||
r := gin.Default()
|
r := gin.Default()
|
||||||
initRoutes(r)
|
initRoutes(r, confFile)
|
||||||
r.Run(listen)
|
r.Run(listen)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user