Compare commits
	
		
			2 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 44ecd2d49c | |||
| 8958845f65 | 
| @ -40,3 +40,9 @@ Sending POST on /reload : | ||||
| curl -XPOST http://my-nodegopher-host:8080/reload | ||||
| {"message":"configuration successfully reloaded"} | ||||
| ``` | ||||
|  | ||||
| Reloading a badly formatted configuration will produce an error and keep the old configuration running.   | ||||
| ``` | ||||
| % curl -XPOST 127.1:8080/reload | ||||
| {"error":"Unable to load new configuration, keeping old one. See logs."} | ||||
| ``` | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| # Formatting metrics in main & secondarystat. Supported: "english", "french", "german", "ukrainian", "chinese", "arabic" | ||||
| # Formatting metrics in main & secondarystat. Supported: "english", "french", "german", "ukrainian", "chinese", "arabic". Default is english. | ||||
| language: 'english' | ||||
|  | ||||
| # A datasource describes a way to get prometheus metrics. | ||||
| @ -7,51 +7,44 @@ language: 'english' | ||||
| # - address: the address of prometheus. | ||||
| # - query: prometheus query. Same as typed in prometheus graph page. | ||||
| # - type: type of query. "query" will get instant value, "query_range" will get all samples for the grafana period. Result will be averaged. | ||||
| # - timeout: query timeout in seconds. | ||||
| # - timeout: query timeout in seconds. default is 10. | ||||
| datasources: | ||||
|   - name: prom_samples_per_sec | ||||
|     type: query | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'rate(prometheus_tsdb_head_samples_appended_total{type="float"}[10m])' | ||||
|     timeout: 10 | ||||
|     timeout: 15 | ||||
|   - name: node_cpu_metric | ||||
|     # Simple query, return an instant metric | ||||
|     type: query | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'sum(rate(node_cpu_seconds_total{instance="router01.local.lan:9100",job="node",mode!~"idle"}[30s]))*100' | ||||
|     timeout: 10 | ||||
|   - name: node_cpu_metric_over_80 | ||||
|     type: query | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     # Return 1 if cpu rate > 80% | ||||
|     query: '(sum(rate(node_cpu_seconds_total{instance="router01.local.lan:9100",job="node",mode!~"idle"}[30s]))*100) > bool 80' | ||||
|     timeout: 10 | ||||
|   - name: router01_net_down_rate | ||||
|     # Range query. Return all metrics from a time range. Result will be averaged from these metrics. Time range will be provided by Grafana. | ||||
|     type: query_range | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'rate(node_network_receive_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])' | ||||
|     timeout: 10 | ||||
|   - name: router01_net_up_rate | ||||
|     type: query_range | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'rate(node_network_transmit_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])' | ||||
|     timeout: 10 | ||||
|   - name: router01_lan_down_rate | ||||
|     type: query_range | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'rate(node_network_receive_bytes_total{device="ix3", instance="router01.local.lan:9100", job="node"}[30s])' | ||||
|     timeout: 10 | ||||
|   - name: router01_lan_up_rate | ||||
|     type: query_range | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'rate(node_network_transmit_bytes_total{device="ix3", instance="router01.local.lan:9100", job="node"}[30s])' | ||||
|     timeout: 10 | ||||
|   - name: router01_net_down_rate_perten | ||||
|     type: query | ||||
|     address: 'http://prometheus.local.lan:9090' | ||||
|     query: 'rate(node_network_receive_bytes_total{device="igb0", instance="router01.local.lan:9100", job="node"}[30s])/62500000*10' | ||||
|     timeout: 10 | ||||
|  | ||||
| # graphs identifies context for a nodegraph. You can have many contexts, and your grafana query will mention this context name. | ||||
| #  For this example named "internet", grafana URL will be : | ||||
|  | ||||
							
								
								
									
										58
									
								
								main.go
									
									
									
									
									
								
							
							
						
						
									
										58
									
								
								main.go
									
									
									
									
									
								
							| @ -34,7 +34,7 @@ import ( | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	gVersion = "0.2.4" | ||||
| 	gVersion = "0.2.5" | ||||
| 	// Default datasource timeout is 10 seconds | ||||
| 	gDefaultDSTimeout = 10 | ||||
| ) | ||||
| @ -329,7 +329,7 @@ func getGraph(name string) (Graph, error) { | ||||
| 	return Graph{}, fmt.Errorf("Graph not found: %s", name) | ||||
| } | ||||
|  | ||||
| func initRoutes(r *gin.Engine) { | ||||
| func initRoutes(r *gin.Engine, confFile string) { | ||||
| 	r.GET("/ping", func(c *gin.Context) { | ||||
| 		c.JSON(http.StatusOK, gin.H{ | ||||
| 			"message": "pong", | ||||
| @ -338,7 +338,10 @@ func initRoutes(r *gin.Engine) { | ||||
| 	 | ||||
| 	// An endpoint to force read of configuration file | ||||
| 	r.POST("/reload", func(c *gin.Context) { | ||||
| 		reloadConfigFile() | ||||
| 		if err := reloadConfigFile(confFile); err != nil { | ||||
| 			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) | ||||
| 			return | ||||
| 		} | ||||
| 		c.JSON(http.StatusOK, gin.H{ | ||||
| 			"message": "configuration successfully reloaded", | ||||
| 		}) | ||||
| @ -469,16 +472,32 @@ func newEdgeClone(src *Edge) *Edge { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func reloadConfigFile() { | ||||
| 	// First reread config file | ||||
| 	if err := viper.ReadInConfig(); err != nil { | ||||
| 		if _, ok := err.(viper.ConfigFileNotFoundError); ok { | ||||
| 			log.Fatalf("config file not found") | ||||
| 			os.Exit(1) | ||||
| // This function assumes we already have a running configuration. | ||||
| func reloadConfigFile(confFile string) error { | ||||
| 	oldConfigRestored := false | ||||
| 	// We need to keep this config, in case the new one is broken | ||||
| 	fname := fmt.Sprintf("/tmp/nodegopher.%d.yaml", os.Getpid()) | ||||
| 	if err := viper.WriteConfigAs(fname); err != nil { | ||||
| 		log.Errorf("Unable to save current running config to %s, wont reload configuration.\n", fname) | ||||
| 		return fmt.Errorf("Unable to save current configuration, configuration not reloaded. See logs.") | ||||
| 	} | ||||
| 	defer os.Remove(fname) | ||||
|  | ||||
| 	// Reread config file | ||||
| 	if oldErr := viper.ReadInConfig(); oldErr != nil { | ||||
| 		if _, ok := oldErr.(viper.ConfigFileNotFoundError); ok { | ||||
| 			log.Errorf("config file not found") | ||||
| 		} else { | ||||
| 			log.Fatalf("unknown error looking for config file: %v", err) | ||||
| 			os.Exit(1) | ||||
| 			log.Errorf("unknown error looking for config file: %v", oldErr) | ||||
| 		} | ||||
| 		// Restore old configuration and notify. | ||||
| 		log.Debugf("Fallback on previous configuration.\n") | ||||
| 		viper.SetConfigFile(fname) | ||||
| 		if err := viper.ReadInConfig(); err != nil { | ||||
| 			log.Fatalf("Unable to restore configuration, and new is invalid. fix it now.\n") | ||||
| 		} | ||||
| 		viper.SetConfigFile(confFile) | ||||
| 		oldConfigRestored = true | ||||
| 	} | ||||
|  | ||||
| 	switch viper.Get("language").(string) { | ||||
| @ -503,6 +522,7 @@ func reloadConfigFile() { | ||||
| 	gCfgMutex.Lock() | ||||
| 	defer gCfgMutex.Unlock() | ||||
|  | ||||
| 	// We need to keep this config, in case the new one is broken | ||||
| 	for _, g := range gGraphs { | ||||
| 		g.Nodes = nil | ||||
| 		g.Edges = nil | ||||
| @ -538,8 +558,8 @@ func reloadConfigFile() { | ||||
| 	} | ||||
|  | ||||
| 	if viper.Get("datasources") == nil { | ||||
| 		log.Printf("no datasources found, data will be static") | ||||
| 		return | ||||
| 		log.Warningf("no datasources found, data will be static") | ||||
| 		return nil | ||||
| 	} | ||||
| 	dss := viper.Get("datasources").([]interface{}) | ||||
| 	for _, d := range dss { | ||||
| @ -552,6 +572,10 @@ func reloadConfigFile() { | ||||
| 		} | ||||
| 		gDataSources = append(gDataSources, ds) | ||||
| 	} | ||||
| 	if oldConfigRestored { | ||||
| 		return fmt.Errorf("Unable to load new configuration, keeping old one. See logs.") | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func main() { | ||||
| @ -604,7 +628,7 @@ func main() { | ||||
| 	// FIXME: Watch config changes. Does not work on FreeBSD. TODO: Test with linux | ||||
| 	viper.OnConfigChange(func(e fsnotify.Event) { | ||||
| 		log.Printf("Config file changed, reloading data\n") | ||||
| 		reloadConfigFile() | ||||
| 		reloadConfigFile(confFile) | ||||
| 	}) | ||||
|  | ||||
| 	// Let's reload config on SIGHUP | ||||
| @ -614,11 +638,11 @@ func main() { | ||||
| 		for { | ||||
| 			_ = <- sigs | ||||
| 			log.Infof("SIGHUP received, reloading configuration\n") | ||||
| 			reloadConfigFile() | ||||
| 			reloadConfigFile(confFile) | ||||
| 		} | ||||
| 	}() | ||||
|  | ||||
| 	reloadConfigFile() | ||||
| 	reloadConfigFile(confFile) | ||||
|  | ||||
| 	// Capture variable name. There should be only one variable. Space is tolerated before and after name. | ||||
| 	gDSVarCompRegex = regexp.MustCompile(`^\{\{(?:\ )?([a-zA-Z0-9\-_]+)(?:\ )?\}\}$`) | ||||
| @ -629,6 +653,6 @@ func main() { | ||||
| 	log.Printf("Starting NodeGopher v.%s\n", gVersion) | ||||
|  | ||||
| 	r := gin.Default() | ||||
| 	initRoutes(r) | ||||
| 	initRoutes(r, confFile) | ||||
| 	r.Run(listen) | ||||
| } | ||||
|  | ||||
		Reference in New Issue
	
	Block a user