Add queue mechanism

Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
This commit is contained in:
2023-06-29 16:05:27 +03:00
parent 47478a9a89
commit e0b329fd65
4 changed files with 202 additions and 37 deletions

1
.gitignore vendored
View File

@@ -7,6 +7,7 @@
*.dll *.dll
*.so *.so
*.dylib *.dylib
clickhouse
# Test binary, built with `go test -c` # Test binary, built with `go test -c`
*.test *.test

Binary file not shown.

View File

@@ -2,16 +2,17 @@
package sql package sql
import ( import (
gosql "database/sql" "database/sql"
_ "embed" _ "embed"
"fmt" "fmt"
"math"
"strings" "strings"
"sync"
"time" "time"
_ "github.com/ClickHouse/clickhouse-go" _ "github.com/ClickHouse/clickhouse-go"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/plugins/outputs" "github.com/influxdata/telegraf/plugins/outputs"
"github.com/wk8/go-ordered-map/v2" "github.com/wk8/go-ordered-map/v2"
) )
@@ -35,13 +36,19 @@ type ClickHouse struct {
TableMode string `toml:"table_mode"` TableMode string `toml:"table_mode"`
SingleTableOptions SingleTableOptions `toml:"single_table"` SingleTableOptions SingleTableOptions `toml:"single_table"`
MultiTableOptions MultiTableOptions `toml:"multi_table"` MultiTableOptions MultiTableOptions `toml:"multi_table"`
ConnectionMaxIdleTime config.Duration `toml:"connection_max_idle_time"` QueueSize int `toml:"queue_size"`
ConnectionMaxLifetime config.Duration `toml:"connection_max_lifetime"` QueueLimit int `toml:"queue_limit"`
FlushInterval time.Duration `toml:"flush_interval"`
ConnectionMaxIdleTime time.Duration `toml:"connection_max_idle_time"`
ConnectionMaxLifetime time.Duration `toml:"connection_max_lifetime"`
ConnectionMaxIdle int `toml:"connection_max_idle"` ConnectionMaxIdle int `toml:"connection_max_idle"`
ConnectionMaxOpen int `toml:"connection_max_open"` ConnectionMaxOpen int `toml:"connection_max_open"`
db *gosql.DB db *sql.DB
Log telegraf.Logger `toml:"-"` Log telegraf.Logger
metricQueue []telegraf.Metric
metricLock sync.Mutex
metricTrigger chan struct{}
} }
func (*ClickHouse) SampleConfig() string { func (*ClickHouse) SampleConfig() string {
@@ -54,26 +61,47 @@ func (p *ClickHouse) Init() error {
} }
if p.TimestampColumn == "" { if p.TimestampColumn == "" {
fmt.Println("timestamp_column is not set, using default value: timestamp")
p.TimestampColumn = "timestamp" p.TimestampColumn = "timestamp"
p.Log.Info("timestamp_column is not set, using default value:", p.TimestampColumn)
} }
if p.TableMode == "" { if p.TableMode == "" {
fmt.Println("table_mode is not set, using default value: single") p.TableMode = "multi"
p.TableMode = "single" p.Log.Info("table_mode is not set, using default value:", p.TableMode)
} else if p.TableMode != "single" && p.TableMode != "multi" { } else if p.TableMode != "single" && p.TableMode != "multi" {
return fmt.Errorf("table_mode must be one of: single, multi") return fmt.Errorf("table_mode must be one of: single, multi")
} }
if p.TableMode == "single" && p.SingleTableOptions.TableName == "" { if p.TableMode == "single" && p.SingleTableOptions.TableName == "" {
p.SingleTableOptions.TableName = "telegraf" p.SingleTableOptions.TableName = "telegraf"
p.Log.Info("table_name is not set, using default value:", p.SingleTableOptions.TableName)
} }
if p.QueueSize <= 0 {
p.QueueSize = 100000
p.Log.Info("queue_size is not set, using default value:", p.QueueSize)
}
if p.QueueLimit <= 0 {
p.QueueLimit = int(math.MaxUint64 >> 1)
p.Log.Info("queue_limit is not set, using default value:", p.QueueLimit)
}
if p.FlushInterval <= 0 {
p.FlushInterval = 5 * time.Second
p.Log.Info("flush_interval is not set, using default value:", p.FlushInterval)
}
p.metricQueue = make([]telegraf.Metric, 0, p.QueueSize)
p.metricTrigger = make(chan struct{}, 1)
go p.metricWriter(p.FlushInterval)
return nil return nil
} }
func (p *ClickHouse) Connect() error { func (p *ClickHouse) Connect() error {
db, err := gosql.Open("clickhouse", p.DataSourceName) db, err := sql.Open("clickhouse", p.DataSourceName)
if err != nil { if err != nil {
return err return err
} }
@@ -96,7 +124,7 @@ func (p *ClickHouse) Connect() error {
} }
p.db = db p.db = db
fmt.Println("Connected to ClickHouse!") p.Log.Info("Connected to ClickHouse!")
return nil return nil
} }
@@ -122,7 +150,7 @@ func quoteIdent(name string) string {
return `"` + strings.ReplaceAll(sanitizeQuoted(name), `"`, `""`) + `"` return `"` + strings.ReplaceAll(sanitizeQuoted(name), `"`, `""`) + `"`
} }
func (p *ClickHouse) deriveDatatype(value interface{}) string { func (p *ClickHouse) valueToDatatype(value interface{}) string {
var datatype string var datatype string
switch value.(type) { switch value.(type) {
@@ -146,6 +174,36 @@ func (p *ClickHouse) deriveDatatype(value interface{}) string {
return datatype return datatype
} }
// TODO: replace this placeholder type with proper sql.Null* handling for nullable columns
type NullUint64 struct {
Uint64 uint64
Valid bool
}
func (p *ClickHouse) datatypeToNullable(datatype string) interface{} {
var nullable interface{}
switch datatype {
case "Int64":
nullable = int64(0) // sql.NullInt64{}
case "UInt64":
nullable = uint64(0) // NullUint64{}
case "Float64":
nullable = float64(0) // sql.NullFloat64{}
case "String":
nullable = "" // sql.NullString{}
case "UInt8":
nullable = false // sql.NullBool{}
case "DateTime":
nullable = time.Unix(0, 0) // sql.NullTime{}
default:
nullable = "" // sql.NullString{}
p.Log.Errorf("unknown datatype %s", datatype)
}
return nullable
}
func (p *ClickHouse) generateCreateTable(tablename string, columns *orderedmap.OrderedMap[string, string]) string { func (p *ClickHouse) generateCreateTable(tablename string, columns *orderedmap.OrderedMap[string, string]) string {
columnDefs := make([]string, 0, columns.Len()) columnDefs := make([]string, 0, columns.Len())
@@ -190,7 +248,7 @@ func (p *ClickHouse) generateAlterTable(tablename string, columns *orderedmap.Or
} }
func (p *ClickHouse) ensureTable(tablename string, columns *orderedmap.OrderedMap[string, string]) error { func (p *ClickHouse) ensureTable(tablename string, columns *orderedmap.OrderedMap[string, string]) error {
var res *gosql.Rows var res *sql.Rows
var err error var err error
for { for {
@@ -203,7 +261,7 @@ func (p *ClickHouse) ensureTable(tablename string, columns *orderedmap.OrderedMa
if err != nil { if err != nil {
return err return err
} }
fmt.Println("Created table", tablename) p.Log.Info("Created table", tablename)
continue continue
} }
return err return err
@@ -232,7 +290,7 @@ func (p *ClickHouse) ensureTable(tablename string, columns *orderedmap.OrderedMa
if err != nil { if err != nil {
return err return err
} }
fmt.Println("Altered table", tablename) p.Log.Info("Altered table", tablename)
break break
} }
} }
@@ -249,12 +307,22 @@ func (p *ClickHouse) generateInsert(tablename string, columns *orderedmap.Ordere
placeholder := "(" + strings.Repeat("?,", columns.Len()-1) + "?)" placeholder := "(" + strings.Repeat("?,", columns.Len()-1) + "?)"
placeholders := strings.Repeat(placeholder+",", batchSize-1) + placeholder placeholders := strings.Repeat(placeholder+",", batchSize-1) + placeholder
return fmt.Sprintf("INSERT INTO %s(%s) VALUES %s", return fmt.Sprintf("INSERT INTO %s (%s) VALUES %s",
quoteIdent(tablename), quoteIdent(tablename),
strings.Join(quotedColumns, ", "), strings.Join(quotedColumns, ", "),
placeholders) placeholders)
} }
func (p *ClickHouse) nullifyMissingValues(columns *orderedmap.OrderedMap[string, string], metrics []map[string]interface{}) {
for _, metric := range metrics {
for pair := columns.Oldest(); pair != nil; pair = pair.Next() {
if _, ok := metric[pair.Key]; !ok {
metric[pair.Key] = p.datatypeToNullable(pair.Value)
}
}
}
}
func (p *ClickHouse) writeMetrics(tablename string, columns *orderedmap.OrderedMap[string, string], metrics []map[string]interface{}) error { func (p *ClickHouse) writeMetrics(tablename string, columns *orderedmap.OrderedMap[string, string], metrics []map[string]interface{}) error {
err := p.ensureTable(tablename, columns) err := p.ensureTable(tablename, columns)
if err != nil { if err != nil {
@@ -262,7 +330,6 @@ func (p *ClickHouse) writeMetrics(tablename string, columns *orderedmap.OrderedM
} }
sql := p.generateInsert(tablename, columns, len(metrics)) sql := p.generateInsert(tablename, columns, len(metrics))
values := make([]interface{}, 0, len(metrics)*columns.Len())
tx, err := p.db.Begin() tx, err := p.db.Begin()
if err != nil { if err != nil {
@@ -275,18 +342,25 @@ func (p *ClickHouse) writeMetrics(tablename string, columns *orderedmap.OrderedM
} }
defer stmt.Close() defer stmt.Close()
values := make([][]interface{}, 0, len(metrics))
for _, metric := range metrics { for _, metric := range metrics {
value := make([]interface{}, 0, columns.Len())
for pair := columns.Oldest(); pair != nil; pair = pair.Next() { for pair := columns.Oldest(); pair != nil; pair = pair.Next() {
values = append(values, metric[pair.Key]) value = append(value, metric[pair.Key])
} }
values = append(values, value)
} }
start := time.Now() for _, value := range values {
_, err = stmt.Exec(values...) _, err = stmt.Exec(value...)
if err != nil {
return fmt.Errorf("exec failed: %w", err)
}
}
if err != nil { if err != nil {
return fmt.Errorf("exec failed: %w", err) return fmt.Errorf("exec failed: %w", err)
} }
p.Log.Debugf("Wrote %d metrics to %s in %s", len(metrics), tablename, time.Since(start))
err = tx.Commit() err = tx.Commit()
if err != nil { if err != nil {
@@ -318,21 +392,25 @@ func (p *ClickHouse) WriteMultiTable(metrics []telegraf.Metric) error {
metricEntry := make(map[string]interface{}) metricEntry := make(map[string]interface{})
metricEntry[p.TimestampColumn] = metric.Time() metricEntry[p.TimestampColumn] = metric.Time()
columns[tablename].Set(p.TimestampColumn, p.deriveDatatype(metric.Time())) columns[tablename].Set(p.TimestampColumn, p.valueToDatatype(metric.Time()))
for _, tag := range metric.TagList() { for _, tag := range metric.TagList() {
metricEntry[tag.Key] = tag.Value metricEntry[tag.Key] = tag.Value
columns[tablename].Set(tag.Key, p.deriveDatatype(tag.Value)) columns[tablename].Set(tag.Key, p.valueToDatatype(tag.Value))
} }
for _, field := range metric.FieldList() { for _, field := range metric.FieldList() {
metricEntry[field.Key] = field.Value metricEntry[field.Key] = field.Value
columns[tablename].Set(field.Key, p.deriveDatatype(field.Value)) columns[tablename].Set(field.Key, p.valueToDatatype(field.Value))
} }
metricsData[tablename] = append(metricsData[tablename], metricEntry) metricsData[tablename] = append(metricsData[tablename], metricEntry)
} }
p.Log.Debugf("Prepared %d metrics for writing in %s", len(metrics), time.Since(start))
for tablename, metrics := range metricsData {
p.nullifyMissingValues(columns[tablename], metrics)
}
p.Log.Infof("Prepared %d metrics for writing in %s\n", len(metrics), time.Since(start))
start = time.Now() start = time.Now()
for tablename, metrics := range metricsData { for tablename, metrics := range metricsData {
@@ -341,7 +419,7 @@ func (p *ClickHouse) WriteMultiTable(metrics []telegraf.Metric) error {
return err return err
} }
} }
p.Log.Debugf("Wrote %d metrics to %d tables in %s", len(metrics), len(metricsData), time.Since(start)) p.Log.Infof("Wrote %d metrics to %d tables in %s\n", len(metrics), len(metricsData), time.Since(start))
return nil return nil
} }
@@ -357,43 +435,97 @@ func (p *ClickHouse) WriteSingleTable(metrics []telegraf.Metric) error {
metricEntry := make(map[string]interface{}) metricEntry := make(map[string]interface{})
metricEntry[p.TimestampColumn] = metric.Time() metricEntry[p.TimestampColumn] = metric.Time()
columns.Set(p.TimestampColumn, p.deriveDatatype(metric.Time())) columns.Set(p.TimestampColumn, p.valueToDatatype(metric.Time()))
metricEntry["measurement"] = metricName metricEntry["measurement"] = metricName
columns.Set("measurement", p.deriveDatatype(metricName)) columns.Set("measurement", p.valueToDatatype(metricName))
for _, tag := range metric.TagList() { for _, tag := range metric.TagList() {
colName := fmt.Sprintf("%s_%s", metricName, tag.Key) colName := fmt.Sprintf("%s_%s", metricName, tag.Key)
metricEntry[colName] = tag.Value metricEntry[colName] = tag.Value
columns.Set(colName, p.deriveDatatype(tag.Value)) columns.Set(colName, p.valueToDatatype(tag.Value))
} }
for _, field := range metric.FieldList() { for _, field := range metric.FieldList() {
colName := fmt.Sprintf("%s_%s", metricName, field.Key) colName := fmt.Sprintf("%s_%s", metricName, field.Key)
metricEntry[colName] = field.Value metricEntry[colName] = field.Value
columns.Set(colName, p.deriveDatatype(field.Value)) columns.Set(colName, p.valueToDatatype(field.Value))
}
for pair := columns.Oldest(); pair != nil; pair = pair.Next() {
if _, ok := metricEntry[pair.Key]; !ok {
metricEntry[pair.Key] = p.datatypeToNullable(pair.Value)
}
} }
metricsData = append(metricsData, metricEntry) metricsData = append(metricsData, metricEntry)
} }
p.Log.Debugf("Prepared %d metrics for writing in %s", len(metrics), time.Since(start))
p.nullifyMissingValues(columns, metricsData)
p.Log.Infof("Prepared %d metrics for writing in %s\n", len(metrics), time.Since(start))
start = time.Now() start = time.Now()
err := p.writeMetrics(tablename, columns, metricsData) err := p.writeMetrics(tablename, columns, metricsData)
if err != nil { if err != nil {
return err return err
} }
p.Log.Debugf("Wrote %d metrics to %s in %s", len(metrics), tablename, time.Since(start)) p.Log.Infof("Wrote %d metrics to %s in %s\n", len(metrics), tablename, time.Since(start))
return nil return nil
} }
func (p *ClickHouse) Write(metrics []telegraf.Metric) error { func (p *ClickHouse) Write(metrics []telegraf.Metric) error {
if p.TableMode == "single" { p.metricLock.Lock()
return p.WriteSingleTable(metrics) if len(p.metricQueue) >= p.QueueLimit {
p.Log.Errorf("Metrics queue is full (%d/%d), dropping metrics", len(p.metricQueue), p.QueueLimit)
} else {
p.metricQueue = append(p.metricQueue, metrics...)
}
p.metricLock.Unlock()
select {
case p.metricTrigger <- struct{}{}:
default:
} }
return p.WriteMultiTable(metrics) return nil
}
func (p *ClickHouse) metricWriter(delay time.Duration) {
timer := time.NewTimer(delay)
defer timer.Stop()
for {
select {
case <-timer.C:
p.metricLock.Lock()
metrics := p.metricQueue
p.metricQueue = nil
p.metricLock.Unlock()
if len(metrics) > 0 {
if p.TableMode == "single" {
err := p.WriteSingleTable(metrics)
if err != nil {
p.Log.Errorf("Error writing to ClickHouse: %s", err)
}
} else {
err := p.WriteMultiTable(metrics)
if err != nil {
p.Log.Errorf("Error writing to ClickHouse: %s", err)
}
}
}
timer.Reset(delay)
case <-p.metricTrigger:
if !timer.Stop() {
<-timer.C
}
timer.Reset(delay)
}
}
} }
func init() { func init() {

View File

@@ -1,11 +1,43 @@
# Save metrics to an SQL Database # Save metrics to an SQL Database
[[outputs.sql]] [[outputs.clickhouse]]
## Data source name ## Data source name
# data_source_name = "" data_source_name = ""
## Initialization SQL ## Initialization SQL
# init_sql = "" # init_sql = ""
## Timestamp column name
# timestamp_column = "timestamp"
## Default TTL for data in the table (use ClickHouse syntax)
ttl = "3 MONTH"
## Table operation mode
## Set to "single" to create a single table for all metrics.
## Set to "multi" to create a new table for each metric.
# table_mode = "multi"
## Single table configuration
# [outputs.clickhouse.single_table]
## Table name
# table_name = "telegraf"
## Multi table configuration
# [outputs.clickhouse.multi_table]
## Table name prefix
# table_prefix = "telegraf"
## Initial metric queue capacity; the queue grows automatically if it fills up.
# queue_size = 100000
## Maximum queue size, 0 means unlimited.
## If the queue reaches this size, new writes will be dropped until it drains.
# queue_limit = 0
## Flush interval for the metric queue
## The agent waits until the flush interval has elapsed without any writes before flushing queued metrics to ClickHouse.
# flush_interval = "5s"
## Maximum amount of time a connection may be idle. "0s" means connections are ## Maximum amount of time a connection may be idle. "0s" means connections are
## never closed due to idle time. ## never closed due to idle time.
# connection_max_idle_time = "0s" # connection_max_idle_time = "0s"