Remove single-table mode

Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
This commit is contained in:
2023-07-06 12:26:35 +03:00
parent bbefd86e13
commit 318155561b
4 changed files with 126 additions and 211 deletions

1
.gitignore vendored
View File

@@ -7,7 +7,6 @@
*.dll
*.so
*.dylib
clickhouse_*
# Test binary, built with `go test -c`
*.test

View File

@@ -18,6 +18,8 @@ build:
expire_in: 1d
reports:
dotenv: job.env
rules:
- if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"
release:
image: registry.gitlab.com/gitlab-org/release-cli
@@ -38,3 +40,5 @@ release:
url: "https://git.karaolidis.com/karaolidis/telegraf-clickhouse-plugin/-/jobs/$JOB_ID/artifacts/raw/telegraf-clickhouse-plugin-linux-amd64"
- name: "telegraf-clickhouse-plugin-linux-arm64"
url: "https://git.karaolidis.com/karaolidis/telegraf-clickhouse-plugin/-/jobs/$JOB_ID/artifacts/raw/telegraf-clickhouse-plugin-linux-arm64"
rules:
- if: "$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH"

View File

@@ -20,30 +20,20 @@ import (
//go:embed sample.conf
var sampleConfig string
// Per-mode table settings, each decoded from its own TOML sub-table.
type (
	// SingleTableOptions carries the settings of the single-table mode.
	SingleTableOptions struct {
		// TableName is read from the "table_name" key.
		TableName string `toml:"table_name"`
	}

	// MultiTableOptions carries the settings of the multi-table mode.
	MultiTableOptions struct {
		// TablePrefix is read from the "table_prefix" key.
		TablePrefix string `toml:"table_prefix"`
	}
)
type ClickHouse struct {
DataSourceName string `toml:"data_source_name"`
InitSQL string `toml:"init_sql"`
TimestampColumn string `toml:"timestamp_column"`
TTL string `toml:"ttl"`
TableMode string `toml:"table_mode"`
SingleTableOptions SingleTableOptions `toml:"single_table"`
MultiTableOptions MultiTableOptions `toml:"multi_table"`
QueueInitialSize int `toml:"queue_initial_size"`
QueueMaxSize int `toml:"queue_max_size"`
QueueFlushSize int `toml:"queue_flush_size"`
QueueFlushInterval time.Duration `toml:"queue_flush_interval"`
ConnectionMaxIdleTime time.Duration `toml:"connection_max_idle_time"`
ConnectionMaxLifetime time.Duration `toml:"connection_max_lifetime"`
ConnectionMaxIdle int `toml:"connection_max_idle"`
ConnectionMaxOpen int `toml:"connection_max_open"`
DataSourceName string `toml:"data_source_name"`
InitSQL string `toml:"init_sql"`
TimestampColumn string `toml:"timestamp_column"`
TTL string `toml:"ttl"`
TablePrefix string `toml:"table_prefix"`
QueueInitialSize int `toml:"queue_initial_size"`
QueueMaxSize int `toml:"queue_max_size"`
QueueFlushSize int `toml:"queue_flush_size"`
QueueFlushInterval time.Duration `toml:"queue_flush_interval"`
ConnectionMaxIdleTime time.Duration `toml:"connection_max_idle_time"`
ConnectionMaxLifetime time.Duration `toml:"connection_max_lifetime"`
ConnectionMaxIdle int `toml:"connection_max_idle"`
ConnectionMaxOpen int `toml:"connection_max_open"`
db *sql.DB
Log telegraf.Logger
@@ -66,18 +56,6 @@ func (ch *ClickHouse) Init() error {
ch.Log.Info("timestamp_column is not set, using default value: ", ch.TimestampColumn)
}
if ch.TableMode == "" {
ch.TableMode = "multi"
ch.Log.Info("table_mode is not set, using default value: ", ch.TableMode)
} else if ch.TableMode != "single" && ch.TableMode != "multi" {
return fmt.Errorf("table_mode must be one of: single, multi")
}
if ch.TableMode == "single" && ch.SingleTableOptions.TableName == "" {
ch.SingleTableOptions.TableName = "telegraf"
ch.Log.Info("table_name is not set, using default value: ", ch.SingleTableOptions.TableName)
}
if ch.QueueInitialSize <= 0 {
ch.QueueInitialSize = 100000
ch.Log.Info("queue_initial_size is not set, using default value: ", ch.QueueInitialSize)
@@ -187,20 +165,52 @@ func (ch *ClickHouse) toNullable(pair *orderedmap.Pair[string, string]) string {
return pair.Value
}
// pepareMetrics groups metrics by destination table and fills the two
// caller-supplied maps in place.
//
// The table name is the metric name, prefixed with TablePrefix and an
// underscore when TablePrefix is set. For every metric one row (column name
// to value) is appended to metricsData[table], and the column types derived
// by toDatatype are recorded in columns[table]. The timestamp column is
// written first, then tags, then fields, so a later key with the same name
// overwrites an earlier one.
func (ch *ClickHouse) pepareMetrics(metrics []telegraf.Metric, metricsData map[string][]map[string]interface{}, columns map[string]*orderedmap.OrderedMap[string, string]) {
	total := len(metrics)

	for _, m := range metrics {
		table := m.Name()
		if prefix := ch.TablePrefix; prefix != "" {
			table = prefix + "_" + table
		}

		// Create the per-table containers the first time a table is seen.
		rows, seen := metricsData[table]
		if !seen {
			rows = make([]map[string]interface{}, 0, total)
			metricsData[table] = rows
		}
		tableColumns, known := columns[table]
		if !known {
			tableColumns = orderedmap.New[string, string](total)
			columns[table] = tableColumns
		}

		ts := m.Time()
		row := map[string]interface{}{ch.TimestampColumn: ts}
		tableColumns.Set(ch.TimestampColumn, ch.toDatatype(ts))

		for _, tag := range m.TagList() {
			row[tag.Key] = tag.Value
			tableColumns.Set(tag.Key, ch.toDatatype(tag.Value))
		}

		for _, field := range m.FieldList() {
			row[field.Key] = field.Value
			tableColumns.Set(field.Key, ch.toDatatype(field.Value))
		}

		metricsData[table] = append(rows, row)
	}
}
func (ch *ClickHouse) generateCreateTable(tablename string, columns *orderedmap.OrderedMap[string, string]) string {
columnDefs := make([]string, 0, columns.Len())
for pair := columns.Oldest(); pair != nil; pair = pair.Next() {
columnDefs = append(columnDefs, fmt.Sprintf("%s %s", quoteIdent(pair.Key), ch.toNullable(pair)))
}
orderBy := make([]string, 0, 3)
orderBy := make([]string, 0, 2)
if _, ok := columns.Get("host"); ok {
orderBy = append(orderBy, "host")
}
orderBy = append(orderBy, quoteIdent(ch.TimestampColumn))
if _, ok := columns.Get("measurement"); ok {
orderBy = append(orderBy, "measurement")
}
createTable := fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s (%s) ENGINE = MergeTree() ORDER BY (%s) PARTITION BY toYYYYMM(%s)",
quoteIdent(tablename),
@@ -304,140 +314,100 @@ func (ch *ClickHouse) generateInsert(tablename string, columns *orderedmap.Order
placeholders)
}
// writeMetrics inserts the prepared rows of one table inside a single
// transaction.
//
// It calls ensureTable for tablename and columns, prepares the statement
// built by generateInsert, and executes it once per entry of metrics with the
// values laid out in column order. A column that is absent from a row's map
// is passed as nil.
//
// Any failure after Begin rolls the transaction back before returning, so a
// failed batch does not leave the transaction open. Errors from Begin,
// Prepare, Exec and Commit are wrapped with the name of the failing step;
// errors from ensureTable are returned unchanged.
func (ch *ClickHouse) writeMetrics(tablename string, columns *orderedmap.OrderedMap[string, string], metrics []map[string]interface{}) error {
	if err := ch.ensureTable(tablename, columns); err != nil {
		return err
	}

	// Named query rather than sql so the local does not shadow the
	// database/sql package.
	query := ch.generateInsert(tablename, columns, len(metrics))

	tx, err := ch.db.Begin()
	if err != nil {
		return fmt.Errorf("begin failed: %w", err)
	}
	// Roll back on every early return. After a successful Commit the
	// transaction is already finished and Rollback only reports that, so its
	// result is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare(query)
	if err != nil {
		return fmt.Errorf("prepare failed: %w", err)
	}
	defer stmt.Close()

	for _, metric := range metrics {
		args := make([]interface{}, 0, columns.Len())
		for pair := columns.Oldest(); pair != nil; pair = pair.Next() {
			args = append(args, metric[pair.Key])
		}

		if _, err := stmt.Exec(args...); err != nil {
			return fmt.Errorf("exec failed: %w", err)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit failed: %w", err)
	}

	return nil
}
func (ch *ClickHouse) WriteMultiTable(metrics []telegraf.Metric) error {
func (ch *ClickHouse) writeToDB(metrics []telegraf.Metric) error {
metricsData := make(map[string][]map[string]interface{})
columns := make(map[string]*orderedmap.OrderedMap[string, string])
start := time.Now()
for _, metric := range metrics {
tablename := metric.Name()
if ch.MultiTableOptions.TablePrefix != "" {
tablename = ch.MultiTableOptions.TablePrefix + "_" + tablename
}
if _, ok := metricsData[tablename]; !ok {
metricsData[tablename] = make([]map[string]interface{}, 0, len(metrics))
}
if _, ok := columns[tablename]; !ok {
columns[tablename] = orderedmap.New[string, string](len(metrics))
}
metricEntry := make(map[string]interface{})
metricEntry[ch.TimestampColumn] = metric.Time()
columns[tablename].Set(ch.TimestampColumn, ch.toDatatype(metric.Time()))
for _, tag := range metric.TagList() {
metricEntry[tag.Key] = tag.Value
columns[tablename].Set(tag.Key, ch.toDatatype(tag.Value))
}
for _, field := range metric.FieldList() {
metricEntry[field.Key] = field.Value
columns[tablename].Set(field.Key, ch.toDatatype(field.Value))
}
metricsData[tablename] = append(metricsData[tablename], metricEntry)
}
ch.pepareMetrics(metrics, metricsData, columns)
ch.Log.Infof("Prepared %d metrics for writing in %s\n", len(metrics), time.Since(start))
start = time.Now()
for tablename, metrics := range metricsData {
err := ch.writeMetrics(tablename, columns[tablename], metrics)
tableColumns := columns[tablename]
err := ch.ensureTable(tablename, tableColumns)
if err != nil {
return err
}
sql := ch.generateInsert(tablename, tableColumns, len(metrics))
tx, err := ch.db.Begin()
if err != nil {
return fmt.Errorf("begin failed: %w", err)
}
stmt, err := tx.Prepare(sql)
if err != nil {
return fmt.Errorf("prepare failed: %w", err)
}
defer stmt.Close()
values := make([][]interface{}, 0, len(metrics))
for _, metric := range metrics {
value := make([]interface{}, 0, tableColumns.Len())
for pair := tableColumns.Oldest(); pair != nil; pair = pair.Next() {
value = append(value, metric[pair.Key])
}
values = append(values, value)
}
for _, value := range values {
_, err = stmt.Exec(value...)
if err != nil {
return fmt.Errorf("exec failed: %w", err)
}
}
err = tx.Commit()
if err != nil {
return fmt.Errorf("commit failed: %w", err)
}
}
ch.Log.Infof("Wrote %d metrics to %d tables in %s\n", len(metrics), len(metricsData), time.Since(start))
return nil
}
func (ch *ClickHouse) WriteSingleTable(metrics []telegraf.Metric) error {
tablename := ch.SingleTableOptions.TableName
metricsData := make([]map[string]interface{}, 0, len(metrics))
columns := orderedmap.New[string, string](len(metrics))
func (ch *ClickHouse) backgroundWriter(delay time.Duration) {
timer := time.NewTimer(delay)
defer timer.Stop()
start := time.Now()
for _, metric := range metrics {
metricName := metric.Name()
for {
select {
case <-timer.C:
ch.metricLock.RLock()
metrics := ch.metricQueue
ch.metricLock.RUnlock()
metricEntry := make(map[string]interface{})
metricEntry[ch.TimestampColumn] = metric.Time()
columns.Set(ch.TimestampColumn, ch.toDatatype(metric.Time()))
if len(metrics) > 0 {
ch.metricLock.Lock()
ch.metricQueue = make([]telegraf.Metric, 0, ch.QueueInitialSize)
ch.metricLock.Unlock()
metricEntry["measurement"] = metricName
columns.Set("measurement", ch.toDatatype(metricName))
err := ch.writeToDB(metrics)
if err != nil {
ch.Log.Error("Error writing to ClickHouse: ", err)
}
}
for _, tag := range metric.TagList() {
colName := fmt.Sprintf("%s_%s", metricName, tag.Key)
metricEntry[colName] = tag.Value
columns.Set(colName, ch.toDatatype(tag.Value))
timer.Reset(delay)
case <-ch.metricTrigger:
ch.metricLock.RLock()
metricsLength := len(ch.metricQueue)
ch.metricLock.RUnlock()
if metricsLength < ch.QueueFlushSize {
if !timer.Stop() {
<-timer.C
}
timer.Reset(delay)
}
}
for _, field := range metric.FieldList() {
colName := fmt.Sprintf("%s_%s", metricName, field.Key)
metricEntry[colName] = field.Value
columns.Set(colName, ch.toDatatype(field.Value))
}
metricsData = append(metricsData, metricEntry)
}
ch.Log.Infof("Prepared %d metrics for writing in %s\n", len(metrics), time.Since(start))
start = time.Now()
err := ch.writeMetrics(tablename, columns, metricsData)
if err != nil {
return err
}
ch.Log.Infof("Wrote %d metrics to %s in %s\n", len(metrics), tablename, time.Since(start))
return nil
}
func (ch *ClickHouse) Write(metrics []telegraf.Metric) error {
@@ -462,52 +432,6 @@ func (ch *ClickHouse) Write(metrics []telegraf.Metric) error {
return nil
}
// backgroundWriter loops forever, flushing the metric queue each time the
// timer fires and re-arming the timer with delay afterwards.
//
// On a timer tick the queued metrics are taken and, if there are any, written
// with WriteSingleTable when TableMode is "single" and with WriteMultiTable
// otherwise. Write errors are logged and the loop continues.
//
// A signal on metricTrigger restarts the timer while the queue holds fewer
// than QueueFlushSize metrics. At or above that size the timer is left
// running, so the pending flush is not postponed any further.
//
// No stop signal is visible in this block; the function never returns.
func (ch *ClickHouse) backgroundWriter(delay time.Duration) {
	timer := time.NewTimer(delay)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			// Take the queue and install a fresh one under a single write
			// lock. Releasing a read lock and re-acquiring a write lock
			// would leave a window in which metrics queued by another
			// goroutine are thrown away by the reset.
			ch.metricLock.Lock()
			metrics := ch.metricQueue
			if len(metrics) > 0 {
				ch.metricQueue = make([]telegraf.Metric, 0, ch.QueueInitialSize)
			}
			ch.metricLock.Unlock()

			if len(metrics) > 0 {
				var err error
				if ch.TableMode == "single" {
					err = ch.WriteSingleTable(metrics)
				} else {
					err = ch.WriteMultiTable(metrics)
				}
				if err != nil {
					ch.Log.Error("Error writing to ClickHouse: ", err)
				}
			}

			timer.Reset(delay)

		case <-ch.metricTrigger:
			ch.metricLock.RLock()
			metricsLength := len(ch.metricQueue)
			ch.metricLock.RUnlock()

			if metricsLength < ch.QueueFlushSize {
				// Stop reports false when the timer has already fired; drain
				// the pending tick so Reset starts from a clean channel.
				if !timer.Stop() {
					<-timer.C
				}
				timer.Reset(delay)
			}
		}
	}
}
func init() {
outputs.Add("clickhouse", func() telegraf.Output {
return &ClickHouse{

View File

@@ -11,20 +11,8 @@
## Default TTL for data in the table (use ClickHouse syntax)
ttl = "3 MONTH"
## Table operation mode
## Set to "single" to create a single table for all metrics.
## Set to "multi" to create a new table for each metric.
# table_mode = "multi"
## Single table configuration
# [outputs.clickhouse.single_table]
## Table name
# table_name = "telegraf"
## Multi table configuration
# [outputs.clickhouse.multi_table]
## Table name prefix
# table_prefix = "telegraf"
## Table name prefix
# table_prefix = "telegraf"
## Initial metric queue size
## Queue resizes automatically if the queue becomes too large.