chore: logs parsing pipeline support in opamp

nityanandagohain 2023-03-15 17:42:24 +05:30
parent 210c5fd7f2
commit 500ab02c47
6 changed files with 457 additions and 95 deletions

View File

@ -210,3 +210,21 @@ func UpsertSamplingProcessor(ctx context.Context, version int, config *tsp.Confi
m.updateDeployStatus(ctx, ElementTypeSamplingRules, version, string(DeployInitiated), "Deployment started", configHash, string(processorConfYaml))
return nil
}
// UpsertLogParsingProcessor updates the agent config with log parsing processors
func UpsertLogParsingProcessor(ctx context.Context, version int, rawPipelineData []byte, config map[string]interface{}, names []interface{}) error {
if !atomic.CompareAndSwapUint32(&m.lock, 0, 1) {
return fmt.Errorf("agent updater is busy")
}
defer atomic.StoreUint32(&m.lock, 0)
// send the changes to opamp.
configHash, err := opamp.UpsertLogsParsingProcessor(context.Background(), config, names, m.OnConfigUpdate)
if err != nil {
zap.S().Errorf("failed to call agent config update for log parsing processor: %v", err)
return err
}
m.updateDeployStatus(ctx, ElementTypeLogPipelines, version, string(DeployInitiated), "Deployment started", configHash, string(rawPipelineData))
return nil
}
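For orientation, a hedged sketch of a call site; only the function signature above is from this commit, while the agentConf package name and the nginx pipeline payload are assumptions for the sketch:

// Illustrative only: deploy one managed log parsing processor.
func deployLogPipelines(ctx context.Context, version int, rawPipelineJSON []byte) error {
	processors := map[string]interface{}{
		"logstransform/pipeline_nginx": map[string]interface{}{
			"operators": []interface{}{
				map[string]interface{}{"type": "regex_parser", "regex": `^(?P<ip>\S+)`, "parse_from": "body"},
			},
		},
	}
	names := []interface{}{"logstransform/pipeline_nginx"}
	return agentConf.UpsertLogParsingProcessor(ctx, version, rawPipelineJSON, processors, names)
}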

View File

@ -0,0 +1,180 @@
package opamp
import (
"context"
"crypto/sha256"
"fmt"
"strings"
"sync"
"github.com/knadh/koanf/parsers/yaml"
"github.com/open-telemetry/opamp-go/protobufs"
model "go.signoz.io/signoz/pkg/query-service/app/opamp/model"
"go.signoz.io/signoz/pkg/query-service/constants"
"go.uber.org/zap"
)
var lockLogsPipelineSpec sync.RWMutex
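// UpsertLogsParsingProcessor applies the given parsing processors to the
// effective config of every connected agent, pushes the updated
// collector.yaml over OpAMP, and returns the hash of the first updated
// config so the caller can track deploy status.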
func UpsertLogsParsingProcessor(ctx context.Context, parsingProcessors map[string]interface{}, parsingProcessorsNames []interface{}, callback func(string, string, error)) (string, error) {
confHash := ""
if opAmpServer == nil {
return confHash, fmt.Errorf("opamp server is down, unable to push config to agent at this moment")
}
agents := opAmpServer.agents.GetAllAgents()
if len(agents) == 0 {
return confHash, fmt.Errorf("no agents available at the moment")
}
for _, agent := range agents {
config := agent.EffectiveConfig
c, err := yaml.Parser().Unmarshal([]byte(config))
if err != nil {
return confHash, err
}
BuildLogParsingProcessors(c, parsingProcessors)
// get the processor list
logs := c["service"].(map[string]interface{})["pipelines"].(map[string]interface{})["logs"]
processors := logs.(map[string]interface{})["processors"].([]interface{})
// build the new processor list
updatedProcessorList, _ := buildLogsProcessors(processors, parsingProcessorsNames)
// add the new processor to the data
c["service"].(map[string]interface{})["pipelines"].(map[string]interface{})["logs"].(map[string]interface{})["processors"] = updatedProcessorList
updatedConf, err := yaml.Parser().Marshal(c)
if err != nil {
return confHash, err
}
// zap.S().Infof("sending new config", string(updatedConf))
hash := sha256.New()
_, err = hash.Write(updatedConf)
if err != nil {
return confHash, err
}
agent.EffectiveConfig = string(updatedConf)
err = agent.Upsert()
if err != nil {
return confHash, err
}
agent.SendToAgent(&protobufs.ServerToAgent{
RemoteConfig: &protobufs.AgentRemoteConfig{
Config: &protobufs.AgentConfigMap{
ConfigMap: map[string]*protobufs.AgentConfigFile{
"collector.yaml": {
Body: updatedConf,
ContentType: "application/x-yaml",
},
},
},
ConfigHash: hash.Sum(nil),
},
})
if confHash == "" {
confHash = string(hash.Sum(nil))
model.ListenToConfigUpdate(agent.ID, confHash, callback)
}
}
return confHash, nil
}
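The chained type assertions above are the densest part of this function; a minimal, self-contained sketch of the same traversal (the sample config is illustrative, and note that a missing key or unexpected type panics, since none of the assertions use the comma-ok form):

package main

import "fmt"

func main() {
	// Shape of the unmarshalled collector YAML that the code above walks.
	c := map[string]interface{}{
		"service": map[string]interface{}{
			"pipelines": map[string]interface{}{
				"logs": map[string]interface{}{
					"processors": []interface{}{"memory_limiter", "batch"},
				},
			},
		},
	}
	logs := c["service"].(map[string]interface{})["pipelines"].(map[string]interface{})["logs"]
	processors := logs.(map[string]interface{})["processors"].([]interface{})
	fmt.Println(processors) // [memory_limiter batch]
}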
// check if the processors already exist
// if yes then update the processor.
// if a managed processor is no longer in the desired set then remove it.
func BuildLogParsingProcessors(agentConf, parsingProcessors map[string]interface{}) error {
agentProcessors := agentConf["processors"].(map[string]interface{})
exists := map[string]struct{}{}
for key, params := range parsingProcessors {
agentProcessors[key] = params
exists[key] = struct{}{}
}
// remove the old unwanted processors
for k := range agentProcessors {
if _, ok := exists[k]; !ok && strings.HasPrefix(k, constants.LogsPPLPfx) {
delete(agentProcessors, k)
}
}
agentConf["processors"] = agentProcessors
return nil
}
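Net effect: an upsert keyed on processor name plus prefix-scoped cleanup. A minimal illustration, mirroring the "remove and upsert" tests later in this commit (not part of the commit itself):

func exampleBuildLogParsingProcessors() {
	conf := map[string]interface{}{
		"processors": map[string]interface{}{
			"batch":                       struct{}{},
			constants.LogsPPLPfx + "_old": struct{}{},
		},
	}
	_ = BuildLogParsingProcessors(conf, map[string]interface{}{
		constants.LogsPPLPfx + "_new": struct{}{},
	})
	// conf["processors"] now holds "batch" and LogsPPLPfx+"_new"; the "_old"
	// entry was deleted because it carries the managed prefix but is absent
	// from the desired set. Unmanaged entries like "batch" are never touched.
}

// buildLogsProcessors merges the desired parser sequence into the current
// processor list, dropping stale managed processors and inserting new ones
// while preserving the relative position of unmanaged processors.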
func buildLogsProcessors(current []interface{}, logsParserPipeline []interface{}) ([]interface{}, error) {
lockLogsPipelineSpec.Lock()
defer lockLogsPipelineSpec.Unlock()
exists := map[string]struct{}{}
for _, v := range logsParserPipeline {
exists[v.(string)] = struct{}{}
}
// remove the old processors which are no longer used
var pipeline []interface{}
for _, v := range current {
k := v.(string)
if _, ok := exists[k]; ok || !strings.HasPrefix(k, constants.LogsPPLPfx) {
pipeline = append(pipeline, v)
}
}
// create a reverse map of existing config processors and their position
existing := map[string]int{}
for i, p := range current {
name := p.(string)
existing[name] = i
}
// create mapping from our logsParserPipeline (processors managed by us) to position in existing processors (from current config)
// this means, if "batch" holds position 3 in the current effective config, and 2 in our config, the map will be [2]: 3
specVsExistingMap := map[int]int{}
// go through plan and map its elements to current positions in effective config
for i, m := range logsParserPipeline {
if loc, ok := existing[m.(string)]; ok {
specVsExistingMap[i] = loc
}
}
lastMatched := 0
// go through plan again in the increasing order
for i := 0; i < len(logsParserPipeline); i++ {
m := logsParserPipeline[i]
if loc, ok := specVsExistingMap[i]; ok {
lastMatched = loc + 1
} else {
if lastMatched <= 0 {
zap.S().Debugf("build_pipeline: found a new item to be inserted, inserting at position 0: %v", m)
pipeline = append([]interface{}{m}, pipeline[lastMatched:]...)
lastMatched++
} else {
zap.S().Debugf("build_pipeline: found a new item to be inserted at position %d: %v", lastMatched, m)
prior := make([]interface{}, len(pipeline[:lastMatched]))
next := make([]interface{}, len(pipeline[lastMatched:]))
copy(prior, pipeline[:lastMatched])
copy(next, pipeline[lastMatched:])
pipeline = append(prior, m)
pipeline = append(pipeline, next...)
}
}
}
if checkDuplicates(pipeline) {
// duplicates are most likely because the processor sequence in effective config conflicts
// with the planned sequence as per planned pipeline
return pipeline, fmt.Errorf("the effective config has an unexpected processor sequence: %v", pipeline)
}
return pipeline, nil
}
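The positional merge above is the subtle part; a worked trace of one of the test cases from this commit (pfx stands for constants.LogsPPLPfx):

// current:            [pfx_a, processor1, pfx_b, processor2]
// logsParserPipeline:  [pfx_a, pfx_b, pfx_c]
//
// prune:   every current entry is either in the plan or unmanaged, so
//          pipeline = [pfx_a, processor1, pfx_b, processor2]
// existing: {pfx_a: 0, processor1: 1, pfx_b: 2, processor2: 3}
// specVsExistingMap: {0: 0, 1: 2}   // pfx_c has no match in the current config
// walk:    i=0 matched -> lastMatched=1; i=1 matched -> lastMatched=3;
//          i=2 (pfx_c) is new and lastMatched=3 > 0, so it is spliced in
//          at index 3, giving [pfx_a, processor1, pfx_b, pfx_c, processor2]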

View File

@ -0,0 +1,161 @@
package opamp
import (
"fmt"
"testing"
. "github.com/smartystreets/goconvey/convey"
"go.signoz.io/signoz/pkg/query-service/constants"
)
var BuildProcessorTestData = []struct {
Name string
agentConf map[string]interface{}
pipelineProcessor map[string]interface{}
outputConf map[string]interface{}
}{
{
Name: "Add",
agentConf: map[string]interface{}{
"processors": map[string]interface{}{
"batch": struct{}{},
},
},
pipelineProcessor: map[string]interface{}{
constants.LogsPPLPfx + "_b": struct{}{},
},
outputConf: map[string]interface{}{
"processors": map[string]interface{}{
constants.LogsPPLPfx + "_b": struct{}{},
"batch": struct{}{},
},
},
},
{
Name: "Remove",
agentConf: map[string]interface{}{
"processors": map[string]interface{}{
constants.LogsPPLPfx + "_b": struct{}{},
"batch": struct{}{},
},
},
pipelineProcessor: map[string]interface{}{},
outputConf: map[string]interface{}{
"processors": map[string]interface{}{
"batch": struct{}{},
},
},
},
{
Name: "remove and upsert 1",
agentConf: map[string]interface{}{
"processors": map[string]interface{}{
constants.LogsPPLPfx + "_a": struct{}{},
constants.LogsPPLPfx + "_b": struct{}{},
"batch": struct{}{},
},
},
pipelineProcessor: map[string]interface{}{
constants.LogsPPLPfx + "_b": struct{}{},
},
outputConf: map[string]interface{}{
"processors": map[string]interface{}{
constants.LogsPPLPfx + "_b": struct{}{},
"batch": struct{}{},
},
},
},
{
Name: "remove and upsert 2",
agentConf: map[string]interface{}{
"processors": map[string]interface{}{
"memory_limiter": struct{}{},
constants.LogsPPLPfx + "_a": struct{}{},
constants.LogsPPLPfx + "_b": struct{}{},
"batch": struct{}{},
},
},
pipelineProcessor: map[string]interface{}{
constants.LogsPPLPfx + "_b": struct{}{},
},
outputConf: map[string]interface{}{
"processors": map[string]interface{}{
"memory_limiter": struct{}{},
constants.LogsPPLPfx + "_b": struct{}{},
"batch": struct{}{},
},
},
},
}
func TestBuildLogParsingProcessors(t *testing.T) {
for _, test := range BuildProcessorTestData {
Convey(test.Name, t, func() {
err := BuildLogParsingProcessors(test.agentConf, test.pipelineProcessor)
So(err, ShouldBeNil)
So(test.agentConf, ShouldResemble, test.outputConf)
})
}
}
var BuildLogsPipelineTestData = []struct {
Name string
currentPipeline []interface{}
logsPipeline []interface{}
expectedPipeline []interface{}
}{
{
Name: "Add new pipelines",
currentPipeline: []interface{}{"processor1", "processor2"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_b"},
expectedPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_b", "processor1", "processor2"},
},
{
Name: "Add new pipeline and respect custom processors",
currentPipeline: []interface{}{constants.LogsPPLPfx + "_a", "processor1", constants.LogsPPLPfx + "_b", "processor2"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_b", constants.LogsPPLPfx + "_c"},
expectedPipeline: []interface{}{constants.LogsPPLPfx + "_a", "processor1", constants.LogsPPLPfx + "_b", constants.LogsPPLPfx + "_c", "processor2"},
},
{
Name: "Add new pipeline and respect custom processors in the beginning and middle",
currentPipeline: []interface{}{"processor1", constants.LogsPPLPfx + "_a", "processor2", constants.LogsPPLPfx + "_b", "batch"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_b", constants.LogsPPLPfx + "_c"},
expectedPipeline: []interface{}{"processor1", constants.LogsPPLPfx + "_a", "processor2", constants.LogsPPLPfx + "_b", constants.LogsPPLPfx + "_c", "batch"},
},
{
Name: "Remove old pipeline add add new",
currentPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_b", "processor1", "processor2"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a"},
expectedPipeline: []interface{}{constants.LogsPPLPfx + "_a", "processor1", "processor2"},
},
{
Name: "Remove old pipeline from middle",
currentPipeline: []interface{}{"processor1", "processor2", constants.LogsPPLPfx + "_a", "processor3", constants.LogsPPLPfx + "_b", "batch"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a"},
expectedPipeline: []interface{}{"processor1", "processor2", constants.LogsPPLPfx + "_a", "processor3", "batch"},
},
{
Name: "Remove old pipeline from middle and add new pipeline",
currentPipeline: []interface{}{"processor1", "processor2", constants.LogsPPLPfx + "_a", "processor3", constants.LogsPPLPfx + "_b", "batch"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_c"},
expectedPipeline: []interface{}{"processor1", "processor2", constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_c", "processor3", "batch"},
},
{
Name: "Remove multiple old pipelines from middle and add multiple new ones",
currentPipeline: []interface{}{"processor1", constants.LogsPPLPfx + "_a", "processor2", constants.LogsPPLPfx + "_b", "processor3", constants.LogsPPLPfx + "_c", "processor4", constants.LogsPPLPfx + "_d", "processor5", "batch"},
logsPipeline: []interface{}{constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_a1", constants.LogsPPLPfx + "_c", constants.LogsPPLPfx + "_c1"},
expectedPipeline: []interface{}{"processor1", constants.LogsPPLPfx + "_a", constants.LogsPPLPfx + "_a1", "processor2", "processor3", constants.LogsPPLPfx + "_c", constants.LogsPPLPfx + "_c1", "processor4", "processor5", "batch"},
},
}
func TestBuildLogsPipeline(t *testing.T) {
for _, test := range BuildLogsPipelineTestData {
Convey(test.Name, t, func() {
v, err := buildLogsProcessors(test.currentPipeline, test.logsPipeline)
So(err, ShouldBeNil)
fmt.Println(test.Name, "\n", test.currentPipeline, "\n", v, "\n", test.expectedPipeline)
So(v, ShouldResemble, test.expectedPipeline)
})
}
}

View File

@ -2,15 +2,10 @@ package opamp
import (
"context"
"crypto/sha256"
"strings"
"github.com/knadh/koanf/parsers/yaml"
"github.com/open-telemetry/opamp-go/protobufs"
"github.com/open-telemetry/opamp-go/server"
"github.com/open-telemetry/opamp-go/server/types"
"go.opentelemetry.io/collector/confmap"
model "go.signoz.io/signoz/pkg/query-service/app/opamp/model"
"go.uber.org/zap"
@ -112,93 +107,3 @@ func Ready() bool {
func Subscribe(agentId string, hash string, f model.OnChangeCallback) {
model.ListenToConfigUpdate(agentId, hash, f)
}
func UpsertProcessor(ctx context.Context, processors map[string]interface{}, names []interface{}) error {
x := map[string]interface{}{
"processors": processors,
}
newConf := confmap.NewFromStringMap(x)
agents := opAmpServer.agents.GetAllAgents()
for _, agent := range agents {
config := agent.EffectiveConfig
c, err := yaml.Parser().Unmarshal([]byte(config))
if err != nil {
return err
}
agentConf := confmap.NewFromStringMap(c)
err = agentConf.Merge(newConf)
if err != nil {
return err
}
service := agentConf.Get("service")
logs := service.(map[string]interface{})["pipelines"].(map[string]interface{})["logs"]
processors := logs.(map[string]interface{})["processors"].([]interface{})
userProcessors := []interface{}{}
// remove old ones
for _, v := range processors {
if !strings.HasPrefix(v.(string), "logstransform/pipeline_") {
userProcessors = append(userProcessors, v)
}
}
// all user processors are pushed after pipelines
processors = append(names, userProcessors...)
service.(map[string]interface{})["pipelines"].(map[string]interface{})["logs"].(map[string]interface{})["processors"] = processors
s := map[string]interface{}{
"service": map[string]interface{}{
"pipelines": map[string]interface{}{
"logs": map[string]interface{}{
"processors": processors,
},
},
},
}
serviceC := confmap.NewFromStringMap(s)
err = agentConf.Merge(serviceC)
if err != nil {
return err
}
// ------ complete adding processor
configR, err := yaml.Parser().Marshal(agentConf.ToStringMap())
if err != nil {
return err
}
zap.S().Infof("sending new config", string(configR))
hash := sha256.New()
_, err = hash.Write(configR)
if err != nil {
return err
}
agent.EffectiveConfig = string(configR)
err = agent.Upsert()
if err != nil {
return err
}
agent.SendToAgent(&protobufs.ServerToAgent{
RemoteConfig: &protobufs.AgentRemoteConfig{
Config: &protobufs.AgentConfigMap{
ConfigMap: map[string]*protobufs.AgentConfigFile{
"collector.yaml": {
Body: configR,
ContentType: "application/x-yaml",
},
},
},
ConfigHash: hash.Sum(nil),
},
})
}
return nil
}

View File

@ -220,3 +220,6 @@ const (
NumberTagMapCol = "numberTagMap"
BoolTagMapCol = "boolTagMap"
)
// LogsPPLPfx is short for logsPipelinePrefix
const LogsPPLPfx = "logstransform/pipeline"
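Managed processor names elsewhere in this commit are built by appending a suffix to this prefix; the suffix source (a pipeline alias here) is an assumption:

name := constants.LogsPPLPfx + "_" + pipeline.Alias // e.g. "logstransform/pipeline_nginx"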

View File

@ -0,0 +1,95 @@
package model
import (
"encoding/json"
"time"
"github.com/pkg/errors"
)
// Pipeline is stored in the db and is ultimately deployed to the collector config
type Pipeline struct {
Id string `json:"id,omitempty" db:"id"`
OrderId int `json:"orderId" db:"order_id"`
Name string `json:"name,omitempty" db:"name"`
Alias string `json:"alias" db:"alias"`
Description *string `json:"description" db:"description"`
Enabled bool `json:"enabled" db:"enabled"`
Filter string `json:"filter" db:"filter"`
// configuration for pipeline
RawConfig string `db:"config_json" json:"-"`
Config []PipelineOperator `json:"config"`
// Updater is not required, as any change results in a new version
Creator
}
type Creator struct {
CreatedBy string `json:"createdBy" db:"created_by"`
CreatedAt time.Time `json:"createdAt" db:"created_at"`
}
type Processor struct {
Operators []PipelineOperator `json:"operators" yaml:"operators"`
}
type PipelineOperator struct {
Type string `json:"type" yaml:"type"`
ID string `json:"id,omitempty" yaml:"id,omitempty"`
Output string `json:"output,omitempty" yaml:"output,omitempty"`
OnError string `json:"on_error,omitempty" yaml:"on_error,omitempty"`
// don't need the following in the final config
OrderId int `json:"orderId" yaml:"-"`
Enabled bool `json:"enabled" yaml:"-"`
Name string `json:"name,omitempty" yaml:"-"`
// optional keys depending on the type
ParseTo string `json:"parse_to,omitempty" yaml:"parse_to,omitempty"`
Pattern string `json:"pattern,omitempty" yaml:"pattern,omitempty"`
Regex string `json:"regex,omitempty" yaml:"regex,omitempty"`
ParseFrom string `json:"parse_from,omitempty" yaml:"parse_from,omitempty"`
Timestamp *TimestampParser `json:"timestamp,omitempty" yaml:"timestamp,omitempty"`
TraceParser *TraceParser `json:"trace_parser,omitempty" yaml:"trace_parser,omitempty"`
Field string `json:"field,omitempty" yaml:"field,omitempty"`
Value string `json:"value,omitempty" yaml:"value,omitempty"`
From string `json:"from,omitempty" yaml:"from,omitempty"`
To string `json:"to,omitempty" yaml:"to,omitempty"`
Expr string `json:"expr,omitempty" yaml:"expr,omitempty"`
Routes *[]Route `json:"routes,omitempty" yaml:"routes,omitempty"`
Fields []string `json:"fields,omitempty" yaml:"fields,omitempty"`
Default string `json:"default,omitempty" yaml:"default,omitempty"`
}
type TimestampParser struct {
Layout string `json:"layout" yaml:"layout"`
LayoutType string `json:"layout_type" yaml:"layout_type"`
ParseFrom string `json:"parse_from" yaml:"parse_from"`
}
type TraceParser struct {
TraceId *ParseFrom `json:"trace_id,omitempty" yaml:"trace_id,omitempty"`
SpanId *ParseFrom `json:"span_id,omitempty" yaml:"span_id,omitempty"`
TraceFlags *ParseFrom `json:"trace_flags,omitempty" yaml:"trace_flags,omitempty"`
}
type ParseFrom struct {
ParseFrom string `json:"parse_from" yaml:"parse_from"`
}
type Route struct {
Output string `json:"output" yaml:"output"`
Expr string `json:"expr" yaml:"expr"`
}
func (i *Pipeline) ParseRawConfig() error {
c := []PipelineOperator{}
err := json.Unmarshal([]byte(i.RawConfig), &c)
if err != nil {
return errors.Wrap(err, "failed to parse pipeline config")
}
i.Config = c
return nil
}
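A hedged sketch of the intended round trip, stored JSON to collector operators; the yaml package (gopkg.in/yaml.v3) and the model import path are assumptions, only the structs above are from this commit:

package main

import (
	"fmt"
	"log"

	"gopkg.in/yaml.v3" // assumed marshaller, not part of this commit
	model "go.signoz.io/signoz/pkg/query-service/model" // hypothetical import path
)

func main() {
	p := model.Pipeline{
		RawConfig: `[{"orderId":1,"enabled":true,"type":"regex_parser","id":"p1","regex":"^(?P<ip>\\S+)","parse_from":"body"}]`,
	}
	if err := p.ParseRawConfig(); err != nil {
		log.Fatal(err)
	}
	// Fields tagged yaml:"-" (orderId, enabled, name) are dropped, so only
	// collector-relevant keys end up in the generated processor body.
	out, err := yaml.Marshal(model.Processor{Operators: p.Config})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Print(string(out))
}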