Mirror of https://git.mirrors.martin98.com/https://github.com/SigNoz/signoz (synced 2025-06-04 11:25:52 +08:00)

* fix: new implementation for finding missing timerange
* fix: remove unwanted code
* fix: update if condition
* fix: update logic and the test cases
* fix: correct name
* fix: filter points which are not a complete agg interval
* fix: fix the logic to use the points correctly
* fix: fix overlapping test case
* fix: add comments
* Update pkg/query-service/querycache/query_range_cache.go (Co-authored-by: ellipsis-dev[bot])
* fix: use step ms
* fix: use step ms
* fix: tests
* fix: update logic to handle actual empty series
* fix: name updated
* Update pkg/query-service/app/querier/v2/helper.go (Co-authored-by: ellipsis-dev[bot])
* fix: address comments
* fix: address comments
* fix: address comments
* Update pkg/query-service/common/query_range.go (Co-authored-by: ellipsis-dev[bot])
* fix: add error log
* fix: handle case where end is equal to a complete window end
* fix: added comments
* fix: address comments
* fix: move function to common query range

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
347 lines · 10 KiB · Go
package querycache

import (
	"encoding/json"
	"math"
	"sort"
	"time"

	"go.signoz.io/signoz/pkg/query-service/cache"
	v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
	"go.signoz.io/signoz/pkg/query-service/utils/labels"
	"go.uber.org/zap"
)
// queryCache caches time-series query results keyed by a query fingerprint.
type queryCache struct {
	cache        cache.Cache
	fluxInterval time.Duration
}

// MissInterval is a time range that is missing from the cache.
type MissInterval struct {
	Start, End int64 // in milliseconds
}

// CachedSeriesData is one contiguous cached chunk of series data.
type CachedSeriesData struct {
	Start int64        `json:"start"`
	End   int64        `json:"end"`
	Data  []*v3.Series `json:"data"`
}

// QueryCacheOption configures a queryCache.
type QueryCacheOption func(q *queryCache)
// NewQueryCache creates a queryCache with the given options applied.
func NewQueryCache(opts ...QueryCacheOption) *queryCache {
	q := &queryCache{}
	for _, opt := range opts {
		opt(q)
	}
	return q
}

// WithCache sets the underlying cache backend.
func WithCache(cache cache.Cache) QueryCacheOption {
	return func(q *queryCache) {
		q.cache = cache
	}
}

// WithFluxInterval sets the window before now() during which data may not be
// fully ingested and therefore should not be cached.
func WithFluxInterval(fluxInterval time.Duration) QueryCacheOption {
	return func(q *queryCache) {
		q.fluxInterval = fluxInterval
	}
}
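// Usage sketch (illustrative, not part of the original file); myCache is a
// hypothetical cache.Cache implementation:
//
//	qc := NewQueryCache(
//		WithCache(myCache),
//		WithFluxInterval(5*time.Minute),
//	)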
// FindMissingTimeRangesV2 is a newer, correct implementation of FindMissingTimeRanges.
// It also accounts for timestamps that the first version skipped because of
// rounding to the aggregation interval.
func (q *queryCache) FindMissingTimeRangesV2(start, end int64, step int64, cacheKey string) []MissInterval {
	if q.cache == nil || cacheKey == "" {
		return []MissInterval{{Start: start, End: end}}
	}

	stepMs := step * 1000

	// When the window is too small to be cached, return the entire range as a miss.
	if (start + stepMs) > end {
		return []MissInterval{{Start: start, End: end}}
	}

	cachedSeriesDataList := q.getCachedSeriesData(cacheKey)

	// Sort the cached data by start time.
	sort.Slice(cachedSeriesDataList, func(i, j int) bool {
		return cachedSeriesDataList[i].Start < cachedSeriesDataList[j].Start
	})

	zap.L().Info("Number of non-overlapping cached series data", zap.Int("count", len(cachedSeriesDataList)))

	// Exclude the flux interval from the cached end time.
	//
	// Why `time.Now()`? When querying a range [start, now()),
	// we don't want to use cached data that falls inside the flux interval,
	// because data in that period might not be fully ingested yet
	// and should not be used for caching.
	// This is not an issue if the end time is before now() - fluxInterval.
	if len(cachedSeriesDataList) > 0 {
		lastCachedData := cachedSeriesDataList[len(cachedSeriesDataList)-1]
		lastCachedData.End = int64(
			math.Min(
				float64(lastCachedData.End),
				float64(time.Now().UnixMilli()-q.fluxInterval.Milliseconds()),
			),
		)
	}

	var missingRanges []MissInterval
	currentTime := start

	// If start does not fall on an aggregation-window boundary, treat the
	// partial leading window as a miss.
	if start%stepMs != 0 {
		nextAggStart := start - (start % stepMs) + stepMs
		missingRanges = append(missingRanges, MissInterval{Start: start, End: nextAggStart})
		currentTime = nextAggStart
	}

	for _, data := range cachedSeriesDataList {
		// Ignore cached data that ends before the start time.
		if data.End <= start {
			continue
		}
		// Stop processing once we've reached the end time.
		if data.Start >= end {
			break
		}

		// Add a missing range if there's a gap before this cached chunk.
		if currentTime < data.Start {
			missingRanges = append(missingRanges, MissInterval{Start: currentTime, End: min(data.Start, end)})
		}

		// Advance currentTime, but don't go past the end time.
		currentTime = max(currentTime, min(data.End, end))
	}

	// While iterating through cachedSeriesDataList we may have reached the end,
	// but the trailing range might not be a complete aggregation window, so we
	// add it manually. First check currentTime < end, meaning the scan stopped
	// short of end. Otherwise, if end%stepMs != 0, the final window is partial
	// even though currentTime reached end; that happens when currentTime equals
	// nextAggStart and no cached range matched inside the loop. The test case
	// "start lies near the start of aggregation interval and end lies near the
	// end of another aggregation interval" exercises this case.
	if currentTime < end {
		missingRanges = append(missingRanges, MissInterval{Start: currentTime, End: end})
	} else if end%stepMs != 0 {
		// end is not on a window boundary, so add the partial trailing window as a miss.
		prevAggEnd := end - (end % stepMs)
		missingRanges = append(missingRanges, MissInterval{Start: prevAggEnd, End: end})
	}

	// Merge overlapping or adjacent missing ranges.
	if len(missingRanges) <= 1 {
		return missingRanges
	}
	merged := []MissInterval{missingRanges[0]}
	for _, curr := range missingRanges[1:] {
		last := &merged[len(merged)-1]
		if last.End >= curr.Start {
			last.End = max(last.End, curr.End)
		} else {
			merged = append(merged, curr)
		}
	}

	return merged
}
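// Worked example (illustrative, not from the original source): with step = 60
// (so stepMs = 60_000), a request for [90_000, 330_000) against a single
// cached chunk covering [120_000, 240_000) yields two misses, assuming the
// chunk ends well before now() - fluxInterval:
//
//	[90_000, 120_000)  // partial leading window, forced miss
//	[240_000, 330_000) // gap after the cached chunk, including the partial tail
//
// Adjacent or overlapping misses would be merged before returning.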
// FindMissingTimeRanges returns the ranges within [start, end) that are not
// covered by the cached data for cacheKey. Unlike V2, it does not force
// partial aggregation windows at the edges to be re-queried.
func (q *queryCache) FindMissingTimeRanges(start, end, step int64, cacheKey string) []MissInterval {
	if q.cache == nil || cacheKey == "" {
		return []MissInterval{{Start: start, End: end}}
	}

	cachedSeriesDataList := q.getCachedSeriesData(cacheKey)

	// Sort the cached data by start time.
	sort.Slice(cachedSeriesDataList, func(i, j int) bool {
		return cachedSeriesDataList[i].Start < cachedSeriesDataList[j].Start
	})

	zap.L().Info("Number of non-overlapping cached series data", zap.Int("count", len(cachedSeriesDataList)))

	// Exclude the flux interval from the cached end time.
	//
	// Why `time.Now()`? When querying a range [start, now()),
	// we don't want to use cached data that falls inside the flux interval,
	// because data in that period might not be fully ingested yet
	// and should not be used for caching.
	// This is not an issue if the end time is before now() - fluxInterval.
	endMillis := time.Now().UnixMilli()
	adjustStep := int64(math.Min(float64(step), 60))
	roundedMillis := endMillis - (endMillis % (adjustStep * 1000))

	if len(cachedSeriesDataList) > 0 {
		lastCachedData := cachedSeriesDataList[len(cachedSeriesDataList)-1]
		lastCachedData.End = int64(
			math.Min(
				float64(lastCachedData.End),
				float64(roundedMillis-q.fluxInterval.Milliseconds()),
			),
		)
	}

	var missingRanges []MissInterval
	currentTime := start

	for _, data := range cachedSeriesDataList {
		// Ignore cached data that ends before the start time.
		if data.End <= start {
			continue
		}
		// Stop processing once we've reached the end time.
		if data.Start >= end {
			break
		}

		// Add a missing range if there's a gap before this cached chunk.
		if currentTime < data.Start {
			missingRanges = append(missingRanges, MissInterval{Start: currentTime, End: min(data.Start, end)})
		}

		// Advance currentTime, but don't go past the end time.
		currentTime = max(currentTime, min(data.End, end))
	}

	// Add the final missing range if necessary.
	if currentTime < end {
		missingRanges = append(missingRanges, MissInterval{Start: currentTime, End: end})
	}

	return missingRanges
}
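// Illustrative contrast with V2 (not from the original source): for step = 60,
// range [90_000, 330_000), and a cached chunk [60_000, 240_000), this version
// reports only [240_000, 330_000), while FindMissingTimeRangesV2 additionally
// re-queries the partial leading window [90_000, 120_000), since points
// aggregated over a rounded window may be incomplete.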
// getCachedSeriesData retrieves and decodes the cached chunks for cacheKey.
// Retrieve and unmarshal errors are swallowed: a nil slice is returned, which
// callers treat as a full cache miss.
func (q *queryCache) getCachedSeriesData(cacheKey string) []*CachedSeriesData {
	cachedData, _, _ := q.cache.Retrieve(cacheKey, true)
	var cachedSeriesDataList []*CachedSeriesData
	if err := json.Unmarshal(cachedData, &cachedSeriesDataList); err != nil {
		return nil
	}
	return cachedSeriesDataList
}
// mergeSeries merges the missed series into the cached series, matching
// series by the hash of their label sets.
func (q *queryCache) mergeSeries(cachedSeries, missedSeries []*v3.Series) []*v3.Series {
	mergedSeries := make([]*v3.Series, 0)
	seriesesByLabels := make(map[uint64]*v3.Series)
	for idx := range cachedSeries {
		series := cachedSeries[idx]
		seriesesByLabels[labels.FromMap(series.Labels).Hash()] = series
	}

	for idx := range missedSeries {
		series := missedSeries[idx]
		h := labels.FromMap(series.Labels).Hash()
		if _, ok := seriesesByLabels[h]; !ok {
			seriesesByLabels[h] = series
			continue
		}
		seriesesByLabels[h].Points = append(seriesesByLabels[h].Points, series.Points...)
	}

	// Iterate over the hashes in a stable order so the output is deterministic.
	hashes := make([]uint64, 0, len(seriesesByLabels))
	for h := range seriesesByLabels {
		hashes = append(hashes, h)
	}
	sort.Slice(hashes, func(i, j int) bool {
		return hashes[i] < hashes[j]
	})

	// Sort the points in each series by timestamp and drop duplicates.
	for _, h := range hashes {
		series := seriesesByLabels[h]
		series.SortPoints()
		series.RemoveDuplicatePoints()
		mergedSeries = append(mergedSeries, series)
	}
	return mergedSeries
}
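// Illustrative sketch (not from the original source): if cachedSeries holds
// points for {service="api"} at t = 60s and 120s, and missedSeries holds
// points for the same label set at t = 120s and 180s, the merged result is a
// single series with points at 60s, 120s, and 180s (concatenated, sorted, and
// deduplicated by timestamp). A label set present only in missedSeries is
// carried over as-is.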
// storeMergedData marshals mergedData and writes it to the cache.
// Marshal and store errors are logged but otherwise ignored.
func (q *queryCache) storeMergedData(cacheKey string, mergedData []CachedSeriesData) {
	if q.cache == nil {
		return
	}
	mergedDataJSON, err := json.Marshal(mergedData)
	if err != nil {
		zap.L().Error("error marshalling merged data", zap.Error(err))
		return
	}
	err = q.cache.Store(cacheKey, mergedDataJSON, 0)
	if err != nil {
		zap.L().Error("error storing merged data", zap.Error(err))
	}
}
// MergeWithCachedSeriesDataV2 merges newData with the data already cached
// under cacheKey, coalescing overlapping or touching time ranges, and returns
// the merged result without writing it back to the cache. If nothing is
// cached yet, or the cached payload cannot be decoded, newData is returned
// as-is.
func (q *queryCache) MergeWithCachedSeriesDataV2(cacheKey string, newData []CachedSeriesData) []CachedSeriesData {
	if q.cache == nil {
		return newData
	}

	cachedData, _, _ := q.cache.Retrieve(cacheKey, true)
	var existingData []CachedSeriesData
	if err := json.Unmarshal(cachedData, &existingData); err != nil {
		zap.L().Error("error unmarshalling existing data", zap.Error(err))
		return newData
	}

	allData := append(existingData, newData...)

	sort.Slice(allData, func(i, j int) bool {
		return allData[i].Start < allData[j].Start
	})

	var mergedData []CachedSeriesData
	var current *CachedSeriesData

	for _, data := range allData {
		if current == nil {
			current = &CachedSeriesData{
				Start: data.Start,
				End:   data.End,
				Data:  data.Data,
			}
			continue
		}
		if data.Start <= current.End {
			// Overlapping intervals: merge them.
			current.End = max(current.End, data.End)
			current.Start = min(current.Start, data.Start)
			// Merge the Data fields as well.
			current.Data = q.mergeSeries(current.Data, data.Data)
		} else {
			// No overlap: finish current and start a new interval.
			mergedData = append(mergedData, *current)
			current = &CachedSeriesData{
				Start: data.Start,
				End:   data.End,
				Data:  data.Data,
			}
		}
	}

	// After the loop, add the last interval.
	if current != nil {
		mergedData = append(mergedData, *current)
	}

	return mergedData
}
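// Illustrative sketch (not from the original source): cached chunks
// [0, 100) and [200, 300) merged with new data [90, 210) coalesce into a
// single CachedSeriesData covering [0, 300), with the three Data slices
// combined via mergeSeries.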
// MergeWithCachedSeriesData merges newData with the cached data and writes
// the merged result back to the cache.
func (q *queryCache) MergeWithCachedSeriesData(cacheKey string, newData []CachedSeriesData) []CachedSeriesData {
	mergedData := q.MergeWithCachedSeriesDataV2(cacheKey, newData)
	q.storeMergedData(cacheKey, mergedData)
	return mergedData
}
// StoreSeriesInCache stores the given series data under cacheKey.
func (q *queryCache) StoreSeriesInCache(cacheKey string, series []CachedSeriesData) {
	q.storeMergedData(cacheKey, series)
}
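// End-to-end flow sketch (illustrative, not part of the original file):
// a caller would typically find the gaps, fetch only those gaps from the
// datastore, and merge the fresh chunks back into the cache. Names such as
// fetchFromDB are hypothetical placeholders.
//
//	misses := qc.FindMissingTimeRangesV2(startMs, endMs, stepSec, key)
//	for _, m := range misses {
//		series := fetchFromDB(m.Start, m.End) // hypothetical datastore query
//		qc.MergeWithCachedSeriesData(key, []CachedSeriesData{
//			{Start: m.Start, End: m.End, Data: series},
//		})
//	}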