Niels de Vos bec6090996 build: move e2e dependencies into e2e/go.mod
Several packages are only used while running the e2e suite. These
packages are less important to update, as they cannot influence the
final executable that is part of the Ceph-CSI container-image.

By moving these dependencies out of the main Ceph-CSI go.mod, it is
easier to identify if a reported CVE affects Ceph-CSI, or only the
testing (like most of the Kubernetes CVEs).

Signed-off-by: Niels de Vos <ndevos@ibm.com>
2025-03-07 16:05:04 +00:00

1229 lines
33 KiB
Go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file implements parsers to convert legacy profiles into the
// profile.proto format.
package profile
import (
"bufio"
"bytes"
"fmt"
"io"
"math"
"regexp"
"strconv"
"strings"
)
// Regular expressions (and the string fragments they are assembled from)
// used by the legacy profile parsers in this file. Everything is compiled
// once, at package initialization.
var (
// countStartRE matches, as a whole string, the header of a count profile:
// "<type> profile: total <n>". It captures the profile type.
countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
// countRE matches, as a whole string, a count profile sample: a decimal
// count, then " @" followed by one or more " 0x<hex>" addresses.
countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)
// heapHeaderRE matches a heap profile header. It captures the four numbers
// (two before and two inside the brackets), the format name starting with
// "heap", and an optional number following "/".
heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
// heapSampleRE matches a heap sample row: two (possibly negative) numbers,
// two bracketed numbers, and the address list following "@".
heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
// contentionSampleRE matches a contention sample row: two decimal numbers
// and the address list following "@".
contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
// hexNumberRE matches one 0x-prefixed, lower-case hexadecimal number.
hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
// growthHeaderRE and fragmentationHeaderRE match heap-style headers whose
// format name is "growth"/"growthz" or "fragmentation"/"fragmentationz".
growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)
fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)
// threadzStartRE matches the "--- threadz <n> ---" header line.
threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
// threadStartRE matches the line that opens one thread's stack, capturing
// the hexadecimal thread identifier, the name and the trailing number.
threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
// Recommended format:
// Start End object file name offset(optional) linker build id
// 0x40000-0x80000 /path/to/binary (@FF00) abc123456
// Non-capturing fragments: whitespace followed by decimal digits,
// whitespace followed by a "hex:hex" pair, and optional whitespace.
spaceDigits = `\s+[[:digit:]]+`
hexPair = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
oSpace = `\s*`
// Capturing expressions.
// cHex captures a hex number without its optional "0x" prefix; cHexRange
// captures two of them (start and end of a range). The remaining cSpace*
// and cPerm fragments are optional, whitespace-preceded fields.
cHex = `(?:0x)?([[:xdigit:]]+)`
cHexRange = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
cSpaceString = `(?:\s+(\S+))?`
cSpaceHex = `(?:\s+([[:xdigit:]]+))?`
cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
cPerm = `(?:\s+([-rwxp]+))?`
// procMapsRE captures: start, end, permissions, offset, file name.
// briefMapsRE captures: start, end, permissions, file name, offset,
// build id. parseMappingEntry relies on these capture positions.
procMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)
// Regular expression to parse log data, of the form:
// ... file:line] msg...
logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
)
// isSpaceOrComment reports whether line carries no data: it is either
// blank (empty or whitespace only) or a comment, i.e. its first
// non-space character is '#'.
func isSpaceOrComment(line string) bool {
	content := strings.TrimSpace(line)
	if content == "" {
		return true
	}
	return content[0] == '#'
}
// parseGoCount parses a Go count profile (e.g., threadcreate or
// goroutine) and returns a new Profile.
//
// The input is expected to start (after optional blank or comment
// lines) with a header of the form "<type> profile: total <n>",
// followed by one sample per line: "<count> @ 0x<addr> 0x<addr> ...".
// Sample parsing stops at the first line starting with "---"; the rest
// of the input is handed to parseAdditionalSections.
//
// It returns errUnrecognized if the header is missing, errMalformed if
// a sample line cannot be parsed, or any error reported by the scanner
// or by parseAdditionalSections.
func parseGoCount(b []byte) (*Profile, error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	// Skip comments at the beginning of the file.
	for s.Scan() && isSpaceOrComment(s.Text()) {
	}
	if err := s.Err(); err != nil {
		return nil, err
	}
	m := countStartRE.FindStringSubmatch(s.Text())
	if m == nil {
		return nil, errUnrecognized
	}
	profileType := m[1]
	p := &Profile{
		PeriodType: &ValueType{Type: profileType, Unit: "count"},
		Period:     1,
		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
	}
	// locations de-duplicates Location objects by (adjusted) address.
	locations := make(map[uint64]*Location)
	for s.Scan() {
		line := s.Text()
		if isSpaceOrComment(line) {
			continue
		}
		if strings.HasPrefix(line, "---") {
			break
		}
		m := countRE.FindStringSubmatch(line)
		if m == nil {
			return nil, errMalformed
		}
		// countRE only admits decimal digits for the count, so parse it
		// explicitly in base 10. With base 0 a leading zero would select
		// octal ("010" would become 8 and "09" would be rejected).
		n, err := strconv.ParseInt(m[1], 10, 64)
		if err != nil {
			return nil, errMalformed
		}
		fields := strings.Fields(m[2])
		locs := make([]*Location, 0, len(fields))
		for _, stk := range fields {
			// Addresses carry a "0x" prefix, so base 0 is required here.
			addr, err := strconv.ParseUint(stk, 0, 64)
			if err != nil {
				return nil, errMalformed
			}
			// Adjust all frames by -1 to land on top of the call instruction.
			addr--
			loc := locations[addr]
			if loc == nil {
				loc = &Location{
					Address: addr,
				}
				locations[addr] = loc
				p.Location = append(p.Location, loc)
			}
			locs = append(locs, loc)
		}
		p.Sample = append(p.Sample, &Sample{
			Location: locs,
			Value:    []int64{n},
		})
	}
	if err := s.Err(); err != nil {
		return nil, err
	}

	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}
	return p, nil
}
// remapLocationIDs rebuilds p.Location from the locations referenced by
// the samples. Locations are kept in order of first reference and are
// given consecutive IDs starting at 1; locations that no sample refers
// to are dropped from p.Location.
func (p *Profile) remapLocationIDs() {
	visited := make(map[*Location]bool, len(p.Location))
	var ordered []*Location
	for _, sample := range p.Sample {
		for _, loc := range sample.Location {
			if !visited[loc] {
				// The next ID is one past the number already collected.
				loc.ID = uint64(len(ordered) + 1)
				ordered = append(ordered, loc)
				visited[loc] = true
			}
		}
	}
	p.Location = ordered
}
// remapFunctionIDs rebuilds p.Function from the functions referenced by
// the lines of p.Location. Functions are kept in order of first
// reference and are given consecutive IDs starting at 1; lines without
// a function are skipped.
func (p *Profile) remapFunctionIDs() {
	visited := make(map[*Function]bool, len(p.Function))
	var ordered []*Function
	for _, loc := range p.Location {
		for i := range loc.Line {
			fn := loc.Line[i].Function
			if fn != nil && !visited[fn] {
				fn.ID = uint64(len(ordered) + 1)
				ordered = append(ordered, fn)
				visited[fn] = true
			}
		}
	}
	p.Function = ordered
}
// remapMappingIDs matches location addresses with existing mappings
// and updates them appropriately. Locations that fall in no mapping are
// attached to a single catch-all mapping appended to p.Mapping, and all
// mapping IDs are renumbered from 1 at the end.
//
// The search is O(N*M); if this ever shows up as a bottleneck, evaluate
// sorting the mappings and doing a binary search, which would make it
// O(N*log(M)).
func (p *Profile) remapMappingIDs() {
	// Some profile handlers will incorrectly set regions for the main
	// executable if its section is remapped. Fix them through heuristics.

	// Drop a leading '/anon_hugepage' mapping when the next mapping
	// starts exactly where it ends.
	if len(p.Mapping) > 1 {
		first, second := p.Mapping[0], p.Mapping[1]
		if strings.HasPrefix(first.File, "/anon_hugepage") && first.Limit == second.Start {
			p.Mapping = p.Mapping[1:]
		}
	}

	// Subtract the offset from the start of the main mapping if it
	// ends up at a recognizable start address.
	if len(p.Mapping) > 0 {
		const expectedStart = 0x400000
		first := p.Mapping[0]
		if first.Start-first.Offset == expectedStart {
			first.Start, first.Offset = expectedStart, 0
		}
	}

	// lookup returns the mapping to use for addr, or nil if there is
	// none. It may extend a mapping backwards by its offset (see below).
	lookup := func(addr uint64) *Mapping {
		for _, m := range p.Mapping {
			if addr >= m.Start && addr < m.Limit {
				return m
			}
		}
		// Work around legacy handlers failing to encode the first
		// part of mappings split into adjacent ranges.
		for _, m := range p.Mapping {
			if m.Offset == 0 {
				continue
			}
			if base := m.Start - m.Offset; base <= addr && addr < m.Start {
				m.Start, m.Offset = base, 0
				return m
			}
		}
		return nil
	}

	// Associate each location with an address to the corresponding
	// mapping. Create fake mapping if a suitable one isn't found.
	var fake *Mapping
	for _, loc := range p.Location {
		if loc.Mapping != nil || loc.Address == 0 {
			continue
		}
		m := lookup(loc.Address)
		if m == nil {
			// If there is still no mapping, create a fake one.
			// This is important for the Go legacy handler, which produced
			// no mappings.
			if fake == nil {
				fake = &Mapping{
					ID:    1,
					Limit: ^uint64(0),
				}
				p.Mapping = append(p.Mapping, fake)
			}
			m = fake
		}
		loc.Mapping = m
	}

	// Reset all mapping IDs.
	for i := range p.Mapping {
		p.Mapping[i].ID = uint64(i + 1)
	}
}
// cpuInts lists the word decoders that parseCPU tries, in order, when
// probing the header of a legacy CPU profile: 32-bit and 64-bit words,
// each in little-endian and big-endian byte order.
var cpuInts = []func([]byte) (uint64, []byte){
get32l,
get32b,
get64l,
get64b,
}
// get32l decodes the first 4 bytes of b as a little-endian 32-bit word
// and returns it together with the remainder of b. If b holds fewer
// than 4 bytes it returns (0, nil).
func get32l(b []byte) (uint64, []byte) {
	const width = 4
	if len(b) < width {
		return 0, nil
	}
	var v uint64
	// Fold from the most significant (last) byte down to the first.
	for i := width - 1; i >= 0; i-- {
		v = v<<8 | uint64(b[i])
	}
	return v, b[width:]
}
// get32b decodes the first 4 bytes of b as a big-endian 32-bit word and
// returns it together with the remainder of b. If b holds fewer than 4
// bytes it returns (0, nil).
func get32b(b []byte) (uint64, []byte) {
	const width = 4
	if len(b) < width {
		return 0, nil
	}
	var v uint64
	// The first byte is the most significant one.
	for _, c := range b[:width] {
		v = v<<8 | uint64(c)
	}
	return v, b[width:]
}
// get64l decodes the first 8 bytes of b as a little-endian 64-bit word
// and returns it together with the remainder of b. If b holds fewer
// than 8 bytes it returns (0, nil).
func get64l(b []byte) (uint64, []byte) {
	const width = 8
	if len(b) < width {
		return 0, nil
	}
	var v uint64
	// Fold from the most significant (last) byte down to the first.
	for i := width - 1; i >= 0; i-- {
		v = v<<8 | uint64(b[i])
	}
	return v, b[width:]
}
// get64b decodes the first 8 bytes of b as a big-endian 64-bit word and
// returns it together with the remainder of b. If b holds fewer than 8
// bytes it returns (0, nil).
func get64b(b []byte) (uint64, []byte) {
	const width = 8
	if len(b) < width {
		return 0, nil
	}
	var v uint64
	// The first byte is the most significant one.
	for _, c := range b[:width] {
		v = v<<8 | uint64(c)
	}
	return v, b[width:]
}
// parseCPU parses a profilez legacy profile and returns a newly
// populated Profile.
//
// The general format for profilez samples is a sequence of words in
// binary format. The first words are a header with the following data:
//
//	1st word -- 0
//	2nd word -- 3
//	3rd word -- 0 if a c++ application, 1 if a java application.
//	4th word -- Sampling period (in microseconds).
//	5th word -- Padding.
//
// Word size and byte order are not known up front, so every decoder in
// cpuInts is tried in turn until one yields a valid header. If none
// does, errUnrecognized is returned.
func parseCPU(b []byte) (*Profile, error) {
	for _, parse := range cpuInts {
		// Decode the five header words with the candidate decoder. Once a
		// decoder runs out of input it keeps returning (0, nil), so a nil
		// remainder means the header did not fit.
		var header [5]uint64
		rest := b
		for i := range header {
			header[i], rest = parse(rest)
		}
		if rest == nil {
			continue
		}
		if header[0] != 0 || header[1] != 3 || header[3] == 0 || header[4] != 0 {
			continue
		}
		period := int64(header[3])
		switch header[2] {
		case 0:
			return cpuProfile(rest, period, parse)
		case 1:
			return javaCPUProfile(rest, period, parse)
		}
	}
	return nil, errUnrecognized
}
// cpuProfile returns a new Profile from C++ profilez data.
// b is the profile bytes after the header, period is the profiling
// period, and parse is a function to parse words from the profile in
// its native word size and endianness. Whatever follows the samples in
// b is parsed as the memory map.
func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
	p := &Profile{
		Period:     period * 1000,
		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
		SampleType: []*ValueType{
			{Type: "samples", Unit: "count"},
			{Type: "cpu", Unit: "nanoseconds"},
		},
	}
	rest, _, err := parseCPUSamples(b, parse, true, p)
	if err != nil {
		return nil, err
	}

	// If *most* samples have the same second-to-the-bottom frame, it
	// strongly suggests that it is an uninteresting artifact of
	// measurement -- a stack frame pushed by the signal handler. The
	// bottom frame is always correct as it is picked up from the signal
	// structure, not the stack. Check if this is the case and if so,
	// remove.
	const (
		// Remove up to two frames.
		maxPasses = 2
		// Allow one different sample for this many samples with the same
		// second-to-last frame.
		similarSamples = 32
	)
	total := len(p.Sample)
	threshold := total - total/similarSamples
	for pass := 0; pass < maxPasses; pass++ {
		// Count how often each address appears as the second frame.
		frequency := make(map[uint64]int)
		for _, s := range p.Sample {
			if len(s.Location) >= 2 {
				frequency[s.Location[1].Address]++
			}
		}
		for addr, n := range frequency {
			if n < threshold {
				continue
			}
			// Found uninteresting frame, strip it out from all samples
			for _, s := range p.Sample {
				if len(s.Location) >= 2 && s.Location[1].Address == addr {
					s.Location = append(s.Location[:1], s.Location[2:]...)
				}
			}
			break
		}
	}

	if err := p.ParseMemoryMap(bytes.NewBuffer(rest)); err != nil {
		return nil, err
	}
	cleanupDuplicateLocations(p)
	return p, nil
}
// cleanupDuplicateLocations removes a duplicated leaf frame from every
// sample of p.
//
// The profile handler may duplicate the leaf frame, because it gets
// its address both from stack unwinding and from the signal
// context. The duplicate has been adjusted by -1, whereas the leaf
// address is not adjusted as it is not a call; so a second frame whose
// address is exactly one less than the leaf's is dropped.
func cleanupDuplicateLocations(p *Profile) {
	for _, s := range p.Sample {
		if len(s.Location) < 2 {
			continue
		}
		leaf, next := s.Location[0], s.Location[1]
		if next.Address+1 == leaf.Address {
			s.Location = append(s.Location[:1], s.Location[2:]...)
		}
	}
}
// parseCPUSamples parses a collection of profilez samples from a
// profile.
//
// profilez samples are a repeated sequence of stack frames of the
// form:
//
//	1st word -- The number of times this stack was encountered.
//	2nd word -- The size of the stack (StackSize).
//	3rd word -- The first address on the stack.
//	...
//	StackSize + 2 -- The last address on the stack
//
// The last stack trace is of the form:
//
//	1st word -- 0
//	2nd word -- 1
//	3rd word -- 0
//
// Addresses from stack traces may point to the next instruction after
// each call. Optionally adjust by -1 to land somewhere on the actual
// call (except for the leaf, which is not a call).
//
// Samples and newly seen locations are appended to p. The function
// returns the bytes following the end-of-data marker (or whatever is
// left if the input ends without one) and the address-to-location map.
// It returns errUnrecognized if a sample header is incomplete, declares
// an implausible stack size, or if the input ends in the middle of a
// stack.
func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
	locs := make(map[uint64]*Location)
	for len(b) > 0 {
		var count, nstk uint64
		count, b = parse(b)
		nstk, b = parse(b)
		// A word is at least 4 bytes, so a stack cannot hold more than
		// len(b)/4 addresses. This also bounds the allocation below.
		if b == nil || nstk > uint64(len(b)/4) {
			return nil, nil, errUnrecognized
		}
		var sloc []*Location
		addrs := make([]uint64, nstk)
		for i := range addrs {
			addrs[i], b = parse(b)
			if b == nil {
				// The bound above assumes 4-byte words; with 8-byte words
				// the input can still run out in the middle of the stack.
				// Reject it rather than recording zero addresses.
				return nil, nil, errUnrecognized
			}
		}

		if count == 0 && nstk == 1 && addrs[0] == 0 {
			// End of data marker
			break
		}
		for i, addr := range addrs {
			if adjust && i > 0 {
				addr--
			}
			loc := locs[addr]
			if loc == nil {
				loc = &Location{
					Address: addr,
				}
				locs[addr] = loc
				p.Location = append(p.Location, loc)
			}
			sloc = append(sloc, loc)
		}
		p.Sample = append(p.Sample,
			&Sample{
				Value:    []int64{int64(count), int64(count) * p.Period},
				Location: sloc,
			})
	}
	// Reached the end without finding the EOD marker.
	return b, locs, nil
}
// parseHeap parses a heapz legacy or a growthz profile and
// returns a newly populated Profile.
//
// The first line must be a heap, growth or fragmentation header,
// otherwise errUnrecognized is returned. Sample rows follow until a
// memory map sentinel; the remainder is handed to
// parseAdditionalSections.
func parseHeap(b []byte) (p *Profile, err error) {
	s := bufio.NewScanner(bytes.NewBuffer(b))
	if !s.Scan() {
		if scanErr := s.Err(); scanErr != nil {
			return nil, scanErr
		}
		return nil, errUnrecognized
	}
	header := s.Text()

	p = &Profile{
		PeriodType: &ValueType{Type: "space", Unit: "bytes"},
	}
	var (
		sampling string
		hasAlloc bool
	)
	switch {
	case heapHeaderRE.MatchString(header):
		if sampling, p.Period, hasAlloc, err = parseHeapHeader(header); err != nil {
			return nil, err
		}
	case growthHeaderRE.MatchString(header), fragmentationHeaderRE.MatchString(header):
		p.Period = 1
	default:
		return nil, errUnrecognized
	}

	p.SampleType = []*ValueType{
		{Type: "objects", Unit: "count"},
		{Type: "space", Unit: "bytes"},
	}
	if hasAlloc {
		// Put alloc before inuse so that default pprof selection
		// will prefer inuse_space.
		p.SampleType = []*ValueType{
			{Type: "alloc_objects", Unit: "count"},
			{Type: "alloc_space", Unit: "bytes"},
			{Type: "inuse_objects", Unit: "count"},
			{Type: "inuse_space", Unit: "bytes"},
		}
	}

	// byAddr de-duplicates Location objects by (adjusted) address.
	byAddr := make(map[uint64]*Location)
	for s.Scan() {
		row := strings.TrimSpace(s.Text())
		if isSpaceOrComment(row) {
			continue
		}
		if isMemoryMapSentinel(row) {
			break
		}

		value, blocksize, addrs, err := parseHeapSample(row, p.Period, sampling, hasAlloc)
		if err != nil {
			return nil, err
		}

		var stack []*Location
		for _, addr := range addrs {
			// Addresses from stack traces point to the next instruction after
			// each call. Adjust by -1 to land somewhere on the actual call.
			addr--
			loc, known := byAddr[addr]
			if !known {
				loc = &Location{Address: addr}
				byAddr[addr] = loc
				p.Location = append(p.Location, loc)
			}
			stack = append(stack, loc)
		}

		p.Sample = append(p.Sample, &Sample{
			Value:    value,
			Location: stack,
			NumLabel: map[string][]int64{"bytes": {blocksize}},
		})
	}
	if scanErr := s.Err(); scanErr != nil {
		return nil, scanErr
	}
	if err := parseAdditionalSections(s, p); err != nil {
		return nil, err
	}
	return p, nil
}
// parseHeapHeader extracts the sampling scheme, the sampling period and
// whether allocation counters are present from a heap profile header.
//
// hasAlloc is set when a bracketed number is neither "0" nor equal to
// its unbracketed counterpart. The format name selects the result:
// "heapz_v2"/"heap_v2" use "v2" sampling with the stated period,
// "heapprofile" uses no sampling and period 1, and "heap" uses "v2"
// sampling with half the stated period. Anything else, or an
// unparseable period, yields errUnrecognized.
func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
	header := heapHeaderRE.FindStringSubmatch(line)
	if header == nil {
		return "", 0, false, errUnrecognized
	}
	count1, size1 := header[1], header[2]
	count2, size2 := header[3], header[4]
	format, rate := header[5], header[6]

	if rate != "" {
		period, err = strconv.ParseInt(rate, 10, 64)
		if err != nil {
			return "", 0, false, errUnrecognized
		}
	}

	countsDiffer := count2 != "0" && count2 != count1
	sizesDiffer := size2 != "0" && size2 != size1
	hasAlloc = countsDiffer || sizesDiffer

	switch format {
	case "heapprofile":
		return "", 1, hasAlloc, nil
	case "heap":
		return "v2", period / 2, hasAlloc, nil
	case "heapz_v2", "heap_v2":
		return "v2", period, hasAlloc, nil
	}
	return "", 0, false, errUnrecognized
}
// parseHeapSample parses a single row from a heap profile.
//
// It returns the sample values (the allocation count/size pair first
// when includeAlloc is set, then the in-use pair), the block size
// (size divided by count of the last pair with a non-zero count) and
// the stack addresses. When sampling is "v2" each pair is scaled with
// scaleHeapSample using rate.
func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
	fields := heapSampleRE.FindStringSubmatch(line)
	if got := len(fields); got != 6 {
		return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", got)
	}

	// Each counter is a (count, size) pair of decimal strings plus the
	// label used in error messages. Order determines the value layout.
	type counter struct{ count, size, label string }
	counters := make([]counter, 0, 2)
	if includeAlloc {
		counters = append(counters, counter{fields[3], fields[4], "allocation"})
	}
	counters = append(counters, counter{fields[1], fields[2], "inuse"})

	for _, c := range counters {
		count, err := strconv.ParseInt(c.count, 10, 64)
		if err != nil {
			return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
		}
		size, err := strconv.ParseInt(c.size, 10, 64)
		if err != nil {
			return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
		}
		if count == 0 && size != 0 {
			return nil, 0, nil, fmt.Errorf("%s count was 0 but %s bytes was %d", c.label, c.label, size)
		}
		if count != 0 {
			// The block size is taken before scaling.
			blocksize = size / count
			if sampling == "v2" {
				count, size = scaleHeapSample(count, size, rate)
			}
		}
		value = append(value, count, size)
	}

	addrs, err = parseHexAddresses(fields[5])
	if err != nil {
		return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}
	return value, blocksize, addrs, nil
}
// parseHexAddresses finds every 0x-prefixed hex number in s and
// converts it to an unsigned 64-bit value. The results are returned in
// order of appearance (nil if s contains none). An error is returned as
// soon as a number does not fit in 64 bits, which indicates a malformed
// profile.
func parseHexAddresses(s string) ([]uint64, error) {
	var addrs []uint64
	for _, token := range hexNumberRE.FindAllString(s, -1) {
		addr, err := strconv.ParseUint(token, 0, 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", token)
		}
		addrs = append(addrs, addr)
	}
	return addrs, nil
}
// scaleHeapSample adjusts the data from a heapz Sample to
// account for its probability of appearing in the collected
// data. heapz profiles are a sampling of the memory allocations
// requests in a program. We estimate the unsampled value by dividing
// each collected sample by its probability of appearing in the
// profile. heapz v2 profiles rely on a poisson process to determine
// which samples to collect, based on the desired average collection
// rate R. The probability of a sample of size S to appear in that
// profile is 1-exp(-S/R).
//
// It returns the scaled count and size. A zero count or size yields
// (0, 0); a rate of 1 or less returns the inputs unchanged.
func scaleHeapSample(count, size, rate int64) (int64, int64) {
	switch {
	case count == 0 || size == 0:
		return 0, 0
	case rate <= 1:
		// if rate==1 all samples were collected so no adjustment is needed.
		// if rate<1 treat as unknown and skip scaling.
		return count, size
	}

	n, total := float64(count), float64(size)
	avgSize := total / n
	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
	return int64(n * scale), int64(total * scale)
}
// parseContention parses a mutex or contention profile. There are 2 cases:
// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
//
// After the header line the input is consumed in phases from one shared
// scanner: "attribute = value" lines first, then sample lines, and
// finally any additional sections via parseAdditionalSections. It returns
// errUnrecognized if the header or an attribute is not recognized, and
// otherwise any error from the scanner or from sample parsing.
func parseContention(b []byte) (*Profile, error) {
s := bufio.NewScanner(bytes.NewBuffer(b))
if !s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
return nil, errUnrecognized
}
// The first line must carry one of the known headers; the empty cases
// simply accept the line.
switch l := s.Text(); {
case strings.HasPrefix(l, "--- contentionz "):
case strings.HasPrefix(l, "--- mutex:"):
case strings.HasPrefix(l, "--- contention:"):
default:
return nil, errUnrecognized
}
p := &Profile{
PeriodType: &ValueType{Type: "contentions", Unit: "count"},
Period: 1,
SampleType: []*ValueType{
{Type: "contentions", Unit: "count"},
{Type: "delay", Unit: "nanoseconds"},
},
}
var cpuHz int64
// Parse text of the form "attribute = value" before the samples.
const delimiter = "="
for s.Scan() {
line := s.Text()
if line = strings.TrimSpace(line); isSpaceOrComment(line) {
continue
}
if strings.HasPrefix(line, "---") {
break
}
attr := strings.SplitN(line, delimiter, 2)
if len(attr) != 2 {
// Not an attribute: stop here. The line remains the scanner's
// current line and is picked up by the sample loop below.
break
}
key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
var err error
switch key {
case "cycles/second":
if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
return nil, errUnrecognized
}
case "sampling period":
if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
return nil, errUnrecognized
}
case "ms since reset":
ms, err := strconv.ParseInt(val, 0, 64)
if err != nil {
return nil, errUnrecognized
}
// Convert milliseconds to nanoseconds.
p.DurationNanos = ms * 1000 * 1000
case "format":
// CPP contentionz profiles don't have format.
return nil, errUnrecognized
case "resolution":
// CPP contentionz profiles don't have resolution.
return nil, errUnrecognized
case "discarded samples":
default:
return nil, errUnrecognized
}
}
if err := s.Err(); err != nil {
return nil, err
}
// locs de-duplicates Location objects by (adjusted) address.
locs := make(map[uint64]*Location)
// The loop starts with the scanner's current line (left unprocessed by
// the attribute loop) and only advances at the bottom of each iteration.
for {
line := strings.TrimSpace(s.Text())
if strings.HasPrefix(line, "---") {
break
}
if !isSpaceOrComment(line) {
value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
if err != nil {
return nil, err
}
var sloc []*Location
for _, addr := range addrs {
// Addresses from stack traces point to the next instruction after
// each call. Adjust by -1 to land somewhere on the actual call.
addr--
loc := locs[addr]
if locs[addr] == nil {
loc = &Location{
Address: addr,
}
p.Location = append(p.Location, loc)
locs[addr] = loc
}
sloc = append(sloc, loc)
}
p.Sample = append(p.Sample, &Sample{
Value: value,
Location: sloc,
})
}
if !s.Scan() {
break
}
}
if err := s.Err(); err != nil {
return nil, err
}
if err := parseAdditionalSections(s, p); err != nil {
return nil, err
}
return p, nil
}
// parseContentionSample parses a single row from a contention profile.
//
// A row holds two decimal numbers followed by "@" and the stack
// addresses. The returned value slice is {second number, first number},
// matching the {contentions, delay} sample types of the profile. If the
// row does not match, errUnrecognized is returned.
func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
	fields := contentionSampleRE.FindStringSubmatch(line)
	if fields == nil {
		return nil, nil, errUnrecognized
	}

	delay, err := strconv.ParseInt(fields[1], 10, 64)
	if err != nil {
		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}
	contentions, err := strconv.ParseInt(fields[2], 10, 64)
	if err != nil {
		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}

	// Unsample values if period and cpuHz are available.
	// - Delays are scaled to cycles and then to nanoseconds.
	// - Contentions are scaled to cycles.
	if period > 0 && cpuHz > 0 {
		cpuGHz := float64(cpuHz) / 1e9
		delay = int64(float64(delay) * float64(period) / cpuGHz)
	}
	if period > 0 {
		contentions *= period
	}

	addrs, err = parseHexAddresses(fields[3])
	if err != nil {
		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
	}
	return []int64{contentions, delay}, addrs, nil
}
// parseThread parses a Threadz profile and returns a new Profile.
//
// The input must begin (after blank and comment lines) with either a
// "--- threadz <n> ---" header or directly with a thread start line;
// otherwise errUnrecognized is returned. Each thread's stack becomes one
// sample with value 1, and a "same as previous thread" stack increments
// the value of the preceding sample instead. Parsing of threads stops at
// a memory map sentinel or a "---- no stack trace for" line; the rest is
// handed to parseAdditionalSections.
func parseThread(b []byte) (*Profile, error) {
s := bufio.NewScanner(bytes.NewBuffer(b))
// Skip past comments and empty lines seeking a real header.
for s.Scan() && isSpaceOrComment(s.Text()) {
}
line := s.Text()
if m := threadzStartRE.FindStringSubmatch(line); m != nil {
// Advance over initial comments until first stack trace.
for s.Scan() {
if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
break
}
}
} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
return nil, errUnrecognized
}
p := &Profile{
SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
PeriodType: &ValueType{Type: "thread", Unit: "count"},
Period: 1,
}
// locs de-duplicates Location objects by (adjusted) address.
locs := make(map[uint64]*Location)
// Recognize each thread and populate profile samples.
// On entry to each iteration, line holds the line that ended the previous
// stack (as returned by parseThreadSample), which must start a thread.
for !isMemoryMapSentinel(line) {
if strings.HasPrefix(line, "---- no stack trace for") {
break
}
if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
return nil, errUnrecognized
}
var addrs []uint64
var err error
line, addrs, err = parseThreadSample(s)
if err != nil {
return nil, err
}
if len(addrs) == 0 {
// We got a --same as previous threads--. Bump counters.
if len(p.Sample) > 0 {
s := p.Sample[len(p.Sample)-1]
s.Value[0]++
}
continue
}
var sloc []*Location
for i, addr := range addrs {
// Addresses from stack traces point to the next instruction after
// each call. Adjust by -1 to land somewhere on the actual call
// (except for the leaf, which is not a call).
if i > 0 {
addr--
}
loc := locs[addr]
if locs[addr] == nil {
loc = &Location{
Address: addr,
}
p.Location = append(p.Location, loc)
locs[addr] = loc
}
sloc = append(sloc, loc)
}
p.Sample = append(p.Sample, &Sample{
Value: []int64{1},
Location: sloc,
})
}
if err := parseAdditionalSections(s, p); err != nil {
return nil, err
}
cleanupDuplicateLocations(p)
return p, nil
}
// parseThreadSample parses a symbolized or unsymbolized stack trace.
//
// It reads lines from s until one starts with "---" or the input ends,
// collecting every hex address found on the way. It returns the last
// line read (trimmed), the collected addresses (nil if a
// 'same as previous thread' marker was seen) and an error.
func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
	var (
		last           string
		sameAsPrevious bool
	)
scan:
	for s.Scan() {
		last = strings.TrimSpace(s.Text())
		switch {
		case last == "":
			continue
		case strings.HasPrefix(last, "---"):
			break scan
		case strings.Contains(last, "same as previous thread"):
			sameAsPrevious = true
			continue
		}

		found, err := parseHexAddresses(last)
		if err != nil {
			return "", nil, fmt.Errorf("malformed sample: %s: %v", last, err)
		}
		addrs = append(addrs, found...)
	}
	if scanErr := s.Err(); scanErr != nil {
		return "", nil, scanErr
	}

	if sameAsPrevious {
		// Any addresses collected alongside the marker are discarded.
		addrs = nil
	}
	return last, addrs, nil
}
// parseAdditionalSections parses any additional sections in the
// profile, ignoring any unrecognized sections: it skips forward from
// the scanner's current line to the memory map sentinel (or the end of
// input) and parses what follows as the memory map of p.
func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
	for {
		if isMemoryMapSentinel(s.Text()) {
			break
		}
		if !s.Scan() {
			break
		}
	}
	if err := s.Err(); err != nil {
		return err
	}
	return p.ParseMemoryMapFromScanner(s)
}
// ParseProcMaps parses a memory map in the format of /proc/self/maps
// read from rd and returns the mappings it describes.
// ParseMemoryMap should be called instead on a profile to also
// associate its locations to the corresponding mapping based on their
// address.
func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
	return parseProcMapsFromScanner(bufio.NewScanner(rd))
}
// parseProcMapsFromScanner reads the remaining lines of s and returns
// the mappings found. Log prefixes are stripped from each line first.
// Lines of the form "attr=value" define substitutions: "$attr" is
// replaced by value on subsequent lines. Other unrecognized lines and
// skipped (nil) entries are ignored.
func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
	const delimiter = "="
	var (
		mappings      []*Mapping
		substitutions []string
	)
	expand := strings.NewReplacer()
	for s.Scan() {
		line := expand.Replace(removeLoggingInfo(s.Text()))
		m, err := parseMappingEntry(line)
		switch {
		case err == nil:
			if m != nil {
				mappings = append(mappings, m)
			}
		case err == errUnrecognized:
			// Recognize assignments of the form: attr=value, and replace
			// $attr with value on subsequent mappings.
			if kv := strings.SplitN(line, delimiter, 2); len(kv) == 2 {
				substitutions = append(substitutions, "$"+strings.TrimSpace(kv[0]), strings.TrimSpace(kv[1]))
				expand = strings.NewReplacer(substitutions...)
			}
			// Ignore any unrecognized entries
		default:
			return nil, err
		}
	}
	if err := s.Err(); err != nil {
		return nil, err
	}
	return mappings, nil
}
// removeLoggingInfo detects and removes log prefix entries generated
// by the glog package, i.e. everything up to and including the
// "file:line] " prefix. If no logging prefix is detected, the string
// is returned unmodified.
func removeLoggingInfo(line string) string {
	prefix := logInfoRE.FindStringIndex(line)
	if prefix == nil {
		return line
	}
	end := prefix[1]
	return line[end:]
}
// ParseMemoryMap parses a memory map in the format of
// /proc/self/maps read from rd and adds its mappings to the current
// profile. It renumbers the locations, functions and mappings in the
// profile correspondingly (see ParseMemoryMapFromScanner).
func (p *Profile) ParseMemoryMap(rd io.Reader) error {
	scanner := bufio.NewScanner(rd)
	return p.ParseMemoryMapFromScanner(scanner)
}
// ParseMemoryMapFromScanner parses a memory map in the format of
// /proc/self/maps or a variety of legacy format from the remaining
// lines of s, and appends the resulting mappings to the current
// profile. It then massages the mappings and renumbers the locations,
// functions and mappings in the profile correspondingly. On a parse
// error the profile is left untouched.
func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
	parsed, err := parseProcMapsFromScanner(s)
	if err != nil {
		return err
	}
	p.Mapping = append(p.Mapping, parsed...)

	// The order matters: function IDs are derived from the renumbered
	// locations.
	p.massageMappings()
	p.remapLocationIDs()
	p.remapFunctionIDs()
	p.remapMappingIDs()
	return nil
}
// parseMappingEntry parses one memory map line, in either the
// /proc/.../maps layout or the brief layout.
//
// It returns errUnrecognized if the line matches neither layout or a
// number does not fit in 64 bits, and (nil, nil) for an entry that has
// permissions but is not executable.
func parseMappingEntry(l string) (*Mapping, error) {
	var file, offset, buildID string
	fields := procMapsRE.FindStringSubmatch(l)
	if len(fields) == 6 {
		offset, file = fields[4], fields[5]
	} else {
		fields = briefMapsRE.FindStringSubmatch(l)
		if len(fields) != 7 {
			return nil, errUnrecognized
		}
		file, offset, buildID = fields[4], fields[5], fields[6]
	}
	// Both layouts share the first three captures.
	start, end, perm := fields[1], fields[2], fields[3]

	if perm != "" && !strings.Contains(perm, "x") {
		// Skip non-executable entries.
		return nil, nil
	}

	lo, err := strconv.ParseUint(start, 16, 64)
	if err != nil {
		return nil, errUnrecognized
	}
	hi, err := strconv.ParseUint(end, 16, 64)
	if err != nil {
		return nil, errUnrecognized
	}
	var off uint64
	if offset != "" {
		if off, err = strconv.ParseUint(offset, 16, 64); err != nil {
			return nil, errUnrecognized
		}
	}
	return &Mapping{
		Start:   lo,
		Limit:   hi,
		Offset:  off,
		File:    file,
		BuildID: buildID,
	}, nil
}
// memoryMapSentinels are the markers that introduce the memory map
// section of a legacy profile.
var memoryMapSentinels = []string{
	"--- Memory map: ---",
	"MAPPED_LIBRARIES:",
}

// isMemoryMapSentinel returns true if the string contains one of the
// known sentinels for memory map information.
func isMemoryMapSentinel(line string) bool {
	found := false
	for i := 0; i < len(memoryMapSentinels) && !found; i++ {
		found = strings.Contains(line, memoryMapSentinels[i])
	}
	return found
}
// addLegacyFrameInfo sets p.DropFrames and p.KeepFrames according to
// the kind of profile, as determined from its sample types: heap
// profiles use the allocator patterns, contention profiles the lock
// patterns, and everything else the CPU profiler patterns.
func (p *Profile) addLegacyFrameInfo() {
	drop, keep := cpuProfilerRxStr, ""
	if isProfileType(p, heapzSampleTypes) {
		drop, keep = allocRxStr, allocSkipRxStr
	} else if isProfileType(p, contentionzSampleTypes) {
		drop = lockRxStr
	}
	p.DropFrames, p.KeepFrames = drop, keep
}
// heapzSampleTypes lists the sample type name sequences that
// addLegacyFrameInfo treats as identifying a heap profile. Each entry
// must match a profile's sample types exactly, in order (see
// isProfileType).
var heapzSampleTypes = [][]string{
{"allocations", "size"}, // early Go pprof profiles
{"objects", "space"},
{"inuse_objects", "inuse_space"},
{"alloc_objects", "alloc_space"},
{"alloc_objects", "alloc_space", "inuse_objects", "inuse_space"}, // Go pprof legacy profiles
}
// contentionzSampleTypes lists the sample type name sequences that
// addLegacyFrameInfo treats as identifying a contention profile.
var contentionzSampleTypes = [][]string{
{"contentions", "delay"},
}
// isProfileType reports whether the sample types of p match one of the
// given name sequences exactly: same length and the same type name at
// every position.
func isProfileType(p *Profile, types [][]string) bool {
	actual := p.SampleType
	matches := func(want []string) bool {
		if len(want) != len(actual) {
			return false
		}
		for i, name := range want {
			if actual[i].Type != name {
				return false
			}
		}
		return true
	}

	for _, candidate := range types {
		if matches(candidate) {
			return true
		}
	}
	return false
}
// allocRxStr is a regular expression source, built as an alternation
// ("|") of the patterns below, naming memory-allocation routines.
// addLegacyFrameInfo installs it as DropFrames for heap profiles.
var allocRxStr = strings.Join([]string{
// POSIX entry points.
`calloc`,
`cfree`,
`malloc`,
`free`,
`memalign`,
`do_memalign`,
`(__)?posix_memalign`,
`pvalloc`,
`valloc`,
`realloc`,
// TC malloc.
`tcmalloc::.*`,
`tc_calloc`,
`tc_cfree`,
`tc_malloc`,
`tc_free`,
`tc_memalign`,
`tc_posix_memalign`,
`tc_pvalloc`,
`tc_valloc`,
`tc_realloc`,
`tc_new`,
`tc_delete`,
`tc_newarray`,
`tc_deletearray`,
`tc_new_nothrow`,
`tc_newarray_nothrow`,
// Memory-allocation routines on OS X.
`malloc_zone_malloc`,
`malloc_zone_calloc`,
`malloc_zone_valloc`,
`malloc_zone_realloc`,
`malloc_zone_memalign`,
`malloc_zone_free`,
// Go runtime
`runtime\..*`,
// Other misc. memory allocation routines
`BaseArena::.*`,
`(::)?do_malloc_no_errno`,
`(::)?do_malloc_pages`,
`(::)?do_malloc`,
`DoSampledAllocation`,
`MallocedMemBlock::MallocedMemBlock`,
`_M_allocate`,
`__builtin_(vec_)?delete`,
`__builtin_(vec_)?new`,
`__gnu_cxx::new_allocator::allocate`,
`__libc_malloc`,
`__malloc_alloc_template::allocate`,
`allocate`,
`cpp_alloc`,
`operator new(\[\])?`,
`simple_alloc::allocate`,
}, `|`)
// allocSkipRxStr is a regular expression source, built as an
// alternation ("|") of the patterns below. addLegacyFrameInfo installs
// it as KeepFrames for heap profiles, alongside allocRxStr as
// DropFrames.
var allocSkipRxStr = strings.Join([]string{
// Preserve Go runtime frames that appear in the middle/bottom of
// the stack.
`runtime\.panic`,
`runtime\.reflectcall`,
`runtime\.call[0-9]*`,
}, `|`)
// cpuProfilerRxStr is a regular expression source, built as an
// alternation ("|") of the patterns below. addLegacyFrameInfo installs
// it as DropFrames for profiles that are neither heap nor contention
// profiles.
var cpuProfilerRxStr = strings.Join([]string{
`ProfileData::Add`,
`ProfileData::prof_handler`,
`CpuProfiler::prof_handler`,
`__pthread_sighandler`,
`__restore`,
}, `|`)
// lockRxStr is a regular expression source, built as an alternation
// ("|") of the patterns below, naming lock and mutex bookkeeping
// routines. addLegacyFrameInfo installs it as DropFrames for
// contention profiles.
var lockRxStr = strings.Join([]string{
`RecordLockProfileData`,
`(base::)?RecordLockProfileData.*`,
`(base::)?SubmitMutexProfileData.*`,
`(base::)?SubmitSpinLockProfileData.*`,
`(base::Mutex::)?AwaitCommon.*`,
`(base::Mutex::)?Unlock.*`,
`(base::Mutex::)?UnlockSlow.*`,
`(base::Mutex::)?ReaderUnlock.*`,
`(base::MutexLock::)?~MutexLock.*`,
`(Mutex::)?AwaitCommon.*`,
`(Mutex::)?Unlock.*`,
`(Mutex::)?UnlockSlow.*`,
`(Mutex::)?ReaderUnlock.*`,
`(MutexLock::)?~MutexLock.*`,
`(SpinLock::)?Unlock.*`,
`(SpinLock::)?SlowUnlock.*`,
`(SpinLockHolder::)?~SpinLockHolder.*`,
}, `|`)