diff --git a/model/textparse/README.md b/model/textparse/README.md index 697966f097..4fcfbceea4 100644 --- a/model/textparse/README.md +++ b/model/textparse/README.md @@ -3,4 +3,4 @@ In the rare case that you need to update the textparse lexers, edit promlex.l or `golex -o=promlex.l.go promlex.l` Note that you need golex installed: -`go get -u modernc.org/golex` \ No newline at end of file +`go install modernc.org/golex@latest` \ No newline at end of file diff --git a/model/textparse/interface.go b/model/textparse/interface.go index df01dbc34f..2dcf4fb4a8 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -51,6 +51,10 @@ type Parser interface { // The returned byte slices become invalid after the next call to Next. Unit() ([]byte, []byte) + // IdentifyingLabels returns the identifying labels for info metrics. + // The returned slice may be reused by the parser and must not be retained after the next call to Next. + IdentifyingLabels() []string + // Comment returns the text of the current comment. // Must only be called after Next returned a comment entry. // The returned byte slice becomes invalid after the next call to Next. diff --git a/model/textparse/openmetricslex.l b/model/textparse/openmetricslex.l index 9afbbbd8bd..b0b0350ab0 100644 --- a/model/textparse/openmetricslex.l +++ b/model/textparse/openmetricslex.l @@ -33,10 +33,10 @@ func (l *openMetricsLexer) Lex() token { D [0-9] L [a-zA-Z_] M [a-zA-Z_:] -C [^\n] +C [^\n()] S [ ] -%x sComment sMeta1 sMeta2 sLabels sLValue sValue sTimestamp sExemplar sEValue sETimestamp +%x sComment sMeta1 sMeta2 sLabels sLValue sValue sTimestamp sExemplar sEValue sETimestamp sIdens %yyc c %yyn c = l.next() @@ -52,7 +52,12 @@ S [ ] "EOF"\n? 
l.state = sInit; return tEOFWord \"(\\.|[^\\"])*\" l.state = sMeta2; return tMName {M}({M}|{D})* l.state = sMeta2; return tMName -{S}{C}*\n l.state = sInit; return tText +{S}{C}* l.state = sMeta2; return tText +\( l.state = sIdens; return tParentOpen +{L}({L}|{D})* return tLName +, return tComma +\) l.state = sMeta2; return tParentClose +\n l.state = sInit; return tLinebreak {M}({M}|{D})* l.state = sValue; return tMName \{ l.state = sLabels; return tBraceOpen diff --git a/model/textparse/openmetricslex.l.go b/model/textparse/openmetricslex.l.go index c8789ef60d..333d2c80fd 100644 --- a/model/textparse/openmetricslex.l.go +++ b/model/textparse/openmetricslex.l.go @@ -43,19 +43,21 @@ yystate0: case 3: // start condition: sMeta2 goto yystart31 case 4: // start condition: sLabels - goto yystart34 + goto yystart35 case 5: // start condition: sLValue - goto yystart42 + goto yystart43 case 6: // start condition: sValue - goto yystart46 + goto yystart47 case 7: // start condition: sTimestamp - goto yystart50 + goto yystart51 case 8: // start condition: sExemplar - goto yystart57 + goto yystart58 case 9: // start condition: sEValue - goto yystart62 + goto yystart63 case 10: // start condition: sETimestamp - goto yystart68 + goto yystart69 + case 11: // start condition: sIdens + goto yystart73 } yystate1: @@ -89,14 +91,14 @@ yystate4: c = l.next() switch { default: - goto yyrule9 + goto yyrule14 case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': goto yystate4 } yystate5: c = l.next() - goto yyrule11 + goto yyrule16 yystate6: c = l.next() @@ -319,344 +321,381 @@ yystart31: default: goto yyabort case c == ' ': + goto yystate33 + case c == '(': + goto yystate34 + case c == '\n': goto yystate32 } yystate32: + c = l.next() + goto yyrule13 + +yystate33: c = l.next() switch { default: - goto yyabort - case c == '\n': + goto yyrule8 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\'' || c >= '*' && c <= 'ÿ': goto yystate33 - case c >= '\x01' 
&& c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate32 } -yystate33: +yystate34: c = l.next() - goto yyrule8 + goto yyrule9 -yystate34: +yystate35: c = l.next() -yystart34: +yystart35: switch { default: goto yyabort case c == '"': - goto yystate35 + goto yystate36 case c == ',': - goto yystate38 - case c == '=': goto yystate39 + case c == '=': + goto yystate40 case c == '}': - goto yystate41 + goto yystate42 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate40 + goto yystate41 } -yystate35: +yystate36: c = l.next() switch { default: goto yyabort case c == '"': - goto yystate36 - case c == '\\': goto yystate37 + case c == '\\': + goto yystate38 case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate35 + goto yystate36 } -yystate36: +yystate37: c = l.next() - goto yyrule13 + goto yyrule18 -yystate37: +yystate38: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate35 + goto yystate36 } -yystate38: - c = l.next() - goto yyrule16 - yystate39: c = l.next() - goto yyrule15 + goto yyrule21 yystate40: + c = l.next() + goto yyrule20 + +yystate41: c = l.next() switch { default: - goto yyrule12 + goto yyrule17 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate40 + goto yystate41 } -yystate41: +yystate42: c = l.next() - goto yyrule14 + goto yyrule19 -yystate42: +yystate43: c = l.next() -yystart42: +yystart43: switch { default: goto yyabort case c == '"': - goto yystate43 + goto yystate44 } -yystate43: +yystate44: c = l.next() switch { default: goto yyabort case c == '"': - goto yystate44 - case c == '\\': goto yystate45 + case c == '\\': + goto yystate46 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate43 + goto yystate44 } -yystate44: +yystate45: c = l.next() - goto yyrule17 + goto yyrule22 -yystate45: +yystate46: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate43 + goto yystate44 } -yystate46: +yystate47: c = l.next() -yystart46: +yystart47: switch { default: goto yyabort case c == ' ': - goto yystate47 + goto yystate48 case c == '{': - goto yystate49 + goto yystate50 } -yystate47: +yystate48: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate48 + goto yystate49 } -yystate48: +yystate49: c = l.next() switch { default: - goto yyrule18 + goto yyrule23 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate48 + goto yystate49 } -yystate49: +yystate50: c = l.next() - goto yyrule10 + goto yyrule15 -yystate50: +yystate51: c = l.next() -yystart50: +yystart51: switch { default: goto yyabort case c == ' ': - goto yystate52 + goto yystate53 case c == '\n': - goto yystate51 + goto yystate52 } -yystate51: +yystate52: c = l.next() - goto yyrule20 + goto yyrule25 -yystate52: +yystate53: c = l.next() switch { default: goto yyabort case c == '#': - goto yystate54 + goto yystate55 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c == '!' || c == '"' || c >= '$' && c <= 'ÿ': - goto yystate53 + goto yystate54 } -yystate53: +yystate54: c = l.next() switch { default: - goto yyrule19 + goto yyrule24 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate53 + goto yystate54 } -yystate54: +yystate55: c = l.next() switch { default: - goto yyrule19 + goto yyrule24 case c == ' ': - goto yystate55 + goto yystate56 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': - goto yystate53 + goto yystate54 } -yystate55: +yystate56: c = l.next() switch { default: goto yyabort case c == '{': - goto yystate56 + goto yystate57 } -yystate56: +yystate57: c = l.next() - goto yyrule21 + goto yyrule26 -yystate57: +yystate58: c = l.next() -yystart57: +yystart58: switch { default: goto yyabort case c == ',': - goto yystate58 - case c == '=': goto yystate59 + case c == '=': + goto yystate60 case c == '}': - goto yystate61 + goto yystate62 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate60 + goto yystate61 } -yystate58: - c = l.next() - goto yyrule26 - yystate59: c = l.next() - goto yyrule24 + goto yyrule31 yystate60: + c = l.next() + goto yyrule29 + +yystate61: c = l.next() switch { default: - goto yyrule22 + goto yyrule27 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate60 + goto yystate61 } -yystate61: +yystate62: c = l.next() - goto yyrule23 + goto yyrule28 -yystate62: +yystate63: c = l.next() -yystart62: +yystart63: switch { default: goto yyabort case c == ' ': - goto yystate63 + goto yystate64 case c == '"': - goto yystate65 + goto yystate66 } -yystate63: +yystate64: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate64 + goto yystate65 } -yystate64: +yystate65: c = l.next() switch { default: - goto yyrule27 + goto yyrule32 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate64 + goto yystate65 } -yystate65: +yystate66: c = l.next() switch { default: goto yyabort case c == '"': - goto yystate66 - case c == '\\': goto yystate67 + case c == '\\': + goto yystate68 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate65 + goto yystate66 } -yystate66: +yystate67: c = l.next() - goto yyrule25 + goto yyrule30 -yystate67: +yystate68: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate65 + goto yystate66 } -yystate68: +yystate69: c = l.next() -yystart68: +yystart69: switch { default: goto yyabort case c == ' ': - goto yystate70 + goto yystate71 case c == '\n': - goto yystate69 + goto yystate70 } -yystate69: +yystate70: c = l.next() - goto yyrule29 + goto yyrule34 -yystate70: +yystate71: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate71 + goto yystate72 } -yystate71: +yystate72: c = l.next() switch { default: - goto yyrule28 + goto yyrule33 case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate71 + goto yystate72 + } + +yystate73: + c = l.next() +yystart73: + switch { + default: + goto yyabort + case c == ')': + goto yystate74 + case c == ',': + goto yystate75 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate76 + } + +yystate74: + c = l.next() + goto yyrule12 + +yystate75: + c = l.next() + goto yyrule11 + +yystate76: + c = l.next() + switch { + default: + goto yyrule10 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate76 } yyrule1: // #{S} @@ -700,121 +739,147 @@ yyrule7: // {M}({M}|{D})* return tMName goto yystate0 } -yyrule8: // {S}{C}*\n +yyrule8: // {S}{C}* { - l.state = sInit + l.state = sMeta2 return tText goto yystate0 } -yyrule9: // {M}({M}|{D})* +yyrule9: // \( + { + l.state = sIdens + return tParentOpen + goto yystate0 + } +yyrule10: // {L}({L}|{D})* + { + return tLName + } +yyrule11: // , + { + return tComma + } +yyrule12: // \) + { + l.state = sMeta2 + return tParentClose + goto yystate0 + } +yyrule13: // \n + { + l.state = sInit + 
return tLinebreak + goto yystate0 + } +yyrule14: // {M}({M}|{D})* { l.state = sValue return tMName goto yystate0 } -yyrule10: // \{ +yyrule15: // \{ { l.state = sLabels return tBraceOpen goto yystate0 } -yyrule11: // \{ +yyrule16: // \{ { l.state = sLabels return tBraceOpen goto yystate0 } -yyrule12: // {L}({L}|{D})* +yyrule17: // {L}({L}|{D})* { return tLName } -yyrule13: // \"(\\.|[^\\"])*\" +yyrule18: // \"(\\.|[^\\"])*\" { l.state = sLabels return tQString goto yystate0 } -yyrule14: // \} +yyrule19: // \} { l.state = sValue return tBraceClose goto yystate0 } -yyrule15: // = +yyrule20: // = { l.state = sLValue return tEqual goto yystate0 } -yyrule16: // , +yyrule21: // , { return tComma } -yyrule17: // \"(\\.|[^\\"\n])*\" +yyrule22: // \"(\\.|[^\\"\n])*\" { l.state = sLabels return tLValue goto yystate0 } -yyrule18: // {S}[^ \n]+ +yyrule23: // {S}[^ \n]+ { l.state = sTimestamp return tValue goto yystate0 } -yyrule19: // {S}[^ \n]+ +yyrule24: // {S}[^ \n]+ { return tTimestamp } -yyrule20: // \n +yyrule25: // \n { l.state = sInit return tLinebreak goto yystate0 } -yyrule21: // {S}#{S}\{ +yyrule26: // {S}#{S}\{ { l.state = sExemplar return tComment goto yystate0 } -yyrule22: // {L}({L}|{D})* +yyrule27: // {L}({L}|{D})* { return tLName } -yyrule23: // \} +yyrule28: // \} { l.state = sEValue return tBraceClose goto yystate0 } -yyrule24: // = +yyrule29: // = { l.state = sEValue return tEqual goto yystate0 } -yyrule25: // \"(\\.|[^\\"\n])*\" +yyrule30: // \"(\\.|[^\\"\n])*\" { l.state = sExemplar return tLValue goto yystate0 } -yyrule26: // , +yyrule31: // , { return tComma } -yyrule27: // {S}[^ \n]+ +yyrule32: // {S}[^ \n]+ { l.state = sETimestamp return tValue goto yystate0 } -yyrule28: // {S}[^ \n]+ +yyrule33: // {S}[^ \n]+ { return tTimestamp } -yyrule29: // \n +yyrule34: // \n if true { // avoid go vet determining the below panic will not be reached l.state = sInit return tLinebreak @@ -844,25 +909,28 @@ yyabort: // no lexem recognized goto yystate31 } if false { 
- goto yystate34 + goto yystate35 + } + if false { + goto yystate43 } if false { - goto yystate42 + goto yystate47 } if false { - goto yystate46 + goto yystate51 } if false { - goto yystate50 + goto yystate58 } if false { - goto yystate57 + goto yystate63 } if false { - goto yystate62 + goto yystate69 } if false { - goto yystate68 + goto yystate73 } } diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index b7ad1dd85c..3efe57f7b5 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:generate go get -u modernc.org/golex +//go:generate go install modernc.org/golex@latest //go:generate golex -o=openmetricslex.l.go openmetricslex.l package textparse @@ -21,6 +21,7 @@ import ( "fmt" "io" "math" + "sort" "strings" "unicode/utf8" @@ -89,11 +90,12 @@ type OpenMetricsParser struct { // of the label name and value start and end characters. offsets []int - eOffsets []int - exemplar []byte - exemplarVal float64 - exemplarTs int64 - hasExemplarTs bool + identifyingLabels []string + eOffsets []int + exemplar []byte + exemplarVal float64 + exemplarTs int64 + hasExemplarTs bool } // NewOpenMetricsParser returns a new parser of the byte slice. @@ -141,6 +143,10 @@ func (p *OpenMetricsParser) Type() ([]byte, model.MetricType) { return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype } +func (p *OpenMetricsParser) IdentifyingLabels() []string { + return p.identifyingLabels +} + // Unit returns the metric name and unit in the current entry. // Must only be called after Next returned a unit entry. // The returned byte slices become invalid after the next call to Next. 
@@ -246,6 +252,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { p.start = p.l.i p.offsets = p.offsets[:0] + p.identifyingLabels = p.identifyingLabels[:0] p.eOffsets = p.eOffsets[:0] p.exemplar = p.exemplar[:0] p.exemplarVal = 0 @@ -275,10 +282,19 @@ func (p *OpenMetricsParser) Next() (Entry, error) { switch t2 := p.nextToken(); t2 { case tText: if len(p.l.buf()) > 1 { - p.text = p.l.buf()[1 : len(p.l.buf())-1] + p.text = p.l.buf()[1:len(p.l.buf())] } else { p.text = []byte{} } + t3 := p.nextToken() + if t == tType && t3 == tParentOpen { + if p.identifyingLabels, err = p.parseIdentifierLabels(); err != nil { + return EntryInvalid, err + } + } else if t3 != tLinebreak { + return EntryInvalid, p.parseError("expected linebreak get", t3) + } + default: return EntryInvalid, fmt.Errorf("expected text in %s", t.String()) } @@ -304,6 +320,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { default: return EntryInvalid, fmt.Errorf("invalid metric type %q", s) } + case tHelp: if !utf8.Valid(p.text) { return EntryInvalid, fmt.Errorf("help text %q is not a valid utf8 string", p.text) @@ -403,6 +420,52 @@ func (p *OpenMetricsParser) parseComment() error { return nil } +// parseIdentifierLabels parses the comma-separated label names that follow +// the opening parenthesis on a TYPE line, consuming the closing parenthesis +// and the linebreak after it. It returns the names sorted, or nil for an +// empty list. +func (p *OpenMetricsParser) parseIdentifierLabels() ([]string, error) { + res := p.identifyingLabels + t := p.nextToken() + // for the case when there is an empty () + if t == tParentClose { + if l := p.nextToken(); l != tLinebreak { + return nil, p.parseError("expected linebreak after )", l) + } + return nil, nil + } + + for { + curTStart := p.l.start + curTI := p.l.i + + switch t { + case tLName: + res = append(res, string(p.l.b[curTStart:curTI])) + default: + return nil, p.parseError("expected label or )", t) + } + + // Assign to the outer t instead of shadowing it, so that the token read + // after a comma is the one validated as a label name on the next iteration. + t = p.nextToken() + switch t { + case tComma: + t = p.nextToken() + continue + case tParentClose: + if l := p.nextToken(); l != tLinebreak { + return nil, p.parseError("expected linebreak after )", l) + } + // sort before returning labels + sort.Strings(res) + return res, nil + default: + 
return nil, p.parseError("expected comma or )", t) + } + } +} + func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, error) { t := p.nextToken() for { diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index bc76a540d3..fbf37414e2 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -68,6 +68,8 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" + input += "\n# TYPE ii info(a,b)" + input += "\nii{a=\"foo\",b=\"bar\",c=\"far\"} 1" input += "\n# EOF\n" int64p := func(x int64) *int64 { return &x } @@ -232,6 +234,14 @@ foo_total 17.0 1520879607.789 # {id="counter-test"} 5` m: "null_byte_metric{a=\"abc\x00\"}", v: 1, lset: labels.FromStrings("__name__", "null_byte_metric", "a", "abc\x00"), + }, { + m: "ii", + typ: model.MetricTypeInfo, + identlbs: []string{"a", "b"}, + }, { + m: "ii{a=\"foo\",b=\"bar\",c=\"far\"}", + v: 1, + lset: labels.FromStrings("__name__", "ii", "a", "foo", "b", "bar", "c", "far"), }, } diff --git a/model/textparse/promlex.l b/model/textparse/promlex.l index e9fa1fb71c..506b0cff51 100644 --- a/model/textparse/promlex.l +++ b/model/textparse/promlex.l @@ -30,6 +30,7 @@ const ( sExemplar sEValue sETimestamp + sIdens ) // Lex is called by the parser generated by "go tool yacc" to obtain each diff --git a/model/textparse/promlex.l.go b/model/textparse/promlex.l.go index a083e5549b..6b411584f8 100644 --- a/model/textparse/promlex.l.go +++ b/model/textparse/promlex.l.go @@ -31,6 +31,7 @@ const ( sExemplar sEValue sETimestamp + sIdens ) // Lex is called by the parser generated by "go tool yacc" to obtain each diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index a611f3aea7..56b48e0d2b 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -58,6 +58,8 @@ const ( tBlank tMName tQString + 
tParentOpen + tParentClose tBraceOpen tBraceClose tLName @@ -66,6 +68,7 @@ const ( tEqual tTimestamp tValue + tIdens ) func (t token) String() string { @@ -173,6 +176,11 @@ func NewPromParser(b []byte, st *labels.SymbolTable) Parser { } } +// IdentifyingLabels always returns nil; this parser does not produce identifying labels. +func (p *PromParser) IdentifyingLabels() []string { + return nil +} + // Series returns the bytes of the series, the timestamp if set, and the value // of the current sample. func (p *PromParser) Series() ([]byte, *int64, float64) { diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index 66986291d7..b4b24168a8 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -32,15 +32,16 @@ import ( ) type expectedParse struct { - lset labels.Labels - m string - t *int64 - v float64 - typ model.MetricType - help string - unit string - comment string - e *exemplar.Exemplar + lset labels.Labels + m string + t *int64 + v float64 + typ model.MetricType + help string + unit string + comment string + e *exemplar.Exemplar + identlbs []string } func TestPromParse(t *testing.T) { @@ -223,6 +224,7 @@ func checkParseResults(t *testing.T, p Parser, exp []expectedParse) { m, typ := p.Type() require.Equal(t, exp[i].m, string(m)) require.Equal(t, exp[i].typ, typ) + require.Equal(t, exp[i].identlbs, p.IdentifyingLabels()) case EntryHelp: m, h := p.Help() diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index ea3a2e1a34..c8a9eb2e01 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -91,6 +91,11 @@ func NewProtobufParser(b []byte, parseClassicHistograms bool, st *labels.SymbolT } } +// IdentifyingLabels always returns nil; this parser does not produce identifying labels. +func (p *ProtobufParser) IdentifyingLabels() []string { + return nil +} + // Series returns the bytes of a series with a simple float64 as a // value, the timestamp if set, and the value of the current sample. func (p *ProtobufParser) Series() ([]byte, *int64, float64) {