diff --git a/tools/readme_linter/README.md b/tools/readme_linter/README.md
new file mode 100644
index 000000000..463e7d7f5
--- /dev/null
+++ b/tools/readme_linter/README.md
@@ -0,0 +1,29 @@
+# README.md linter
+
+## Building
+
+```shell
+telegraf/tools/readme_linter$ go build .
+```
+
+## Running
+
+Run readme_linter with the filenames of the readme files you want to lint.
+
+```shell
+telegraf/tools/readme_linter$ ./readme_linter README.md
+```
+
+You can lint multiple filenames at once. This works well with shell globs.
+
+To lint all the plugin readmes:
+
+```shell
+telegraf/tools/readme_linter$ ./readme_linter ../../plugins/*/*/README.md
+```
+
+To lint readmes for inputs starting a-d:
+
+```shell
+telegraf/tools/readme_linter$ ./readme_linter ../../plugins/inputs/[a-d]*/README.md
+```
diff --git a/tools/readme_linter/assert.go b/tools/readme_linter/assert.go
new file mode 100644
index 000000000..a0f54a0e4
--- /dev/null
+++ b/tools/readme_linter/assert.go
@@ -0,0 +1,142 @@
+package main
+
+import (
+	"fmt"
+	"regexp"
+	"runtime"
+	"sort"
+
+	"github.com/yuin/goldmark/ast"
+)
+
+// T carries per-file lint state and is the receiver for all linter assert methods
+type T struct {
+	filename       string
+	markdown       []byte
+	newlineOffsets []int
+	sourceFlag     bool
+
+	fails int
+}
+
+// called by all assert functions that involve a node
+func (t *T) printFailedAssertf(n ast.Node, format string, args ...interface{}) {
+	t.printFile(n)
+	fmt.Printf(format+"\n", args...)
+	t.printRule(3)
+	t.fails++
+}
+
+// Assert function that doesn't involve a node, for example if something is missing
+func (t *T) assertf(format string, args ...interface{}) {
+	t.assertLine2f(0, format, args...) // There's no line number associated, so use the first
+}
+
+func (t *T) assertNodef(n ast.Node, format string, args ...interface{}) {
+	t.printFailedAssertf(n, format, args...)
+}
+
+func (t *T) assertLinef(line int, format string, args ...interface{}) {
+	// this func only exists to make the call stack to t.printRule the same depth
+	// as when called through assertf
+
+	t.assertLine2f(line, format, args...)
+}
+
+func (t *T) assertLine2f(line int, format string, args ...interface{}) {
+	t.printFileLine(line)
+	fmt.Printf(format+"\n", args...)
+	t.printRule(3)
+	t.fails++
+}
+
+func (t *T) printRule(callers int) {
+	if !t.sourceFlag {
+		return
+	}
+
+	pc, codeFilename, codeLine, ok := runtime.Caller(callers)
+	if !ok {
+		panic("can not get caller")
+	}
+
+	f := runtime.FuncForPC(pc)
+	var funcName string
+	if f != nil {
+		funcName = f.Name()
+	}
+
+	fmt.Printf("%s:%d: ", codeFilename, codeLine)
+	if len(funcName) == 0 {
+		fmt.Printf("failed assert\n")
+	} else {
+		fmt.Printf("failed assert in function %s\n", funcName)
+	}
+}
+
+func (t *T) line(offset int) int {
+	return sort.SearchInts(t.newlineOffsets, offset)
+}
+
+func (t *T) printFile(n ast.Node) {
+	lines := n.Lines()
+	if lines == nil || lines.Len() == 0 {
+		t.printFileLine(0)
+		return
+	}
+	offset := lines.At(0).Start
+	line := t.line(offset)
+	t.printFileLine(line)
+}
+
+func (t *T) printFileLine(line int) {
+	fmt.Printf("%s:%d: ", t.filename, line+1) // Lines start with 1
+}
+
+func (t *T) printPassFail() {
+	if t.fails == 0 {
+		fmt.Printf("Pass %s\n", t.filename)
+	} else {
+		fmt.Printf("Fail %s, %d failed assertions\n", t.filename, t.fails)
+	}
+}
+
+func (t *T) assertKind(expected ast.NodeKind, n ast.Node) {
+	if n.Kind() == expected {
+		return
+	}
+
+	t.printFailedAssertf(n, "expected %s, have %s", expected.String(), n.Kind().String())
+}
+
+func (t *T) assertFirstChildRegexp(expectedPattern string, n ast.Node) {
+	var validRegexp = regexp.MustCompile(expectedPattern)
+
+	if !n.HasChildren() {
+		t.printFailedAssertf(n, "expected children")
+		return
+	}
+	c := n.FirstChild()
+
+	actual := string(c.Text(t.markdown))
+
+	if !validRegexp.MatchString(actual) {
+		t.printFailedAssertf(n, "'%s' does not match regexp '%s'", actual, expectedPattern)
+		return
+	}
+}
+
+func (t *T) assertHeadingLevel(expected int, n ast.Node) {
+	h, ok := n.(*ast.Heading)
+	if !ok {
+		fmt.Printf("failed Heading type assertion\n")
+		t.fails++
+		return
+	}
+
+	if h.Level == expected {
+		return
+	}
+
+	t.printFailedAssertf(n, "expected header level %d, have %d", expected, h.Level)
+}
diff --git a/tools/readme_linter/main.go b/tools/readme_linter/main.go
new file mode 100644
index 000000000..f561100f5
--- /dev/null
+++ b/tools/readme_linter/main.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"flag"
+	"os"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/extension"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+	"github.com/yuin/goldmark/util"
+)
+
+func main() {
+	sourceFlag := flag.Bool("source", false, "include location of linter code that failed assertion")
+
+	flag.Parse()
+
+	var err error
+	for _, filename := range flag.Args() {
+		err = checkFile(filename, guessPluginType(filename), *sourceFlag)
+		if err != nil {
+			panic(err)
+		}
+	}
+}
+
+type ruleFunc func(*T, ast.Node) error
+
+type rulesMap map[plugin][]ruleFunc
+
+var rules rulesMap
+
+func init() {
+	rules = make(rulesMap)
+
+	// rules for all plugin types
+	all := []ruleFunc{
+		firstSection,
+		noLongLinesInParagraphs(80),
+		configSection,
+		relativeTelegrafLinks,
+	}
+	for i := pluginInput; i <= pluginParser; i++ {
+		rules[i] = all
+	}
+
+	inputRules := []ruleFunc{
+		requiredSectionsClose([]string{
+			"Example Output",
+			"Metrics",
+		}),
+	}
+	rules[pluginInput] = append(rules[pluginInput], inputRules...)
+}
+
+func checkFile(filename string, pluginType plugin, sourceFlag bool) error {
+	md, err := os.ReadFile(filename)
+	if err != nil {
+		return err
+	}
+
+	// Goldmark returns locations as offsets. We want line
+	// numbers. Find the newlines in the file so we can translate
+	// later.
+	scanner := bufio.NewScanner(bytes.NewReader(md))
+	scanner.Split(bufio.ScanRunes)
+	offset := 0
+	newlineOffsets := []int{}
+	for scanner.Scan() {
+		if scanner.Text() == "\n" {
+			newlineOffsets = append(newlineOffsets, offset)
+		}
+
+		offset += len(scanner.Bytes()) // goldmark segments are byte offsets; a rune can be >1 byte
+	}
+
+	p := goldmark.DefaultParser()
+
+	// We need goldmark to parse tables, otherwise they show up as
+	// paragraphs. Since tables often have long lines and we check for long
+	// lines in paragraphs, without table parsing there are false positive long
+	// lines in tables.
+	//
+	// The tableParagraphTransformer is an extension and not part of the default
+	// parser so we add it. There may be an easier way to do it, but this works:
+	p.AddOptions(
+		parser.WithParagraphTransformers(
+			util.Prioritized(extension.NewTableParagraphTransformer(), 99),
+		),
+	)
+
+	r := text.NewReader(md)
+	root := p.Parse(r)
+
+	rules := rules[pluginType]
+
+	tester := T{
+		filename:       filename,
+		markdown:       md,
+		newlineOffsets: newlineOffsets,
+		sourceFlag:     sourceFlag,
+	}
+	for _, rule := range rules {
+		err = rule(&tester, root)
+		if err != nil {
+			return err
+		}
+	}
+	tester.printPassFail()
+
+	return nil
+}
diff --git a/tools/readme_linter/plugin.go b/tools/readme_linter/plugin.go
new file mode 100644
index 000000000..641268092
--- /dev/null
+++ b/tools/readme_linter/plugin.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+	"strings"
+)
+
+type plugin int
+
+const (
+	pluginNone plugin = iota
+	pluginInput
+	pluginOutput
+	pluginProcessor
+	pluginAggregator
+	pluginParser
+)
+
+func guessPluginType(filename string) plugin {
+	switch {
+	case strings.Contains(filename, "plugins/inputs/"):
+		return pluginInput
+	case strings.Contains(filename, "plugins/outputs/"):
+		return pluginOutput
+	case strings.Contains(filename, "plugins/processors/"):
+		return pluginProcessor
+	case strings.Contains(filename, "plugins/aggregators/"):
+		return pluginAggregator
+	case strings.Contains(filename, "plugins/parsers/"):
+		return pluginParser
+	default:
+		return pluginNone
+	}
+}
diff --git a/tools/readme_linter/rules.go b/tools/readme_linter/rules.go
new file mode 100644
index 000000000..2250ac52b
--- /dev/null
+++ b/tools/readme_linter/rules.go
@@ -0,0 +1,210 @@
+package main
+
+import (
+	"bytes"
+	"strings"
+
+	"github.com/yuin/goldmark/ast"
+)
+
+// The first section is a heading with plugin name and paragraph short
+// description
+func firstSection(t *T, root ast.Node) error {
+	var n ast.Node
+	n = root.FirstChild() // NOTE(review): nil for an empty file; assertKind would then panic — confirm inputs are non-empty
+
+	t.assertKind(ast.KindHeading, n)
+	t.assertHeadingLevel(1, n)
+	t.assertFirstChildRegexp(` Plugin$`, n)
+
+	// Make sure there is some text after the heading
+	n = n.NextSibling()
+	t.assertKind(ast.KindParagraph, n)
+	length := len(n.Text(t.markdown))
+	min := 30
+	if length < min {
+		t.assertNodef(n, "short first section. Please add short description of plugin. length %d, minimum %d", length, min)
+	}
+
+	return nil
+}
+
+// Somewhere there should be a heading "sample configuration" and a
+// toml code block. The toml should match what is in the plugin's go
+// code
+
+// The required sections must be present as second-level headings
+func requiredSections(t *T, root ast.Node, headings []string) error {
+	headingsSet := newSet(headings)
+
+	expectedLevel := 2
+
+	titleCounts := make(map[string]int)
+
+	for n := root.FirstChild(); n != nil; n = n.NextSibling() {
+		var h *ast.Heading
+		var ok bool
+		if h, ok = n.(*ast.Heading); !ok {
+			continue
+		}
+
+		child := h.FirstChild()
+		if child == nil {
+			continue
+		}
+		title := string(child.Text(t.markdown))
+		if headingsSet.has(title) && h.Level != expectedLevel {
+			t.assertNodef(n, "has required section '%s' but wrong heading level. Expected level %d, found %d",
+				title, expectedLevel, h.Level)
+		}
+
+		titleCounts[title]++
+	}
+
+	headingsSet.forEach(func(title string) {
+		if _, exists := titleCounts[title]; !exists {
+			t.assertf("missing required section '%s'", title)
+		}
+	})
+
+	return nil
+}
+
+// Use this to make a rule that looks for a list of required sections. (this is
+// a closure over func requiredSections)
+func requiredSectionsClose(headings []string) func(*T, ast.Node) error {
+	return func(t *T, root ast.Node) error {
+		return requiredSections(t, root, headings)
+	}
+}
+
+func noLongLinesInParagraphs(threshold int) func(*T, ast.Node) error {
+	return func(t *T, root ast.Node) error {
+		// We're looking for long lines in paragraphs. Find paragraphs
+		// first, then which lines are in paragraphs
+		paraLines := []int{}
+		for n := root.FirstChild(); n != nil; n = n.NextSibling() {
+			var p *ast.Paragraph
+			var ok bool
+			if p, ok = n.(*ast.Paragraph); !ok {
+				continue // only looking for paragraphs
+			}
+
+			segs := p.Lines()
+			for _, seg := range segs.Sliced(0, segs.Len()) {
+				line := t.line(seg.Start)
+				paraLines = append(paraLines, line)
+				// t.printFileLine(line)
+				// fmt.Printf("paragraph line\n")
+			}
+		}
+
+		// Find long lines in the whole file
+		longLines := []int{}
+		last := 0
+		for i, cur := range t.newlineOffsets {
+			length := cur - last - 1 // -1 to exclude the newline
+			if length > threshold {
+				longLines = append(longLines, i)
+				// t.printFileLine(i)
+				// fmt.Printf("long line\n")
+			}
+			last = cur
+		}
+
+		// Merge both lists
+		p := 0
+		l := 0
+		bads := []int{}
+		for p < len(paraLines) && l < len(longLines) {
+			long := longLines[l]
+			para := paraLines[p]
+			switch {
+			case long == para:
+				bads = append(bads, long)
+				p++
+				l++
+			case long < para:
+				l++
+			case long > para:
+				p++
+			}
+		}
+
+		for _, bad := range bads {
+			t.assertLinef(bad, "long line in paragraph")
+		}
+		return nil
+	}
+}
+
+func configSection(t *T, root ast.Node) error {
+	var config *ast.Heading
+	config = nil
+	expectedTitle := "Configuration"
+	for n := root.FirstChild(); n != nil; n = n.NextSibling() {
+		var h *ast.Heading
+		var ok bool
+		if h, ok = n.(*ast.Heading); !ok {
+			continue
+		}
+
+		title := string(h.FirstChild().Text(t.markdown)) // NOTE(review): FirstChild may be nil for an empty heading — guard like requiredSections does
+		if title == expectedTitle {
+			config = h
+			continue
+		}
+	}
+
+	if config == nil {
+		t.assertf("missing section '%s'", expectedTitle)
+		return nil
+	}
+
+	toml := config.NextSibling()
+	if toml == nil {
+		t.assertf("missing config next sibling") // toml is nil here; assertNodef would dereference it
+		return nil
+	}
+
+	var b *ast.FencedCodeBlock
+	var ok bool
+	if b, ok = toml.(*ast.FencedCodeBlock); !ok {
+		t.assertNodef(toml, "config next sibling isn't a fenced code block")
+		return nil
+	}
+
+	if !bytes.Equal(b.Language(t.markdown), []byte("toml")) {
+		t.assertNodef(b, "config fenced code block isn't toml language")
+		return nil
+	}
+
+	return nil
+}
+
+// Links from one markdown file to another in the repo should be relative
+func relativeTelegrafLinks(t *T, root ast.Node) error {
+	for n := root.FirstChild(); n != nil; n = n.NextSibling() {
+		if _, ok := n.(*ast.Paragraph); !ok {
+			continue
+		}
+
+		for n2 := n.FirstChild(); n2 != nil; n2 = n2.NextSibling() {
+			var l *ast.Link
+			var ok bool
+			if l, ok = n2.(*ast.Link); !ok {
+				continue
+			}
+			link := string(l.Destination)
+			if strings.HasPrefix(link, "https://github.com/influxdata/telegraf/blob") {
+				t.assertNodef(n, "in-repo link must be relative: %s", link)
+			}
+		}
+	}
+	return nil
+}
+
+// To do: Check markdown files that aren't plugin readme files for paragraphs
+// with long lines
+
+// To do: Check the toml inside the configuration section for syntax errors
diff --git a/tools/readme_linter/set.go b/tools/readme_linter/set.go
new file mode 100644
index 000000000..b5ee8f711
--- /dev/null
+++ b/tools/readme_linter/set.go
@@ -0,0 +1,32 @@
+package main
+
+type set struct {
+	m map[string]struct{}
+}
+
+func (s *set) add(key string) {
+	s.m[key] = struct{}{}
+}
+
+func (s *set) has(key string) bool {
+	var ok bool
+	_, ok = s.m[key]
+	return ok
+}
+
+func (s *set) forEach(f func(string)) {
+	for key := range s.m {
+		f(key)
+	}
+}
+
+func newSet(elems []string) *set {
+	s := &set{
+		m: make(map[string]struct{}),
+	}
+
+	for _, elem := range elems {
+		s.add(elem)
+	}
+	return s
+}