chore: add readme linter (#10916)

Author: reimda, 2022-04-19 11:59:25 -06:00 (committed by GitHub)
parent 368b448f2b
commit 956264076d
6 changed files with 562 additions and 0 deletions

@@ -0,0 +1,29 @@
# README.md linter
## Building
```shell
telegraf/tools/readme_linter$ go build .
```
## Running
Run `readme_linter` with the paths of the README files you want to lint.
```shell
telegraf/tools/readme_linter$ ./readme_linter <path to readme>
```
You can lint multiple files at once; this works well with shell globs.
To lint all of the plugin READMEs:
```shell
telegraf/tools/readme_linter$ ./readme_linter ../../plugins/*/*/README.md
```
To lint the READMEs for input plugins whose names start with a through d:
```shell
telegraf/tools/readme_linter$ ./readme_linter ../../plugins/inputs/[a-d]*/README.md
```
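
For each failed assertion the linter prints the README path and line number, followed by a pass/fail summary per file (the output format comes from `assert.go` later in this commit; the paths and messages below are only illustrative):

```text
../../plugins/inputs/foo/README.md:1: expected Heading, have Paragraph
../../plugins/inputs/foo/README.md:12: long line in paragraph
Fail ../../plugins/inputs/foo/README.md, 2 failed assertions
Pass ../../plugins/inputs/bar/README.md
```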

@@ -0,0 +1,142 @@
package main
import (
"fmt"
"regexp"
"runtime"
"sort"
"github.com/yuin/goldmark/ast"
)
// T carries the per-file state used by all of the linter's assert methods
type T struct {
filename string
markdown []byte
newlineOffsets []int
sourceFlag bool
fails int
}
// called by all assert functions that involve a node
func (t *T) printFailedAssertf(n ast.Node, format string, args ...interface{}) {
t.printFile(n)
fmt.Printf(format+"\n", args...)
t.printRule(3)
t.fails++
}
// Assert function that doesn't involve a node, for example if something is missing
func (t *T) assertf(format string, args ...interface{}) {
t.assertLine2f(0, format, args...) // There's no line number associated, so use the first
}
func (t *T) assertNodef(n ast.Node, format string, args ...interface{}) {
t.printFailedAssertf(n, format, args...)
}
func (t *T) assertLinef(line int, format string, args ...interface{}) {
//this func only exists to make the call stack to t.printRule the same depth
//as when called through assertf
t.assertLine2f(line, format, args...)
}
func (t *T) assertLine2f(line int, format string, args ...interface{}) {
t.printFileLine(line)
fmt.Printf(format+"\n", args...)
t.printRule(3)
t.fails++
}
func (t *T) printRule(callers int) {
if !t.sourceFlag {
return
}
pc, codeFilename, codeLine, ok := runtime.Caller(callers)
if !ok {
panic("can not get caller")
}
f := runtime.FuncForPC(pc)
var funcName string
if f != nil {
funcName = f.Name()
}
fmt.Printf("%s:%d: ", codeFilename, codeLine)
if len(funcName) == 0 {
fmt.Printf("failed assert\n")
} else {
fmt.Printf("failed assert in function %s\n", funcName)
}
}
// line converts an offset into the markdown source to a zero-based line
// number by binary searching the newline offsets gathered in checkFile
func (t *T) line(offset int) int {
return sort.SearchInts(t.newlineOffsets, offset)
}
func (t *T) printFile(n ast.Node) {
lines := n.Lines()
if lines == nil || lines.Len() == 0 {
t.printFileLine(0)
return
}
offset := lines.At(0).Start
line := t.line(offset)
t.printFileLine(line)
}
func (t *T) printFileLine(line int) {
fmt.Printf("%s:%d: ", t.filename, line+1) // Lines start with 1
}
func (t *T) printPassFail() {
if t.fails == 0 {
fmt.Printf("Pass %s\n", t.filename)
} else {
fmt.Printf("Fail %s, %d failed assertions\n", t.filename, t.fails)
}
}
func (t *T) assertKind(expected ast.NodeKind, n ast.Node) {
if n.Kind() == expected {
return
}
t.printFailedAssertf(n, "expected %s, have %s", expected.String(), n.Kind().String())
}
func (t *T) assertFirstChildRegexp(expectedPattern string, n ast.Node) {
var validRegexp = regexp.MustCompile(expectedPattern)
if !n.HasChildren() {
t.printFailedAssertf(n, "expected children")
return
}
c := n.FirstChild()
actual := string(c.Text(t.markdown))
if !validRegexp.MatchString(actual) {
t.printFailedAssertf(n, "'%s' does not match regexp '%s'", actual, expectedPattern)
return
}
}
func (t *T) assertHeadingLevel(expected int, n ast.Node) {
h, ok := n.(*ast.Heading)
if !ok {
fmt.Printf("failed Heading type assertion\n")
t.fails++
return
}
if h.Level == expected {
return
}
t.printFailedAssertf(n, "expected header level %d, have %d", expected, h.Level)
}
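
To show how these helpers fit together, here is a minimal, hypothetical rule in the same style as the real rules added later in this commit (the function name and checks are illustrative only, not part of the commit):

```go
// Hypothetical rule sketch: rules use the signature declared in main.go,
// func(*T, ast.Node) error, and report lint problems through the assert
// helpers above rather than by returning an error.
func titleIsTopLevelHeading(t *T, root ast.Node) error {
	n := root.FirstChild()
	if n == nil {
		t.assertf("empty README")
		return nil
	}
	t.assertKind(ast.KindHeading, n)
	t.assertHeadingLevel(1, n)
	return nil
}
```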

tools/readme_linter/main.go (new file)
@@ -0,0 +1,116 @@
package main
import (
"bufio"
"bytes"
"flag"
"os"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
func main() {
sourceFlag := flag.Bool("source", false, "include location of linter code that failed assertion")
flag.Parse()
var err error
for _, filename := range flag.Args() {
err = checkFile(filename, guessPluginType(filename), *sourceFlag)
if err != nil {
panic(err)
}
}
}
type ruleFunc func(*T, ast.Node) error
type rulesMap map[plugin][]ruleFunc
var rules rulesMap
func init() {
rules = make(rulesMap)
//rules for all plugin types
all := []ruleFunc{
firstSection,
noLongLinesInParagraphs(80),
configSection,
relativeTelegrafLinks,
}
for i := pluginInput; i <= pluginParser; i++ {
rules[i] = all
}
inputRules := []ruleFunc{
requiredSectionsClose([]string{
"Example Output",
"Metrics",
}),
}
rules[pluginInput] = append(rules[pluginInput], inputRules...)
}
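
The map above is how per-plugin-type rules are attached. As a hypothetical illustration (not part of this commit, which only adds input-specific rules), another plugin type could be given its own rules inside init in the same way:

```go
// Hypothetical extension: output plugins get an extra required-sections rule
// on top of the rules shared by all plugin types.
outputRules := []ruleFunc{
	requiredSectionsClose([]string{
		"Configuration",
	}),
}
rules[pluginOutput] = append(rules[pluginOutput], outputRules...)
```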
func checkFile(filename string, pluginType plugin, sourceFlag bool) error {
md, err := os.ReadFile(filename)
if err != nil {
return err
}
// Goldmark returns locations as offsets. We want line
// numbers. Find the newlines in the file so we can translate
// later.
scanner := bufio.NewScanner(bytes.NewReader(md))
scanner.Split(bufio.ScanRunes)
offset := 0
newlineOffsets := []int{}
for scanner.Scan() {
if scanner.Text() == "\n" {
newlineOffsets = append(newlineOffsets, offset)
}
offset++
}
p := goldmark.DefaultParser()
// We need goldmark to parse tables, otherwise they show up as
// paragraphs. Since tables often have long lines and we check for long
// lines in paragraphs, without table parsing there are false positive long
// lines in tables.
//
// The tableParagraphTransformer is an extension and not part of the default
// parser so we add it. There may be an easier way to do it, but this works:
p.AddOptions(
parser.WithParagraphTransformers(
util.Prioritized(extension.NewTableParagraphTransformer(), 99),
),
)
r := text.NewReader(md)
root := p.Parse(r)
rules := rules[pluginType]
tester := T{
filename: filename,
markdown: md,
newlineOffsets: newlineOffsets,
sourceFlag: sourceFlag,
}
for _, rule := range rules {
err = rule(&tester, root)
if err != nil {
return err
}
}
tester.printPassFail()
return nil
}
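
The newline scan in checkFile exists so that T.line in assert.go can turn goldmark's source offsets back into line numbers with a binary search. A standalone sketch of that translation (hypothetical, for illustration only):

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	src := "# Title\n\nshort description\n"

	// Record the offset of every newline, as checkFile does.
	newlineOffsets := []int{}
	for i, c := range src {
		if c == '\n' {
			newlineOffsets = append(newlineOffsets, i)
		}
	}

	// SearchInts returns the number of newlines before the offset, which is
	// the zero-based line number; printFileLine adds 1 when printing.
	offset := 9 // offset of "short description"
	line := sort.SearchInts(newlineOffsets, offset)
	fmt.Printf("offset %d is on line %d\n", offset, line+1) // line 3
}
```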

@@ -0,0 +1,33 @@
package main
import (
"strings"
)
type plugin int
const (
pluginNone plugin = iota
pluginInput
pluginOutput
pluginProcessor
pluginAggregator
pluginParser
)
func guessPluginType(filename string) plugin {
switch {
case strings.Contains(filename, "plugins/inputs/"):
return pluginInput
case strings.Contains(filename, "plugins/outputs/"):
return pluginOutput
case strings.Contains(filename, "plugins/processors/"):
return pluginProcessor
case strings.Contains(filename, "plugins/aggregators/"):
return pluginAggregator
case strings.Contains(filename, "plugins/parsers/"):
return pluginParser
default:
return pluginNone
}
}

@@ -0,0 +1,210 @@
package main
import (
"bytes"
"strings"
"github.com/yuin/goldmark/ast"
)
// The first section is a heading with plugin name and paragraph short
// description
func firstSection(t *T, root ast.Node) error {
var n ast.Node
n = root.FirstChild()
t.assertKind(ast.KindHeading, n)
t.assertHeadingLevel(1, n)
t.assertFirstChildRegexp(` Plugin$`, n)
// Make sure there is some text after the heading
n = n.NextSibling()
t.assertKind(ast.KindParagraph, n)
length := len(n.Text(t.markdown))
min := 30
if length < min {
t.assertNodef(n, "short first section. Please add short description of plugin. length %d, minimum %d", length, min)
}
return nil
}
// Somewhere there should be a heading "sample configuration" and a toml
// code block; the toml should match what is in the plugin's go code (see
// configSection below).
// Second level headings should include the required section titles passed
// in headings.
func requiredSections(t *T, root ast.Node, headings []string) error {
headingsSet := newSet(headings)
expectedLevel := 2
titleCounts := make(map[string]int)
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
var h *ast.Heading
var ok bool
if h, ok = n.(*ast.Heading); !ok {
continue
}
child := h.FirstChild()
if child == nil {
continue
}
title := string(child.Text(t.markdown))
if headingsSet.has(title) && h.Level != expectedLevel {
t.assertNodef(n, "has required section '%s' but wrong heading level. Expected level %d, found %d",
title, expectedLevel, h.Level)
}
titleCounts[title]++
}
headingsSet.forEach(func(title string) {
if _, exists := titleCounts[title]; !exists {
t.assertf("missing required section '%s'", title)
}
})
return nil
}
// Use this to make a rule that checks for a list of required sections. (It
// returns a closure over func requiredSections.)
func requiredSectionsClose(headings []string) func(*T, ast.Node) error {
return func(t *T, root ast.Node) error {
return requiredSections(t, root, headings)
}
}
func noLongLinesInParagraphs(threshold int) func(*T, ast.Node) error {
return func(t *T, root ast.Node) error {
// We're looking for long lines in paragraphs. Find paragraphs
// first, then which lines are in paragraphs
paraLines := []int{}
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
var p *ast.Paragraph
var ok bool
if p, ok = n.(*ast.Paragraph); !ok {
continue //only looking for paragraphs
}
segs := p.Lines()
for _, seg := range segs.Sliced(0, segs.Len()) {
line := t.line(seg.Start)
paraLines = append(paraLines, line)
// t.printFileLine(line)
// fmt.Printf("paragraph line\n")
}
}
// Find long lines in the whole file
longLines := []int{}
last := 0
for i, cur := range t.newlineOffsets {
length := cur - last - 1 // -1 to exclude the newline
if length > threshold {
longLines = append(longLines, i)
// t.printFileLine(i)
// fmt.Printf("long line\n")
}
last = cur
}
// Merge both lists
p := 0
l := 0
bads := []int{}
for p < len(paraLines) && l < len(longLines) {
long := longLines[l]
para := paraLines[p]
switch {
case long == para:
bads = append(bads, long)
p++
l++
case long < para:
l++
case long > para:
p++
}
}
for _, bad := range bads {
t.assertLinef(bad, "long line in paragraph")
}
return nil
}
}
func configSection(t *T, root ast.Node) error {
var config *ast.Heading
config = nil
expectedTitle := "Configuration"
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
var h *ast.Heading
var ok bool
if h, ok = n.(*ast.Heading); !ok {
continue
}
title := string(h.FirstChild().Text(t.markdown))
if title == expectedTitle {
config = h
continue
}
}
if config == nil {
t.assertf("missing section '%s'", expectedTitle)
return nil
}
toml := config.NextSibling()
if toml == nil {
// There is no node to report against here; assertNodef would dereference a
// nil node, so use the file-level assert instead.
t.assertf("missing config next sibling")
return nil
}
var b *ast.FencedCodeBlock
var ok bool
if b, ok = toml.(*ast.FencedCodeBlock); !ok {
t.assertNodef(toml, "config next sibling isn't a fenced code block")
return nil
}
if !bytes.Equal(b.Language(t.markdown), []byte("toml")) {
t.assertNodef(b, "config fenced code block isn't toml language")
return nil
}
return nil
}
// Links from one markdown file to another in the repo should be relative
func relativeTelegrafLinks(t *T, root ast.Node) error {
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
if _, ok := n.(*ast.Paragraph); !ok {
continue
}
for n2 := n.FirstChild(); n2 != nil; n2 = n2.NextSibling() {
var l *ast.Link
var ok bool
if l, ok = n2.(*ast.Link); !ok {
continue
}
link := string(l.Destination)
if strings.HasPrefix(link, "https://github.com/influxdata/telegraf/blob") {
t.assertNodef(n, "in-repo link must be relative: %s", link)
}
}
}
return nil
}
// To do: Check markdown files that aren't plugin readme files for paragraphs
// with long lines
// To do: Check the toml inside the configuration section for syntax errors

@@ -0,0 +1,32 @@
package main
type set struct {
m map[string]struct{}
}
func (s *set) add(key string) {
s.m[key] = struct{}{}
}
func (s *set) has(key string) bool {
var ok bool
_, ok = s.m[key]
return ok
}
func (s *set) forEach(f func(string)) {
for key := range s.m {
f(key)
}
}
func newSet(elems []string) *set {
s := &set{
m: make(map[string]struct{}),
}
for _, elem := range elems {
s.add(elem)
}
return s
}
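
For reference, a small usage sketch of this helper within the same package (hypothetical, not part of the commit; assumes "fmt" is imported), mirroring how requiredSections in rules.go consumes it:

```go
// Hypothetical usage of the set helper.
func exampleSetUsage() {
	required := newSet([]string{"Metrics", "Example Output"})
	if !required.has("Metrics") {
		fmt.Println("missing Metrics section")
	}
	required.forEach(func(title string) {
		fmt.Println("required section:", title)
	})
}
```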