chore: add readme linter (#10916)
parent 368b448f2b
commit 956264076d
@@ -0,0 +1,29 @@
# README.md linter

## Building

```shell
telegraf/tools/readme_linter$ go build .
```

## Running

Run readme_linter with the filenames of the readme files you want to lint.

```shell
telegraf/tools/readme_linter$ ./readme_linter <path to readme>
```

You can lint multiple files at once. This works well with shell globs.

To lint all the plugin readmes:

```shell
telegraf/tools/readme_linter$ ./readme_linter ../../plugins/*/*/README.md
```

To lint readmes for inputs starting with a-d:

```shell
telegraf/tools/readme_linter$ ./readme_linter ../../plugins/inputs/[a-d]*/README.md
```
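
Not covered in the README above: main.go (below) also defines a -source flag that prints the location in the linter source of the assertion that failed, which is mainly useful when working on the linter itself. A usage sketch:

```shell
telegraf/tools/readme_linter$ ./readme_linter -source <path to readme>
```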
@@ -0,0 +1,142 @@
package main

import (
    "fmt"
    "regexp"
    "runtime"
    "sort"

    "github.com/yuin/goldmark/ast"
)

// Type for all linter assert methods
type T struct {
    filename       string
    markdown       []byte
    newlineOffsets []int
    sourceFlag     bool

    fails int
}

// Called by all assert functions that involve a node
func (t *T) printFailedAssertf(n ast.Node, format string, args ...interface{}) {
    t.printFile(n)
    fmt.Printf(format+"\n", args...)
    t.printRule(3)
    t.fails++
}

// Assert function that doesn't involve a node, for example if something is missing
func (t *T) assertf(format string, args ...interface{}) {
    t.assertLine2f(0, format, args...) // There's no line number associated, so use the first
}

func (t *T) assertNodef(n ast.Node, format string, args ...interface{}) {
    t.printFailedAssertf(n, format, args...)
}

func (t *T) assertLinef(line int, format string, args ...interface{}) {
    // This func only exists to make the call stack to t.printRule the same depth
    // as when called through assertf

    t.assertLine2f(line, format, args...)
}

func (t *T) assertLine2f(line int, format string, args ...interface{}) {
    t.printFileLine(line)
    fmt.Printf(format+"\n", args...)
    t.printRule(3)
    t.fails++
}

func (t *T) printRule(callers int) {
    if !t.sourceFlag {
        return
    }

    pc, codeFilename, codeLine, ok := runtime.Caller(callers)
    if !ok {
        panic("cannot get caller")
    }

    f := runtime.FuncForPC(pc)
    var funcName string
    if f != nil {
        funcName = f.Name()
    }

    fmt.Printf("%s:%d: ", codeFilename, codeLine)
    if len(funcName) == 0 {
        fmt.Printf("failed assert\n")
    } else {
        fmt.Printf("failed assert in function %s\n", funcName)
    }
}

func (t *T) line(offset int) int {
    return sort.SearchInts(t.newlineOffsets, offset)
}

func (t *T) printFile(n ast.Node) {
    lines := n.Lines()
    if lines == nil || lines.Len() == 0 {
        t.printFileLine(0)
        return
    }
    offset := lines.At(0).Start
    line := t.line(offset)
    t.printFileLine(line)
}

func (t *T) printFileLine(line int) {
    fmt.Printf("%s:%d: ", t.filename, line+1) // Lines start with 1
}

func (t *T) printPassFail() {
    if t.fails == 0 {
        fmt.Printf("Pass %s\n", t.filename)
    } else {
        fmt.Printf("Fail %s, %d failed assertions\n", t.filename, t.fails)
    }
}

func (t *T) assertKind(expected ast.NodeKind, n ast.Node) {
    if n.Kind() == expected {
        return
    }

    t.printFailedAssertf(n, "expected %s, have %s", expected.String(), n.Kind().String())
}

func (t *T) assertFirstChildRegexp(expectedPattern string, n ast.Node) {
    var validRegexp = regexp.MustCompile(expectedPattern)

    if !n.HasChildren() {
        t.printFailedAssertf(n, "expected children")
        return
    }
    c := n.FirstChild()

    actual := string(c.Text(t.markdown))

    if !validRegexp.MatchString(actual) {
        t.printFailedAssertf(n, "'%s' does not match regexp '%s'", actual, expectedPattern)
        return
    }
}

func (t *T) assertHeadingLevel(expected int, n ast.Node) {
    h, ok := n.(*ast.Heading)
    if !ok {
        fmt.Printf("failed Heading type assertion\n")
        t.fails++
        return
    }

    if h.Level == expected {
        return
    }

    t.printFailedAssertf(n, "expected header level %d, have %d", expected, h.Level)
}
@@ -0,0 +1,116 @@
package main

import (
    "bufio"
    "bytes"
    "flag"
    "os"

    "github.com/yuin/goldmark"
    "github.com/yuin/goldmark/ast"
    "github.com/yuin/goldmark/extension"
    "github.com/yuin/goldmark/parser"
    "github.com/yuin/goldmark/text"
    "github.com/yuin/goldmark/util"
)

func main() {
    sourceFlag := flag.Bool("source", false, "include location of linter code that failed assertion")

    flag.Parse()

    var err error
    for _, filename := range flag.Args() {
        err = checkFile(filename, guessPluginType(filename), *sourceFlag)
        if err != nil {
            panic(err)
        }
    }
}

type ruleFunc func(*T, ast.Node) error

type rulesMap map[plugin][]ruleFunc

var rules rulesMap

func init() {
    rules = make(rulesMap)

    // Rules for all plugin types
    all := []ruleFunc{
        firstSection,
        noLongLinesInParagraphs(80),
        configSection,
        relativeTelegrafLinks,
    }
    for i := pluginInput; i <= pluginParser; i++ {
        rules[i] = all
    }

    inputRules := []ruleFunc{
        requiredSectionsClose([]string{
            "Example Output",
            "Metrics",
        }),
    }
    rules[pluginInput] = append(rules[pluginInput], inputRules...)
}

func checkFile(filename string, pluginType plugin, sourceFlag bool) error {
    md, err := os.ReadFile(filename)
    if err != nil {
        return err
    }

    // Goldmark returns locations as offsets. We want line
    // numbers. Find the newlines in the file so we can translate
    // later.
    scanner := bufio.NewScanner(bytes.NewReader(md))
    scanner.Split(bufio.ScanRunes)
    offset := 0
    newlineOffsets := []int{}
    for scanner.Scan() {
        if scanner.Text() == "\n" {
            newlineOffsets = append(newlineOffsets, offset)
        }

        offset++
    }

    p := goldmark.DefaultParser()

    // We need goldmark to parse tables, otherwise they show up as
    // paragraphs. Since tables often have long lines and we check for long
    // lines in paragraphs, without table parsing there are false positive long
    // lines in tables.
    //
    // The tableParagraphTransformer is an extension and not part of the default
    // parser so we add it. There may be an easier way to do it, but this works:
    p.AddOptions(
        parser.WithParagraphTransformers(
            util.Prioritized(extension.NewTableParagraphTransformer(), 99),
        ),
    )

    r := text.NewReader(md)
    root := p.Parse(r)

    rules := rules[pluginType]

    tester := T{
        filename:       filename,
        markdown:       md,
        newlineOffsets: newlineOffsets,
        sourceFlag:     sourceFlag,
    }
    for _, rule := range rules {
        err = rule(&tester, root)
        if err != nil {
            return err
        }
    }
    tester.printPassFail()

    return nil
}
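
For illustration, registering an extra rule list for another plugin type would follow the same pattern as inputRules in init() above. A hypothetical sketch (registerOutputRules, outputRules, and the "Example Configuration" section name are made up for the example and are not part of this commit):

```go
// Hypothetical sketch: give output plugins their own required-section
// check, reusing requiredSectionsClose from rules.go. Not part of this
// commit; shown only to illustrate how the rules map is extended.
func registerOutputRules() {
    outputRules := []ruleFunc{
        requiredSectionsClose([]string{
            "Example Configuration", // made-up section name for illustration
        }),
    }
    rules[pluginOutput] = append(rules[pluginOutput], outputRules...)
}
```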
@@ -0,0 +1,33 @@
package main

import (
    "strings"
)

type plugin int

const (
    pluginNone plugin = iota
    pluginInput
    pluginOutput
    pluginProcessor
    pluginAggregator
    pluginParser
)

func guessPluginType(filename string) plugin {
    switch {
    case strings.Contains(filename, "plugins/inputs/"):
        return pluginInput
    case strings.Contains(filename, "plugins/outputs/"):
        return pluginOutput
    case strings.Contains(filename, "plugins/processors/"):
        return pluginProcessor
    case strings.Contains(filename, "plugins/aggregators/"):
        return pluginAggregator
    case strings.Contains(filename, "plugins/parsers/"):
        return pluginParser
    default:
        return pluginNone
    }
}
@@ -0,0 +1,210 @@
package main

import (
    "bytes"
    "strings"

    "github.com/yuin/goldmark/ast"
)

// The first section is a heading with the plugin name, followed by a short
// description paragraph
func firstSection(t *T, root ast.Node) error {
    var n ast.Node
    n = root.FirstChild()

    t.assertKind(ast.KindHeading, n)
    t.assertHeadingLevel(1, n)
    t.assertFirstChildRegexp(` Plugin$`, n)

    // Make sure there is some text after the heading
    n = n.NextSibling()
    t.assertKind(ast.KindParagraph, n)
    length := len(n.Text(t.markdown))
    min := 30
    if length < min {
        t.assertNodef(n, "short first section. Please add short description of plugin. length %d, minimum %d", length, min)
    }

    return nil
}

// Somewhere there should be a heading "sample configuration" and a
// toml code block. The toml should match what is in the plugin's go
// code

// Second-level headings should include the required section titles passed in
func requiredSections(t *T, root ast.Node, headings []string) error {
    headingsSet := newSet(headings)

    expectedLevel := 2

    titleCounts := make(map[string]int)

    for n := root.FirstChild(); n != nil; n = n.NextSibling() {
        var h *ast.Heading
        var ok bool
        if h, ok = n.(*ast.Heading); !ok {
            continue
        }

        child := h.FirstChild()
        if child == nil {
            continue
        }
        title := string(child.Text(t.markdown))
        if headingsSet.has(title) && h.Level != expectedLevel {
            t.assertNodef(n, "has required section '%s' but wrong heading level. Expected level %d, found %d",
                title, expectedLevel, h.Level)
        }

        titleCounts[title]++
    }

    headingsSet.forEach(func(title string) {
        if _, exists := titleCounts[title]; !exists {
            t.assertf("missing required section '%s'", title)
        }
    })

    return nil
}

// Use this to make a rule that looks for a list of required section headings.
// (This is a closure over func requiredSections.)
func requiredSectionsClose(headings []string) func(*T, ast.Node) error {
    return func(t *T, root ast.Node) error {
        return requiredSections(t, root, headings)
    }
}

func noLongLinesInParagraphs(threshold int) func(*T, ast.Node) error {
    return func(t *T, root ast.Node) error {
        // We're looking for long lines in paragraphs. Find paragraphs
        // first, then which lines are in paragraphs
        paraLines := []int{}
        for n := root.FirstChild(); n != nil; n = n.NextSibling() {
            var p *ast.Paragraph
            var ok bool
            if p, ok = n.(*ast.Paragraph); !ok {
                continue // only looking for paragraphs
            }

            segs := p.Lines()
            for _, seg := range segs.Sliced(0, segs.Len()) {
                line := t.line(seg.Start)
                paraLines = append(paraLines, line)
                // t.printFileLine(line)
                // fmt.Printf("paragraph line\n")
            }
        }

        // Find long lines in the whole file
        longLines := []int{}
        last := 0
        for i, cur := range t.newlineOffsets {
            length := cur - last - 1 // -1 to exclude the newline
            if length > threshold {
                longLines = append(longLines, i)
                // t.printFileLine(i)
                // fmt.Printf("long line\n")
            }
            last = cur
        }

        // Merge both lists
        p := 0
        l := 0
        bads := []int{}
        for p < len(paraLines) && l < len(longLines) {
            long := longLines[l]
            para := paraLines[p]
            switch {
            case long == para:
                bads = append(bads, long)
                p++
                l++
            case long < para:
                l++
            case long > para:
                p++
            }
        }

        for _, bad := range bads {
            t.assertLinef(bad, "long line in paragraph")
        }
        return nil
    }
}

func configSection(t *T, root ast.Node) error {
    var config *ast.Heading
    config = nil
    expectedTitle := "Configuration"
    for n := root.FirstChild(); n != nil; n = n.NextSibling() {
        var h *ast.Heading
        var ok bool
        if h, ok = n.(*ast.Heading); !ok {
            continue
        }

        title := string(h.FirstChild().Text(t.markdown))
        if title == expectedTitle {
            config = h
            continue
        }
    }

    if config == nil {
        t.assertf("missing section '%s'", expectedTitle)
        return nil
    }

    toml := config.NextSibling()
    if toml == nil {
        t.assertNodef(toml, "missing config next sibling")
        return nil
    }

    var b *ast.FencedCodeBlock
    var ok bool
    if b, ok = toml.(*ast.FencedCodeBlock); !ok {
        t.assertNodef(toml, "config next sibling isn't a fenced code block")
        return nil
    }

    if !bytes.Equal(b.Language(t.markdown), []byte("toml")) {
        t.assertNodef(b, "config fenced code block isn't toml language")
        return nil
    }

    return nil
}

// Links from one markdown file to another in the repo should be relative
func relativeTelegrafLinks(t *T, root ast.Node) error {
    for n := root.FirstChild(); n != nil; n = n.NextSibling() {
        if _, ok := n.(*ast.Paragraph); !ok {
            continue
        }

        for n2 := n.FirstChild(); n2 != nil; n2 = n2.NextSibling() {
            var l *ast.Link
            var ok bool
            if l, ok = n2.(*ast.Link); !ok {
                continue
            }
            link := string(l.Destination)
            if strings.HasPrefix(link, "https://github.com/influxdata/telegraf/blob") {
                t.assertNodef(n, "in-repo link must be relative: %s", link)
            }
        }
    }
    return nil
}

// To do: Check markdown files that aren't plugin readme files for paragraphs
// with long lines

// To do: Check the toml inside the configuration section for syntax errors
@@ -0,0 +1,32 @@
package main

type set struct {
    m map[string]struct{}
}

func (s *set) add(key string) {
    s.m[key] = struct{}{}
}

func (s *set) has(key string) bool {
    var ok bool
    _, ok = s.m[key]
    return ok
}

func (s *set) forEach(f func(string)) {
    for key := range s.m {
        f(key)
    }
}

func newSet(elems []string) *set {
    s := &set{
        m: make(map[string]struct{}),
    }

    for _, elem := range elems {
        s.add(elem)
    }
    return s
}