chore: add readme linter (#10916)

Author: reimda, 2022-04-19 11:59:25 -06:00 (committed by GitHub)
parent 368b448f2b
commit 956264076d
6 changed files with 562 additions and 0 deletions

@@ -0,0 +1,29 @@
# README.md linter
## Building
```shell
telegraf/tools/readme_linter$ go build .
```
## Running
Run `readme_linter` with the paths of the README files you want to lint.
```shell
telegraf/tools/readme_linter$ ./readme_linter <path to readme>
```
You can lint multiple files at once; this works well with shell globs.
To lint all of the plugin READMEs:
```shell
telegraf/tools/readme_linter$ ./readme_linter ../../plugins/*/*/README.md
```
To lint the READMEs for input plugins whose names start with a through d:
```shell
telegraf/tools/readme_linter$ ./readme_linter ../../plugins/inputs/[a-d]*/README.md
```
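
For each failed assertion the linter prints the README path and line number, followed by a pass/fail summary per file (the output format comes from `assert.go` later in this commit; the paths and messages below are only illustrative):

```text
../../plugins/inputs/foo/README.md:1: expected Heading, have Paragraph
../../plugins/inputs/foo/README.md:12: long line in paragraph
Fail ../../plugins/inputs/foo/README.md, 2 failed assertions
Pass ../../plugins/inputs/bar/README.md
```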

@@ -0,0 +1,142 @@
package main
import (
"fmt"
"regexp"
"runtime"
"sort"
"github.com/yuin/goldmark/ast"
)
// T carries the per-file state used by all of the linter's assert methods
type T struct {
filename string
markdown []byte
newlineOffsets []int
sourceFlag bool
fails int
}
// called by all assert functions that involve a node
func (t *T) printFailedAssertf(n ast.Node, format string, args ...interface{}) {
t.printFile(n)
fmt.Printf(format+"\n", args...)
t.printRule(3)
t.fails++
}
// Assert function that doesn't involve a node, for example if something is missing
func (t *T) assertf(format string, args ...interface{}) {
t.assertLine2f(0, format, args...) // There's no line number associated, so use the first
}
func (t *T) assertNodef(n ast.Node, format string, args ...interface{}) {
t.printFailedAssertf(n, format, args...)
}
func (t *T) assertLinef(line int, format string, args ...interface{}) {
//this func only exists to make the call stack to t.printRule the same depth
//as when called through assertf
t.assertLine2f(line, format, args...)
}
func (t *T) assertLine2f(line int, format string, args ...interface{}) {
t.printFileLine(line)
fmt.Printf(format+"\n", args...)
t.printRule(3)
t.fails++
}
func (t *T) printRule(callers int) {
if !t.sourceFlag {
return
}
pc, codeFilename, codeLine, ok := runtime.Caller(callers)
if !ok {
panic("can not get caller")
}
f := runtime.FuncForPC(pc)
var funcName string
if f != nil {
funcName = f.Name()
}
fmt.Printf("%s:%d: ", codeFilename, codeLine)
if len(funcName) == 0 {
fmt.Printf("failed assert\n")
} else {
fmt.Printf("failed assert in function %s\n", funcName)
}
}
// line converts an offset into the markdown source to a zero-based line
// number by binary searching the newline offsets gathered in checkFile
func (t *T) line(offset int) int {
return sort.SearchInts(t.newlineOffsets, offset)
}
func (t *T) printFile(n ast.Node) {
lines := n.Lines()
if lines == nil || lines.Len() == 0 {
t.printFileLine(0)
return
}
offset := lines.At(0).Start
line := t.line(offset)
t.printFileLine(line)
}
func (t *T) printFileLine(line int) {
fmt.Printf("%s:%d: ", t.filename, line+1) // Lines start with 1
}
func (t *T) printPassFail() {
if t.fails == 0 {
fmt.Printf("Pass %s\n", t.filename)
} else {
fmt.Printf("Fail %s, %d failed assertions\n", t.filename, t.fails)
}
}
func (t *T) assertKind(expected ast.NodeKind, n ast.Node) {
if n.Kind() == expected {
return
}
t.printFailedAssertf(n, "expected %s, have %s", expected.String(), n.Kind().String())
}
func (t *T) assertFirstChildRegexp(expectedPattern string, n ast.Node) {
var validRegexp = regexp.MustCompile(expectedPattern)
if !n.HasChildren() {
t.printFailedAssertf(n, "expected children")
return
}
c := n.FirstChild()
actual := string(c.Text(t.markdown))
if !validRegexp.MatchString(actual) {
t.printFailedAssertf(n, "'%s' does not match regexp '%s'", actual, expectedPattern)
return
}
}
func (t *T) assertHeadingLevel(expected int, n ast.Node) {
h, ok := n.(*ast.Heading)
if !ok {
fmt.Printf("failed Heading type assertion\n")
t.fails++
return
}
if h.Level == expected {
return
}
t.printFailedAssertf(n, "expected header level %d, have %d", expected, h.Level)
}
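
To show how these helpers fit together, here is a minimal, hypothetical rule in the same style as the real rules added later in this commit (the function name and checks are illustrative only, not part of the commit):

```go
// Hypothetical rule sketch: rules use the signature declared in main.go,
// func(*T, ast.Node) error, and report lint problems through the assert
// helpers above rather than by returning an error.
func titleIsTopLevelHeading(t *T, root ast.Node) error {
	n := root.FirstChild()
	if n == nil {
		t.assertf("empty README")
		return nil
	}
	t.assertKind(ast.KindHeading, n)
	t.assertHeadingLevel(1, n)
	return nil
}
```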

tools/readme_linter/main.go (new file)
@@ -0,0 +1,116 @@
package main
import (
"bufio"
"bytes"
"flag"
"os"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
func main() {
sourceFlag := flag.Bool("source", false, "include location of linter code that failed assertion")
flag.Parse()
var err error
for _, filename := range flag.Args() {
err = checkFile(filename, guessPluginType(filename), *sourceFlag)
if err != nil {
panic(err)
}
}
}
type ruleFunc func(*T, ast.Node) error
type rulesMap map[plugin][]ruleFunc
var rules rulesMap
func init() {
rules = make(rulesMap)
//rules for all plugin types
all := []ruleFunc{
firstSection,
noLongLinesInParagraphs(80),
configSection,
relativeTelegrafLinks,
}
for i := pluginInput; i <= pluginParser; i++ {
rules[i] = all
}
inputRules := []ruleFunc{
requiredSectionsClose([]string{
"Example Output",
"Metrics",
}),
}
rules[pluginInput] = append(rules[pluginInput], inputRules...)
}
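
The map above is how per-plugin-type rules are attached. As a hypothetical illustration (not part of this commit, which only adds input-specific rules), another plugin type could be given its own rules inside init in the same way:

```go
// Hypothetical extension: output plugins get an extra required-sections rule
// on top of the rules shared by all plugin types.
outputRules := []ruleFunc{
	requiredSectionsClose([]string{
		"Configuration",
	}),
}
rules[pluginOutput] = append(rules[pluginOutput], outputRules...)
```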
func checkFile(filename string, pluginType plugin, sourceFlag bool) error {
md, err := os.ReadFile(filename)
if err != nil {
return err
}
// Goldmark returns locations as offsets. We want line
// numbers. Find the newlines in the file so we can translate
// later.
scanner := bufio.NewScanner(bytes.NewReader(md))
scanner.Split(bufio.ScanRunes)
offset := 0
newlineOffsets := []int{}
for scanner.Scan() {
if scanner.Text() == "\n" {
newlineOffsets = append(newlineOffsets, offset)
}
offset++
}
p := goldmark.DefaultParser()
// We need goldmark to parse tables, otherwise they show up as
// paragraphs. Since tables often have long lines and we check for long
// lines in paragraphs, without table parsing there are false positive long
// lines in tables.
//
// The tableParagraphTransformer is an extension and not part of the default
// parser so we add it. There may be an easier way to do it, but this works:
p.AddOptions(
parser.WithParagraphTransformers(
util.Prioritized(extension.NewTableParagraphTransformer(), 99),
),
)
r := text.NewReader(md)
root := p.Parse(r)
rules := rules[pluginType]
tester := T{
filename: filename,
markdown: md,
newlineOffsets: newlineOffsets,
sourceFlag: sourceFlag,
}
for _, rule := range rules {
err = rule(&tester, root)
if err != nil {
return err
}
}
tester.printPassFail()
return nil
}
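
The newline scan in checkFile exists so that T.line in assert.go can turn goldmark's source offsets back into line numbers with a binary search. A standalone sketch of that translation (hypothetical, for illustration only):

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	src := "# Title\n\nshort description\n"

	// Record the offset of every newline, as checkFile does.
	newlineOffsets := []int{}
	for i, c := range src {
		if c == '\n' {
			newlineOffsets = append(newlineOffsets, i)
		}
	}

	// SearchInts returns the number of newlines before the offset, which is
	// the zero-based line number; printFileLine adds 1 when printing.
	offset := 9 // offset of "short description"
	line := sort.SearchInts(newlineOffsets, offset)
	fmt.Printf("offset %d is on line %d\n", offset, line+1) // line 3
}
```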

@@ -0,0 +1,33 @@
package main
import (
"strings"
)
type plugin int
const (
pluginNone plugin = iota
pluginInput
pluginOutput
pluginProcessor
pluginAggregator
pluginParser
)
func guessPluginType(filename string) plugin {
switch {
case strings.Contains(filename, "plugins/inputs/"):
return pluginInput
case strings.Contains(filename, "plugins/outputs/"):
return pluginOutput
case strings.Contains(filename, "plugins/processors/"):
return pluginProcessor
case strings.Contains(filename, "plugins/aggregators/"):
return pluginAggregator
case strings.Contains(filename, "plugins/parsers/"):
return pluginParser
default:
return pluginNone
}
}

@@ -0,0 +1,210 @@
package main
import (
"bytes"
"strings"
"github.com/yuin/goldmark/ast"
)
// The first section is a heading with plugin name and paragraph short
// description
func firstSection(t *T, root ast.Node) error {
var n ast.Node
n = root.FirstChild()
t.assertKind(ast.KindHeading, n)
t.assertHeadingLevel(1, n)
t.assertFirstChildRegexp(` Plugin$`, n)
// Make sure there is some text after the heading
n = n.NextSibling()
t.assertKind(ast.KindParagraph, n)
length := len(n.Text(t.markdown))
min := 30
if length < min {
t.assertNodef(n, "short first section. Please add short description of plugin. length %d, minimum %d", length, min)
}
return nil
}
// Somewhere there should be a heading "sample configuration" and a toml
// code block; the toml should match what is in the plugin's go code (see
// configSection below).
// Second level headings should include the required section titles passed
// in headings.
func requiredSections(t *T, root ast.Node, headings []string) error {
headingsSet := newSet(headings)
expectedLevel := 2
titleCounts := make(map[string]int)
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
var h *ast.Heading
var ok bool
if h, ok = n.(*ast.Heading); !ok {
continue
}
child := h.FirstChild()
if child == nil {
continue
}
title := string(child.Text(t.markdown))
if headingsSet.has(title) && h.Level != expectedLevel {
t.assertNodef(n, "has required section '%s' but wrong heading level. Expected level %d, found %d",
title, expectedLevel, h.Level)
}
titleCounts[title]++
}
headingsSet.forEach(func(title string) {
if _, exists := titleCounts[title]; !exists {
t.assertf("missing required section '%s'", title)
}
})
return nil
}
// Use this to make a rule that checks for a list of required sections. (It
// returns a closure over func requiredSections.)
func requiredSectionsClose(headings []string) func(*T, ast.Node) error {
return func(t *T, root ast.Node) error {
return requiredSections(t, root, headings)
}
}
func noLongLinesInParagraphs(threshold int) func(*T, ast.Node) error {
return func(t *T, root ast.Node) error {
// We're looking for long lines in paragraphs. Find paragraphs
// first, then which lines are in paragraphs
paraLines := []int{}
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
var p *ast.Paragraph
var ok bool
if p, ok = n.(*ast.Paragraph); !ok {
continue //only looking for paragraphs
}
segs := p.Lines()
for _, seg := range segs.Sliced(0, segs.Len()) {
line := t.line(seg.Start)
paraLines = append(paraLines, line)
// t.printFileLine(line)
// fmt.Printf("paragraph line\n")
}
}
// Find long lines in the whole file
longLines := []int{}
last := 0
for i, cur := range t.newlineOffsets {
length := cur - last - 1 // -1 to exclude the newline
if length > threshold {
longLines = append(longLines, i)
// t.printFileLine(i)
// fmt.Printf("long line\n")
}
last = cur
}
// Merge both lists
p := 0
l := 0
bads := []int{}
for p < len(paraLines) && l < len(longLines) {
long := longLines[l]
para := paraLines[p]
switch {
case long == para:
bads = append(bads, long)
p++
l++
case long < para:
l++
case long > para:
p++
}
}
for _, bad := range bads {
t.assertLinef(bad, "long line in paragraph")
}
return nil
}
}
func configSection(t *T, root ast.Node) error {
var config *ast.Heading
config = nil
expectedTitle := "Configuration"
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
var h *ast.Heading
var ok bool
if h, ok = n.(*ast.Heading); !ok {
continue
}
title := string(h.FirstChild().Text(t.markdown))
if title == expectedTitle {
config = h
continue
}
}
if config == nil {
t.assertf("missing section '%s'", expectedTitle)
return nil
}
toml := config.NextSibling()
if toml == nil {
// There is no node to report against here; assertNodef would dereference a
// nil node, so use the file-level assert instead.
t.assertf("missing config next sibling")
return nil
}
var b *ast.FencedCodeBlock
var ok bool
if b, ok = toml.(*ast.FencedCodeBlock); !ok {
t.assertNodef(toml, "config next sibling isn't a fenced code block")
return nil
}
if !bytes.Equal(b.Language(t.markdown), []byte("toml")) {
t.assertNodef(b, "config fenced code block isn't toml language")
return nil
}
return nil
}
// Links from one markdown file to another in the repo should be relative
func relativeTelegrafLinks(t *T, root ast.Node) error {
for n := root.FirstChild(); n != nil; n = n.NextSibling() {
if _, ok := n.(*ast.Paragraph); !ok {
continue
}
for n2 := n.FirstChild(); n2 != nil; n2 = n2.NextSibling() {
var l *ast.Link
var ok bool
if l, ok = n2.(*ast.Link); !ok {
continue
}
link := string(l.Destination)
if strings.HasPrefix(link, "https://github.com/influxdata/telegraf/blob") {
t.assertNodef(n, "in-repo link must be relative: %s", link)
}
}
}
return nil
}
// To do: Check markdown files that aren't plugin readme files for paragraphs
// with long lines
// To do: Check the toml inside the configuration section for syntax errors

@@ -0,0 +1,32 @@
package main
type set struct {
m map[string]struct{}
}
func (s *set) add(key string) {
s.m[key] = struct{}{}
}
func (s *set) has(key string) bool {
var ok bool
_, ok = s.m[key]
return ok
}
func (s *set) forEach(f func(string)) {
for key := range s.m {
f(key)
}
}
func newSet(elems []string) *set {
s := &set{
m: make(map[string]struct{}),
}
for _, elem := range elems {
s.add(elem)
}
return s
}
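
For reference, a small usage sketch of this helper within the same package (hypothetical, not part of the commit; assumes "fmt" is imported), mirroring how requiredSections in rules.go consumes it:

```go
// Hypothetical usage of the set helper.
func exampleSetUsage() {
	required := newSet([]string{"Metrics", "Example Output"})
	if !required.has("Metrics") {
		fmt.Println("missing Metrics section")
	}
	required.forEach(func(title string) {
		fmt.Println("required section:", title)
	})
}
```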