From df6d44e43dda93f8a22a7e0d40f899541e14ef46 Mon Sep 17 00:00:00 2001 From: skartikey <1942366+skartikey@users.noreply.github.com> Date: Wed, 9 Apr 2025 16:08:11 +0100 Subject: [PATCH] feat(inputs.whois): Support IDN domains (#16700) --- .../testcases/invalid_domain/expected.err | 2 ++ .../testcases/invalid_domain/telegraf.conf | 2 +- .../testcases/valid_idn_domain/expected.out | 2 ++ .../valid_idn_domain/input_münchen.de.txt | 7 ++++++ .../input_xn--mnchn-kva.de.txt | 7 ++++++ .../testcases/valid_idn_domain/telegraf.conf | 3 +++ plugins/inputs/whois/whois.go | 23 +++++++++++++++++-- 7 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 plugins/inputs/whois/testcases/invalid_domain/expected.err create mode 100644 plugins/inputs/whois/testcases/valid_idn_domain/expected.out create mode 100644 plugins/inputs/whois/testcases/valid_idn_domain/input_münchen.de.txt create mode 100644 plugins/inputs/whois/testcases/valid_idn_domain/input_xn--mnchn-kva.de.txt create mode 100644 plugins/inputs/whois/testcases/valid_idn_domain/telegraf.conf diff --git a/plugins/inputs/whois/testcases/invalid_domain/expected.err b/plugins/inputs/whois/testcases/invalid_domain/expected.err new file mode 100644 index 000000000..f6abbb9a0 --- /dev/null +++ b/plugins/inputs/whois/testcases/invalid_domain/expected.err @@ -0,0 +1,2 @@ +invalid domain format: "*.example.com" +invalid domain format: "no-tld" \ No newline at end of file diff --git a/plugins/inputs/whois/testcases/invalid_domain/telegraf.conf b/plugins/inputs/whois/testcases/invalid_domain/telegraf.conf index 7f2a7ca46..698d227ec 100644 --- a/plugins/inputs/whois/testcases/invalid_domain/telegraf.conf +++ b/plugins/inputs/whois/testcases/invalid_domain/telegraf.conf @@ -1,3 +1,3 @@ [[inputs.whois]] - domains = ["invalid-domain.xyz"] + domains = ["invalid-domain.xyz", "*.example.com", "no-tld"] timeout = "5s" diff --git a/plugins/inputs/whois/testcases/valid_idn_domain/expected.out b/plugins/inputs/whois/testcases/valid_idn_domain/expected.out new file mode 100644 index 000000000..767c26f75 --- /dev/null +++ b/plugins/inputs/whois/testcases/valid_idn_domain/expected.out @@ -0,0 +1,2 @@ +whois,domain=münchen.de,status=unknown registrant="not set",registrar="DENIC eG (the German domain registry)",name_servers="ns01e.muenchen.de,ns02e.muenchen.de",dnssec_enabled=false,creation_timestamp=808358400i,expiration_timestamp=1912896000i,updated_timestamp=1704067200i,expiry=172283583i +whois,domain=xn--mnchn-kva.de,status=unknown registrant="not set",registrar="DENIC eG",name_servers="ns01e.muenchen.de,ns02e.muenchen.de",dnssec_enabled=false,creation_timestamp=808358400i,expiration_timestamp=1912896000i,updated_timestamp=1704067200i,expiry=172283583i \ No newline at end of file diff --git a/plugins/inputs/whois/testcases/valid_idn_domain/input_münchen.de.txt b/plugins/inputs/whois/testcases/valid_idn_domain/input_münchen.de.txt new file mode 100644 index 000000000..58488c770 --- /dev/null +++ b/plugins/inputs/whois/testcases/valid_idn_domain/input_münchen.de.txt @@ -0,0 +1,7 @@ +Domain Name: münchen.de +Registrar: DENIC eG (the German domain registry) +Updated Date: 2024-01-01T00:00:00Z +Creation Date: 1995-08-14T00:00:00Z +Registry Expiry Date: 2030-08-14T00:00:00Z +Name Server: ns01e.muenchen.de +Name Server: ns02e.muenchen.de diff --git a/plugins/inputs/whois/testcases/valid_idn_domain/input_xn--mnchn-kva.de.txt b/plugins/inputs/whois/testcases/valid_idn_domain/input_xn--mnchn-kva.de.txt new file mode 100644 index 000000000..ebdfcf70d --- /dev/null +++ b/plugins/inputs/whois/testcases/valid_idn_domain/input_xn--mnchn-kva.de.txt @@ -0,0 +1,7 @@ +Domain Name: xn--mnchn-kva.de +Registrar: DENIC eG +Updated Date: 2024-01-01T00:00:00Z +Creation Date: 1995-08-14T00:00:00Z +Registry Expiry Date: 2030-08-14T00:00:00Z +Name Server: ns01e.muenchen.de +Name Server: ns02e.muenchen.de diff --git a/plugins/inputs/whois/testcases/valid_idn_domain/telegraf.conf b/plugins/inputs/whois/testcases/valid_idn_domain/telegraf.conf new file mode 100644 index 000000000..afca49180 --- /dev/null +++ b/plugins/inputs/whois/testcases/valid_idn_domain/telegraf.conf @@ -0,0 +1,3 @@ +[[inputs.whois]] + domains = ["münchen.de", "xn--mnchn-kva.de"] + timeout = "5s" diff --git a/plugins/inputs/whois/whois.go b/plugins/inputs/whois/whois.go index 35cbe17d1..9be80bd63 100644 --- a/plugins/inputs/whois/whois.go +++ b/plugins/inputs/whois/whois.go @@ -12,6 +12,7 @@ import ( "github.com/likexian/whois" "github.com/likexian/whois-parser" + "golang.org/x/net/idna" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/config" @@ -21,6 +22,8 @@ import ( //go:embed sample.conf var sampleConfig string +const maxDomainLength = 253 + type Whois struct { Domains []string `toml:"domains"` Server string `toml:"server"` @@ -55,10 +58,26 @@ func (w *Whois) Init() error { return nil } -var domainRegex = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9-]{0,253}[a-zA-Z0-9]\.[a-zA-Z]{2,}$`) +var asciiDomainRegex = regexp.MustCompile(`^([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,63}$`) func isValidDomain(domain string) bool { - return domainRegex.MatchString(domain) + if len(domain) > maxDomainLength { + return false + } + + // Handle standard ASCII domains + if asciiDomainRegex.MatchString(domain) { + return true + } + + // Try to convert to Punycode (handles IDNs) + p := idna.New(idna.MapForLookup(), idna.StrictDomainName(true)) + punycodeVersion, err := p.ToASCII(domain) + if err != nil { + return false + } + + return asciiDomainRegex.MatchString(punycodeVersion) } func (w *Whois) Gather(acc telegraf.Accumulator) error {