From e687cd7a5f8ae2684b152c041ad45b107fe6cac3 Mon Sep 17 00:00:00 2001 From: Avinash Nigam <56562150+avinash-nigam@users.noreply.github.com> Date: Tue, 8 Dec 2020 12:40:11 -0800 Subject: [PATCH] SQL Server HA/DR Availability Group queries (#8379) --- etc/telegraf.conf | 94 +++++++++---------- plugins/inputs/sqlserver/README.md | 68 ++++++++++---- plugins/inputs/sqlserver/sqlserver.go | 96 ++++++++++---------- plugins/inputs/sqlserver/sqlserverqueries.go | 94 +++++++++++++++++++ 4 files changed, 235 insertions(+), 117 deletions(-) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index f67ddfbf1..50224cb00 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -5132,62 +5132,58 @@ # ## See https://github.com/denisenkom/go-mssqldb for detailed connection # ## parameters, in particular, tls connections can be created like so: # ## "encrypt=true;certificate=;hostNameInCertificate=" -# # servers = [ -# # "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", -# # ] -# -# ## This enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2 -# ## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type. -# ## Possible values for database_type are -# ## "AzureSQLDB" -# ## "SQLServer" -# ## "AzureSQLManagedInstance" +# servers = [ +# "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", +# ] + +# ## "database_type" enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2 +# ## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type. +# ## Possible values for database_type are - "AzureSQLDB" or "AzureSQLManagedInstance" or "SQLServer" + +# ## Queries enabled by default for database_type = "AzureSQLDB" are - +# ## AzureSQLDBResourceStats, AzureSQLDBResourceGovernance, AzureSQLDBWaitStats, AzureSQLDBDatabaseIO, AzureSQLDBServerProperties, +# ## AzureSQLDBOsWaitstats, AzureSQLDBMemoryClerks, AzureSQLDBPerformanceCounters, AzureSQLDBRequests, AzureSQLDBSchedulers + # # database_type = "AzureSQLDB" -# -# + +# ## A list of queries to include. If not specified, all the above listed queries are used. +# # include_query = [] + +# ## A list of queries to explicitly ignore. +# # exclude_query = [] + +# ## Queries enabled by default for database_type = "AzureSQLManagedInstance" are - +# ## AzureSQLMIResourceStats, AzureSQLMIResourceGovernance, AzureSQLMIDatabaseIO, AzureSQLMIServerProperties, AzureSQLMIOsWaitstats, +# ## AzureSQLMIMemoryClerks, AzureSQLMIPerformanceCounters, AzureSQLMIRequests, AzureSQLMISchedulers + +# # database_type = "AzureSQLManagedInstance" + +# # include_query = [] + +# # exclude_query = [] + +# ## Queries enabled by default for database_type = "SQLServer" are - +# ## SQLServerPerformanceCounters, SQLServerWaitStatsCategorized, SQLServerDatabaseIO, SQLServerProperties, SQLServerMemoryClerks, +# ## SQLServerSchedulers, SQLServerRequests, SQLServerVolumeSpace, SQLServerCpu + +# database_type = "SQLServer" + +# include_query = [] + +# ## SQLServerAvailabilityReplicaStates and SQLServerDatabaseReplicaStates are optional queries and hence excluded here as default +# exclude_query = ["SQLServerAvailabilityReplicaStates", "SQLServerDatabaseReplicaStates"] + +# ## Following are old config settings, you may use them only if you are using the earlier flavor of queries, however it is recommended to use +# ## the new mechanism of identifying the database_type there by use it's corresponding queries + # ## Optional parameter, setting this to 2 will use a new version # ## of the collection queries that break compatibility with the original # ## dashboards. # ## Version 2 - is compatible from SQL Server 2012 and later versions and also for SQL Azure DB -# query_version = 2 -# +# # query_version = 2 + # ## If you are using AzureDB, setting this to true will gather resource utilization metrics # # azuredb = false -# -# ## Possible queries -# ## Version 2: -# ## - PerformanceCounters -# ## - WaitStatsCategorized -# ## - DatabaseIO -# ## - ServerProperties -# ## - MemoryClerk -# ## - Schedulers -# ## - SqlRequests -# ## - VolumeSpace -# ## - Cpu -# -# ## Version 1: -# ## - PerformanceCounters -# ## - WaitStatsCategorized -# ## - CPUHistory -# ## - DatabaseIO -# ## - DatabaseSize -# ## - DatabaseStats -# ## - DatabaseProperties -# ## - MemoryClerk -# ## - VolumeSpace -# ## - PerformanceMetrics -# -# -# ## Queries enabled by default for specific Database Type -# ## database_type = AzureSQLDB -# ## AzureDBWaitStats, AzureDBResourceStats, AzureDBResourceGovernance, sqlAzureDBDatabaseIO -# -# ## A list of queries to include. If not specified, all the above listed queries are used. -# # include_query = [] -# -# ## A list of queries to explicitly ignore. -# exclude_query = [ 'Schedulers' , 'SqlRequests'] # # Gather timeseries from Google Cloud Platform v3 monitoring API diff --git a/plugins/inputs/sqlserver/README.md b/plugins/inputs/sqlserver/README.md index be5b98aa8..db15c4af7 100644 --- a/plugins/inputs/sqlserver/README.md +++ b/plugins/inputs/sqlserver/README.md @@ -48,22 +48,54 @@ GO ## See https://github.com/denisenkom/go-mssqldb for detailed connection ## parameters, in particular, tls connections can be created like so: ## "encrypt=true;certificate=;hostNameInCertificate=" - # servers = [ - # "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", - # ] + servers = [ + "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", + ] + + ## "database_type" enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2 + ## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type. + ## Possible values for database_type are - "AzureSQLDB" or "AzureSQLManagedInstance" or "SQLServer" + + ## Queries enabled by default for database_type = "AzureSQLDB" are - + ## AzureSQLDBResourceStats, AzureSQLDBResourceGovernance, AzureSQLDBWaitStats, AzureSQLDBDatabaseIO, AzureSQLDBServerProperties, + ## AzureSQLDBOsWaitstats, AzureSQLDBMemoryClerks, AzureSQLDBPerformanceCounters, AzureSQLDBRequests, AzureSQLDBSchedulers - ## This enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2 - ## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type. - ## Possible values for database_type are - ## "AzureSQLDB" - ## "SQLServer" - ## "AzureSQLManagedInstance" # database_type = "AzureSQLDB" + ## A list of queries to include. If not specified, all the above listed queries are used. + # include_query = [] + + ## A list of queries to explicitly ignore. + # exclude_query = [] + + ## Queries enabled by default for database_type = "AzureSQLManagedInstance" are - + ## AzureSQLMIResourceStats, AzureSQLMIResourceGovernance, AzureSQLMIDatabaseIO, AzureSQLMIServerProperties, AzureSQLMIOsWaitstats, + ## AzureSQLMIMemoryClerks, AzureSQLMIPerformanceCounters, AzureSQLMIRequests, AzureSQLMISchedulers + + # database_type = "AzureSQLManagedInstance" + + # include_query = [] + + # exclude_query = [] + + ## Queries enabled by default for database_type = "SQLServer" are - + ## SQLServerPerformanceCounters, SQLServerWaitStatsCategorized, SQLServerDatabaseIO, SQLServerProperties, SQLServerMemoryClerks, + ## SQLServerSchedulers, SQLServerRequests, SQLServerVolumeSpace, SQLServerCpu + + database_type = "SQLServer" + + include_query = [] + + ## SQLServerAvailabilityReplicaStates and SQLServerDatabaseReplicaStates are optional queries and hence excluded here as default + exclude_query = ["SQLServerAvailabilityReplicaStates", "SQLServerDatabaseReplicaStates"] + + ## Following are old config settings, you may use them only if you are using the earlier flavor of queries, however it is recommended to use + ## the new mechanism of identifying the database_type there by use it's corresponding queries + ## Optional parameter, setting this to 2 will use a new version - ## of the collection queries that break compatibility with the original dashboards. - ## Version 2 - is compatible from SQL Server 2008 Sp3 and later versions and also for SQL Azure DB - ## Version 2 is in the process of being deprecated, please consider using database_type. + ## of the collection queries that break compatibility with the original + ## dashboards. + ## Version 2 - is compatible from SQL Server 2012 and later versions and also for SQL Azure DB # query_version = 2 ## If you are using AzureDB, setting this to true will gather resource utilization metrics @@ -105,6 +137,9 @@ GO ## - SQLServerRequests ## - SQLServerVolumeSpace ## - SQLServerCpu + ## and following as optional (if mentioned in the include_query list) + ## - SQLServerAvailabilityReplicaStates + ## - SQLServerDatabaseReplicaStates ## Version 2 by default collects the following queries ## Version 2 is being deprecated, please consider using database_type. @@ -133,13 +168,6 @@ GO - ## A list of queries to include. If not specified, all the above listed queries are used. - # include_query = [] - - ## A list of queries to explicitly ignore. - exclude_query = [ 'Schedulers' , 'SqlRequests' ] - - ``` @@ -240,6 +268,8 @@ These are metrics for Azure SQL Managed instance, are very similar to version 2 blocking sessions. - SQLServerVolumeSpace - uses `sys.dm_os_volume_stats` to get total, used and occupied space on every disk that contains a data or log file. (Note that even if enabled it won't get any data from Azure SQL Database or SQL Managed Instance). It is pointless to run this with high frequency (ie: every 10s), but it won't cause any problem. - SQLServerCpu - uses the buffer ring (`sys.dm_os_ring_buffers`) to get CPU data, the table is updated once per minute. (Note that even if enabled it won't get any data from Azure SQL Database or SQL Managed Instance). +- SQLServerAvailabilityReplicaStates: Collects availability replica state information from `sys.dm_hadr_availability_replica_states` for a High Availability / Disaster Recovery (HADR) setup +- SQLServerDatabaseReplicaStates: Collects database replica state information from `sys.dm_hadr_database_replica_states` for a High Availability / Disaster Recovery (HADR) setup #### Output Measures diff --git a/plugins/inputs/sqlserver/sqlserver.go b/plugins/inputs/sqlserver/sqlserver.go index 4b0bd5e3f..3baa5ed6a 100644 --- a/plugins/inputs/sqlserver/sqlserver.go +++ b/plugins/inputs/sqlserver/sqlserver.go @@ -46,62 +46,58 @@ const sampleConfig = ` ## See https://github.com/denisenkom/go-mssqldb for detailed connection ## parameters, in particular, tls connections can be created like so: ## "encrypt=true;certificate=;hostNameInCertificate=" -# servers = [ -# "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", -# ] +servers = [ + "Server=192.168.1.10;Port=1433;User Id=;Password=;app name=telegraf;log=1;", +] + +## "database_type" enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2 +## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type. +## Possible values for database_type are - "AzureSQLDB" or "AzureSQLManagedInstance" or "SQLServer" + +## Queries enabled by default for database_type = "AzureSQLDB" are - +## AzureSQLDBResourceStats, AzureSQLDBResourceGovernance, AzureSQLDBWaitStats, AzureSQLDBDatabaseIO, AzureSQLDBServerProperties, +## AzureSQLDBOsWaitstats, AzureSQLDBMemoryClerks, AzureSQLDBPerformanceCounters, AzureSQLDBRequests, AzureSQLDBSchedulers -## This enables a specific set of queries depending on the database type. If specified, it replaces azuredb = true/false and query_version = 2 -## In the config file, the sql server plugin section should be repeated each with a set of servers for a specific database_type. -## Possible values for database_type are -## "AzureSQLDB" -## "SQLServer" -## "AzureSQLManagedInstance" # database_type = "AzureSQLDB" - -## Optional parameter, setting this to 2 will use a new version -## of the collection queries that break compatibility with the original -## dashboards. -## Version 2 - is compatible from SQL Server 2012 and later versions and also for SQL Azure DB -query_version = 2 - -## If you are using AzureDB, setting this to true will gather resource utilization metrics -# azuredb = false - -## Possible queries -## Version 2: -## - PerformanceCounters -## - WaitStatsCategorized -## - DatabaseIO -## - ServerProperties -## - MemoryClerk -## - Schedulers -## - SqlRequests -## - VolumeSpace -## - Cpu - -## Version 1: -## - PerformanceCounters -## - WaitStatsCategorized -## - CPUHistory -## - DatabaseIO -## - DatabaseSize -## - DatabaseStats -## - DatabaseProperties -## - MemoryClerk -## - VolumeSpace -## - PerformanceMetrics - - -## Queries enabled by default for specific Database Type -## database_type = AzureSQLDB - ## AzureDBWaitStats, AzureDBResourceStats, AzureDBResourceGovernance, sqlAzureDBDatabaseIO - ## A list of queries to include. If not specified, all the above listed queries are used. # include_query = [] ## A list of queries to explicitly ignore. -exclude_query = [ 'Schedulers' , 'SqlRequests'] +# exclude_query = [] + +## Queries enabled by default for database_type = "AzureSQLManagedInstance" are - +## AzureSQLMIResourceStats, AzureSQLMIResourceGovernance, AzureSQLMIDatabaseIO, AzureSQLMIServerProperties, AzureSQLMIOsWaitstats, +## AzureSQLMIMemoryClerks, AzureSQLMIPerformanceCounters, AzureSQLMIRequests, AzureSQLMISchedulers + +# database_type = "AzureSQLManagedInstance" + +# include_query = [] + +# exclude_query = [] + +## Queries enabled by default for database_type = "SQLServer" are - +## SQLServerPerformanceCounters, SQLServerWaitStatsCategorized, SQLServerDatabaseIO, SQLServerProperties, SQLServerMemoryClerks, +## SQLServerSchedulers, SQLServerRequests, SQLServerVolumeSpace, SQLServerCpu + +database_type = "SQLServer" + +include_query = [] + +## SQLServerAvailabilityReplicaStates and SQLServerDatabaseReplicaStates are optional queries and hence excluded here as default +exclude_query = ["SQLServerAvailabilityReplicaStates", "SQLServerDatabaseReplicaStates"] + +## Following are old config settings, you may use them only if you are using the earlier flavor of queries, however it is recommended to use +## the new mechanism of identifying the database_type there by use it's corresponding queries + +## Optional parameter, setting this to 2 will use a new version +## of the collection queries that break compatibility with the original +## dashboards. +## Version 2 - is compatible from SQL Server 2012 and later versions and also for SQL Azure DB +# query_version = 2 + +## If you are using AzureDB, setting this to true will gather resource utilization metrics +# azuredb = false ` // SampleConfig return the sample configuration @@ -159,6 +155,8 @@ func initQueries(s *SQLServer) error { queries["SQLServerRequests"] = Query{ScriptName: "SQLServerRequests", Script: sqlServerRequests, ResultByRow: false} queries["SQLServerVolumeSpace"] = Query{ScriptName: "SQLServerVolumeSpace", Script: sqlServerVolumeSpace, ResultByRow: false} queries["SQLServerCpu"] = Query{ScriptName: "SQLServerCpu", Script: sqlServerRingBufferCpu, ResultByRow: false} + queries["SQLServerAvailabilityReplicaStates"] = Query{ScriptName: "SQLServerAvailabilityReplicaStates", Script: sqlServerAvailabilityReplicaStates, ResultByRow: false} + queries["SQLServerDatabaseReplicaStates"] = Query{ScriptName: "SQLServerDatabaseReplicaStates", Script: sqlServerDatabaseReplicaStates, ResultByRow: false} } else { // If this is an AzureDB instance, grab some extra metrics if s.AzureDB { diff --git a/plugins/inputs/sqlserver/sqlserverqueries.go b/plugins/inputs/sqlserver/sqlserverqueries.go index 3fdbd5d34..2af8e1eb7 100644 --- a/plugins/inputs/sqlserver/sqlserverqueries.go +++ b/plugins/inputs/sqlserver/sqlserverqueries.go @@ -1150,3 +1150,97 @@ FROM ( ORDER BY [record_id] DESC ) AS z ` + +const sqlServerAvailabilityReplicaStates string = ` +IF SERVERPROPERTY('EngineEdition') NOT IN (2,3,4) BEGIN /*NOT IN Standard,Enterpris,Express*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@ServerName + ',Database:' + DB_NAME() +' is not a SQL Server Standard,Enterprise or Express. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +IF SERVERPROPERTY('IsHadrEnabled') = 1 BEGIN + SELECT + 'sqlserver_hadr_replica_states' AS [measurement], + REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], + convert(nvarchar(36), hars.replica_id) as replica_id, + ar.replica_server_name, + convert(nvarchar(36), hars.group_id) as group_id, + ag.name AS group_name, + ag.basic_features, + ag.is_distributed, + hags.synchronization_health_desc AS ag_synchronization_health_desc, + ar.replica_metadata_id, + ar.availability_mode, + ar.availability_mode_desc, + ar.failover_mode, + ar.failover_mode_desc, + ar.session_timeout, + ar.primary_role_allow_connections, + ar.primary_role_allow_connections_desc, + ar.secondary_role_allow_connections, + ar.secondary_role_allow_connections_desc, + ar.seeding_mode, + ar.seeding_mode_desc, + hars.is_local, + hars.role, + hars.role_desc, + hars.operational_state, + hars.operational_state_desc, + hars.connected_state, + hars.connected_state_desc, + hars.recovery_health, + hars.recovery_health_desc, + hars.synchronization_health AS replica_synchronization_health, + hars.synchronization_health_desc AS replica_synchronization_health_desc, + hars.last_connect_error_number, + hars.last_connect_error_description, + hars.last_connect_error_timestamp + from sys.dm_hadr_availability_replica_states AS hars + inner join sys.availability_replicas AS ar on hars.replica_id = ar.replica_id + inner join sys.availability_groups AS ag on ar.group_id = ag.group_id + inner join sys.dm_hadr_availability_group_states AS hags ON hags.group_id = ag.group_id +END +` + +const sqlServerDatabaseReplicaStates string = ` +IF SERVERPROPERTY('EngineEdition') NOT IN (2,3,4) BEGIN /*NOT IN Standard,Enterpris,Express*/ + DECLARE @ErrorMessage AS nvarchar(500) = 'Telegraf - Connection string Server:'+ @@ServerName + ',Database:' + DB_NAME() +' is not a SQL Server Standard,Enterprise or Express. Check the database_type parameter in the telegraf configuration.'; + RAISERROR (@ErrorMessage,11,1) + RETURN +END + +IF SERVERPROPERTY('IsHadrEnabled') = 1 BEGIN + SELECT + 'sqlserver_hadr_dbreplica_states' AS [measurement], + REPLACE(@@SERVERNAME,'\',':') AS [sql_instance], + database_id, + db_name(database_id) as database_name, + convert(nvarchar(36), drs.replica_id) as replica_id, + ar.replica_server_name, + convert(nvarchar(36), drs.group_database_id) as group_database_id, + is_primary_replica, + synchronization_state, + synchronization_state_desc, + is_commit_participant, + synchronization_health, + synchronization_health_desc, + database_state, + database_state_desc, + is_suspended, + suspend_reason, + suspend_reason_desc, + last_sent_time, + last_received_time, + last_hardened_time, + last_redone_time, + log_send_queue_size, + log_send_rate, + redo_queue_size, + redo_rate, + filestream_send_rate, + last_commit_time, + secondary_lag_seconds + from sys.dm_hadr_database_replica_states AS drs + inner join sys.availability_replicas AS ar on drs.replica_id = ar.replica_id +END +`