Skip to content

Commit d29840a

Browse files
authored
update wait_class query; add vault; cleanup logs; update some deps (#34)
--------- Signed-off-by: Mark Nelson <mark.x.nelson@oracle.com>
1 parent 1d4091b commit d29840a

File tree

10 files changed

+208
-81
lines changed

10 files changed

+208
-81
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ OS_TYPE ?= $(shell uname -s | tr '[:upper:]' '[:lower:]')
33
ARCH_TYPE ?= $(subst x86_64,amd64,$(patsubst i%86,386,$(ARCH)))
44
GOOS ?= $(shell go env GOOS)
55
GOARCH ?= $(shell go env GOARCH)
6-
VERSION ?= 1.0.0
6+
VERSION ?= 1.1.0
77
LDFLAGS := -X main.Version=$(VERSION)
88
GOFLAGS := -ldflags "$(LDFLAGS) -s -w"
99
BUILD_ARGS = --build-arg VERSION=$(VERSION)

README.md

+24-4
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22

33
This project aims to provide observability for the Oracle Database so that users can understand performance and diagnose issues easily across applications and database. Over time, this project will provide not just metrics, but also logging and tracing support, and integration into popular frameworks like Spring Boot. The project aims to deliver functionality to support both cloud and on-premises databases, including those running in Kubernetes and containers.
44

5-
In the first production release, v1.0, this project provides a [Prometheus](https://prometheus.io/) exporter for Oracle Database that is based in part on a Prometheus exporter created by [Seth Miller](https://github.com/iamseth/oracledb_exporter) with changes to comply with various Oracle standards and policies.
5+
From the first production release, v1.0, onwards, this project provides a [Prometheus](https://prometheus.io/) exporter for Oracle Database that is based in part on a Prometheus exporter created by [Seth Miller](https://github.com/iamseth/oracledb_exporter) with changes to comply with various Oracle standards and policies.
66

77
Contributions are welcome - please see [contributing](CONTRIBUTING.md).
88

99

1010
### Table of Contents
1111

12+
- [Release Notes](#release-notes)
1213
- [Roadmap](#roadmap)
1314
- [Standard metrics](#standard-metrics)
1415
- [Database permissions required](#database-permissions-required)
@@ -17,15 +18,26 @@ Contributions are welcome - please see [contributing](CONTRIBUTING.md).
1718
- [Test/demo environment using Docker Compose](#testdemo-environment-with-docker-compose)
1819
- [Kubernetes](#kubernetes)
1920
- [Standalone binary](#standalone-binary)
21+
- [Using OCI Vault](#using-oci-vault)
2022
- [Custom metrics](#custom-metrics)
2123
- [Grafana dashboards](#grafana-dashboards)
2224
- [Monitoring Transactional Event Queues](#monitoring-transactional-event-queues)
2325
- [Developer notes](#developer-notes)
2426

27+
## Release Notes
2528

26-
## Roadmap
29+
### Version 1.1, October 27, 2023
30+
31+
This release includes the following changes:
2732

28-
### Version 1.0
33+
- The query for the standard metric `wait_class` has been updated so that it will work in both container databases
34+
and pluggable databases, including in Oracle Autonomous Database instances. Note that this query will not return
35+
any data unless the database instance is under load.
36+
- Support for reading the database password from OCI Vault has been added (see [details](#using-oci-vault))
37+
- Log messages have been improved
38+
- Some dependencies have been updated
39+
40+
### Version 1.0, September 13, 2023
2941

3042
The first production release, v1.0, includes the following features:
3143

@@ -41,7 +53,7 @@ Note that this exporter uses a different Oracle Database driver which in turn us
4153

4254
The interfaces for this version have been kept as close as possible to those of earlier alpha releases in this repository to assist with migration. However, it should be expected that there may be breaking changes in future releases.
4355

44-
### Plans
56+
## Roadmap
4557

4658
We always welcome input on features you would like to see supported. Please open an issue in this repository with your suggestions.
4759

@@ -355,6 +367,14 @@ Usage of oracledb_exporter:
355367
Path to configuration file that can enable TLS or authentication.
356368
```
357369

370+
### Using OCI Vault
371+
372+
The exporter will read the password from a secret stored in OCI Vault if you set these two environment
373+
variables:
374+
375+
- `VAULT_ID` should be set to the OCID of the OCI vault that you wish to use
376+
- `VAULT_SECRET_NAME` should be set to the name of the secret in the OCI vault which contains the database password
377+
358378
## Custom metrics
359379

360380
The exporter allows definition of arbitrary custom metrics in a TOML file. To specify this file to the

collector/collector.go

+53-44
Original file line numberDiff line numberDiff line change
@@ -261,21 +261,22 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
261261

262262
if err = e.db.Ping(); err != nil {
263263
if strings.Contains(err.Error(), "sql: database is closed") {
264-
level.Info(e.logger).Log("Reconnecting to DB")
264+
level.Info(e.logger).Log("msg", "Reconnecting to DB")
265265
err = e.connect()
266266
if err != nil {
267-
level.Error(e.logger).Log("Error reconnecting to DB", err)
267+
level.Error(e.logger).Log("msg", "Error reconnecting to DB", err)
268268
}
269269
}
270270
}
271271

272272
if err = e.db.Ping(); err != nil {
273-
level.Error(e.logger).Log("Error pinging oracle:", err)
273+
level.Error(e.logger).Log("msg", "Error pinging oracle:",
274+
"error", err)
274275
e.up.Set(0)
275276
return
276277
}
277278

278-
level.Debug(e.logger).Log("Successfully pinged Oracle database: ", maskDsn(e.connectString))
279+
level.Debug(e.logger).Log("msg", "Successfully pinged Oracle database: "+maskDsn(e.connectString))
279280
e.up.Set(1)
280281

281282
if e.checkIfMetricsChanged() {
@@ -291,50 +292,57 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
291292
go func() {
292293
defer wg.Done()
293294

294-
level.Debug(e.logger).Log("About to scrape metric: ")
295-
level.Debug(e.logger).Log("- Metric MetricsDesc: ", metric.MetricsDesc)
296-
level.Debug(e.logger).Log("- Metric Context: ", metric.Context)
297-
level.Debug(e.logger).Log("- Metric MetricsType: ", metric.MetricsType)
298-
level.Debug(e.logger).Log("- Metric MetricsBuckets: ", metric.MetricsBuckets, "(Ignored unless Histogram type)")
299-
level.Debug(e.logger).Log("- Metric Labels: ", metric.Labels)
300-
level.Debug(e.logger).Log("- Metric FieldToAppend: ", metric.FieldToAppend)
301-
level.Debug(e.logger).Log("- Metric IgnoreZeroResult: ", metric.IgnoreZeroResult)
302-
level.Debug(e.logger).Log("- Metric Request: ", metric.Request)
295+
level.Debug(e.logger).Log("msg", "About to scrape metric",
296+
"Context", metric.Context,
297+
"MetricsDesc", fmt.Sprint(metric.MetricsDesc),
298+
"MetricsType", fmt.Sprint(metric.MetricsType),
299+
"MetricsBuckets", fmt.Sprint(metric.MetricsBuckets), // ignored unless histogram
300+
"Labels", fmt.Sprint(metric.Labels),
301+
"FieldToAppend", metric.FieldToAppend,
302+
"IgnoreZeroResult", metric.IgnoreZeroResult,
303+
"Request", metric.Request)
303304

304305
if len(metric.Request) == 0 {
305-
level.Error(e.logger).Log("Error scraping for ", metric.MetricsDesc, ". Did you forget to define request in your toml file?")
306+
level.Error(e.logger).Log("msg", "Error scraping for "+fmt.Sprint(metric.MetricsDesc)+". Did you forget to define request in your toml file?")
306307
return
307308
}
308309

309310
if len(metric.MetricsDesc) == 0 {
310-
level.Error(e.logger).Log("Error scraping for query", metric.Request, ". Did you forget to define metricsdesc in your toml file?")
311+
level.Error(e.logger).Log("msg", "Error scraping for query "+fmt.Sprint(metric.Request)+". Did you forget to define metricsdesc in your toml file?")
311312
return
312313
}
313314

314315
for column, metricType := range metric.MetricsType {
315316
if metricType == "histogram" {
316317
_, ok := metric.MetricsBuckets[column]
317318
if !ok {
318-
level.Error(e.logger).Log("Unable to find MetricsBuckets configuration key for metric. (metric=" + column + ")")
319+
level.Error(e.logger).Log("msg", "Unable to find MetricsBuckets configuration key for metric. (metric="+column+")")
319320
return
320321
}
321322
}
322323
}
323324

324325
scrapeStart := time.Now()
325326
if err = e.ScrapeMetric(e.db, ch, metric); err != nil {
326-
level.Error(e.logger).Log("Error scraping for", metric.Context, "_", metric.MetricsDesc, time.Since(scrapeStart), ":", err)
327+
level.Error(e.logger).Log("msg", "Error scraping metric",
328+
"Context", metric.Context,
329+
"MetricsDesc", fmt.Sprint(metric.MetricsDesc),
330+
"time", time.Since(scrapeStart),
331+
"error", err)
327332
e.scrapeErrors.WithLabelValues(metric.Context).Inc()
328333
} else {
329-
level.Debug(e.logger).Log("Successfully scraped metric: ", metric.Context, metric.MetricsDesc, time.Since(scrapeStart))
334+
level.Debug(e.logger).Log("msg", "Successfully scraped metric",
335+
"Context", metric.Context,
336+
"MetricDesc", fmt.Sprint(metric.MetricsDesc),
337+
"time", time.Since(scrapeStart))
330338
}
331339
}()
332340
}
333341
wg.Wait()
334342
}
335343

336344
func (e *Exporter) connect() error {
337-
level.Debug(e.logger).Log("Launching connection: ", maskDsn(e.connectString))
345+
level.Debug(e.logger).Log("msg", "Launching connection to "+maskDsn(e.connectString))
338346

339347
var P godror.ConnectionParams
340348
P.Username, P.Password, P.ConnectString = e.user, godror.NewPassword(e.password), e.connectString
@@ -344,11 +352,11 @@ func (e *Exporter) connect() error {
344352
// level.Error(e.logger).Log("Error while connecting to", e.dsn)
345353
// return err
346354
// }
347-
level.Debug(e.logger).Log("set max idle connections to ", e.config.MaxIdleConns)
355+
level.Debug(e.logger).Log("msg", "set max idle connections to "+strconv.Itoa(e.config.MaxIdleConns))
348356
db.SetMaxIdleConns(e.config.MaxIdleConns)
349-
level.Debug(e.logger).Log("set max open connections to ", e.config.MaxOpenConns)
357+
level.Debug(e.logger).Log("msg", "set max open connections to "+strconv.Itoa(e.config.MaxOpenConns))
350358
db.SetMaxOpenConns(e.config.MaxOpenConns)
351-
level.Debug(e.logger).Log("Successfully connected to: ", maskDsn(e.connectString))
359+
level.Debug(e.logger).Log("msg", "Successfully connected to "+maskDsn(e.connectString))
352360
e.db = db
353361
return nil
354362
}
@@ -358,15 +366,15 @@ func (e *Exporter) checkIfMetricsChanged() bool {
358366
if len(_customMetrics) == 0 {
359367
continue
360368
}
361-
level.Debug(e.logger).Log("Checking modifications in following metrics definition file:", _customMetrics)
369+
level.Debug(e.logger).Log("msg", "Checking modifications in following metrics definition file:"+_customMetrics)
362370
h := sha256.New()
363371
if err := hashFile(h, _customMetrics); err != nil {
364-
level.Error(e.logger).Log("Unable to get file hash", err)
372+
level.Error(e.logger).Log("msg", "Unable to get file hash", "error", err)
365373
return false
366374
}
367375
// If any of files has been changed reload metrics
368376
if !bytes.Equal(hashMap[i], h.Sum(nil)) {
369-
level.Info(e.logger).Log(_customMetrics, "has been changed. Reloading metrics...")
377+
level.Info(e.logger).Log("msg", _customMetrics+" has been changed. Reloading metrics...")
370378
hashMap[i] = h.Sum(nil)
371379
return true
372380
}
@@ -401,18 +409,18 @@ func (e *Exporter) reloadMetrics() {
401409
level.Error(e.logger).Log(err)
402410
panic(errors.New("Error while loading " + _customMetrics))
403411
} else {
404-
level.Info(e.logger).Log("Successfully loaded custom metrics from: " + _customMetrics)
412+
level.Info(e.logger).Log("msg", "Successfully loaded custom metrics from "+_customMetrics)
405413
}
406414
e.metricsToScrape.Metric = append(e.metricsToScrape.Metric, additionalMetrics.Metric...)
407415
}
408416
} else {
409-
level.Debug(e.logger).Log("No custom metrics defined.")
417+
level.Debug(e.logger).Log("msg", "No custom metrics defined.")
410418
}
411419
}
412420

413421
// ScrapeMetric is an interface method to call scrapeGenericValues using Metric struct values
414422
func (e *Exporter) ScrapeMetric(db *sql.DB, ch chan<- prometheus.Metric, metricDefinition Metric) error {
415-
level.Debug(e.logger).Log("Calling function ScrapeGenericValues()")
423+
level.Debug(e.logger).Log("msg", "Calling function ScrapeGenericValues()")
416424
return e.scrapeGenericValues(db, ch, metricDefinition.Context, metricDefinition.Labels,
417425
metricDefinition.MetricsDesc, metricDefinition.MetricsType, metricDefinition.MetricsBuckets,
418426
metricDefinition.FieldToAppend, metricDefinition.IgnoreZeroResult,
@@ -434,11 +442,12 @@ func (e *Exporter) scrapeGenericValues(db *sql.DB, ch chan<- prometheus.Metric,
434442
value, err := strconv.ParseFloat(strings.TrimSpace(row[metric]), 64)
435443
// If not a float, skip current metric
436444
if err != nil {
437-
level.Error(e.logger).Log("Unable to convert current value to float (metric=" + metric +
438-
",metricHelp=" + metricHelp + ",value=<" + row[metric] + ">)")
445+
level.Error(e.logger).Log("msg", "Unable to convert current value to float (metric="+metric+
446+
",metricHelp="+metricHelp+",value=<"+row[metric]+">)")
439447
continue
440448
}
441-
level.Debug(e.logger).Log("Query result looks like: ", value)
449+
level.Debug(e.logger).Log("msg", "Query result",
450+
"value", value)
442451
// If metric do not use a field content in metric's name
443452
if strings.Compare(fieldToAppend, "") == 0 {
444453
desc := prometheus.NewDesc(
@@ -449,21 +458,21 @@ func (e *Exporter) scrapeGenericValues(db *sql.DB, ch chan<- prometheus.Metric,
449458
if metricsType[strings.ToLower(metric)] == "histogram" {
450459
count, err := strconv.ParseUint(strings.TrimSpace(row["count"]), 10, 64)
451460
if err != nil {
452-
level.Error(e.logger).Log("Unable to convert count value to int (metric=" + metric +
453-
",metricHelp=" + metricHelp + ",value=<" + row["count"] + ">)")
461+
level.Error(e.logger).Log("msg", "Unable to convert count value to int (metric="+metric+
462+
",metricHelp="+metricHelp+",value=<"+row["count"]+">)")
454463
continue
455464
}
456465
buckets := make(map[float64]uint64)
457466
for field, le := range metricsBuckets[metric] {
458467
lelimit, err := strconv.ParseFloat(strings.TrimSpace(le), 64)
459468
if err != nil {
460-
level.Error(e.logger).Log("Unable to convert bucket limit value to float (metric=" + metric +
461-
",metricHelp=" + metricHelp + ",bucketlimit=<" + le + ">)")
469+
level.Error(e.logger).Log("msg", "Unable to convert bucket limit value to float (metric="+metric+
470+
",metricHelp="+metricHelp+",bucketlimit=<"+le+">)")
462471
continue
463472
}
464473
counter, err := strconv.ParseUint(strings.TrimSpace(row[field]), 10, 64)
465474
if err != nil {
466-
level.Error(e.logger).Log("Unable to convert ", field, " value to int (metric="+metric+
475+
level.Error(e.logger).Log("msg", "Unable to convert ", field, " value to int (metric="+metric+
467476
",metricHelp="+metricHelp+",value=<"+row[field]+">)")
468477
continue
469478
}
@@ -483,21 +492,21 @@ func (e *Exporter) scrapeGenericValues(db *sql.DB, ch chan<- prometheus.Metric,
483492
if metricsType[strings.ToLower(metric)] == "histogram" {
484493
count, err := strconv.ParseUint(strings.TrimSpace(row["count"]), 10, 64)
485494
if err != nil {
486-
level.Error(e.logger).Log("Unable to convert count value to int (metric=" + metric +
487-
",metricHelp=" + metricHelp + ",value=<" + row["count"] + ">)")
495+
level.Error(e.logger).Log("msg", "Unable to convert count value to int (metric="+metric+
496+
",metricHelp="+metricHelp+",value=<"+row["count"]+">)")
488497
continue
489498
}
490499
buckets := make(map[float64]uint64)
491500
for field, le := range metricsBuckets[metric] {
492501
lelimit, err := strconv.ParseFloat(strings.TrimSpace(le), 64)
493502
if err != nil {
494-
level.Error(e.logger).Log("Unable to convert bucket limit value to float (metric=" + metric +
495-
",metricHelp=" + metricHelp + ",bucketlimit=<" + le + ">)")
503+
level.Error(e.logger).Log("msg", "Unable to convert bucket limit value to float (metric="+metric+
504+
",metricHelp="+metricHelp+",bucketlimit=<"+le+">)")
496505
continue
497506
}
498507
counter, err := strconv.ParseUint(strings.TrimSpace(row[field]), 10, 64)
499508
if err != nil {
500-
level.Error(e.logger).Log("Unable to convert ", field, " value to int (metric="+metric+
509+
level.Error(e.logger).Log("msg", "Unable to convert ", field, " value to int (metric="+metric+
501510
",metricHelp="+metricHelp+",value=<"+row[field]+">)")
502511
continue
503512
}
@@ -512,14 +521,14 @@ func (e *Exporter) scrapeGenericValues(db *sql.DB, ch chan<- prometheus.Metric,
512521
}
513522
return nil
514523
}
515-
level.Debug(e.logger).Log("Calling function GeneratePrometheusMetrics()")
524+
level.Debug(e.logger).Log("msg", "Calling function GeneratePrometheusMetrics()")
516525
err := e.generatePrometheusMetrics(db, genericParser, request)
517-
level.Debug(e.logger).Log("ScrapeGenericValues() - metricsCount: ", metricsCount)
526+
level.Debug(e.logger).Log("msg", "ScrapeGenericValues() - metricsCount: "+strconv.Itoa(metricsCount))
518527
if err != nil {
519528
return err
520529
}
521530
if !ignoreZeroResult && metricsCount == 0 {
522-
return errors.New("No metrics found while parsing")
531+
return errors.New("no metrics found while parsing, query returned no rows")
523532
}
524533
return err
525534
}

collector/default_metrics.go

+7-8
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,11 @@ context = "wait_time"
5050
metricsdesc = { value="Generic counter metric from v$waitclassmetric view in Oracle." }
5151
fieldtoappend= "wait_class"
5252
request = '''
53-
SELECT
54-
n.wait_class as WAIT_CLASS,
55-
round(m.time_waited/m.INTSIZE_CSEC,3) as VALUE
56-
FROM
57-
v$waitclassmetric m, v$system_wait_class n
58-
WHERE
59-
m.wait_class_id=n.wait_class_id AND n.wait_class != 'Idle'
53+
SELECT wait_class as WAIT_CLASS, sum(time_waited) as VALUE
54+
FROM gv$active_session_history
55+
where wait_class is not null
56+
and sample_time > sysdate - interval '1' hour
56+
GROUP BY wait_class
6058
'''
6159
6260
[[metric]]
@@ -82,7 +80,8 @@ func (e *Exporter) DefaultMetrics() Metrics {
8280
var metricsToScrape Metrics
8381
if e.config.DefaultMetricsFile != "" {
8482
if _, err := toml.DecodeFile(filepath.Clean(e.config.DefaultMetricsFile), &metricsToScrape); err != nil {
85-
level.Error(e.logger).Log(fmt.Sprintf("there was an issue while loading specified default metrics file at: "+e.config.DefaultMetricsFile+", proceeding to run with default metrics."), err)
83+
level.Error(e.logger).Log("msg", fmt.Sprintf("there was an issue while loading specified default metrics file at: "+e.config.DefaultMetricsFile+", proceeding to run with default metrics."),
84+
"error", err)
8685
}
8786
return metricsToScrape
8887
}

default-metrics.toml

+10-8
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ request = "SELECT status, type, COUNT(*) as value FROM v$session GROUP BY status
88
context = "resource"
99
labels = [ "resource_name" ]
1010
metricsdesc = { current_utilization= "Generic counter metric from v$resource_limit view in Oracle (current value).", limit_value="Generic counter metric from v$resource_limit view in Oracle (UNLIMITED: -1)." }
11-
request="SELECT resource_name,current_utilization,CASE WHEN TRIM(limit_value) LIKE 'UNLIMITED' THEN '-1' ELSE TRIM(limit_value) END as limit_value FROM v$resource_limit"
11+
request = '''
12+
SELECT resource_name, current_utilization, CASE WHEN TRIM(limit_value) LIKE 'UNLIMITED' THEN '-1' ELSE TRIM(limit_value) END as limit_value
13+
FROM v$resource_limit
14+
'''
1215

1316
[[metric]]
1417
context = "asm_diskgroup"
@@ -33,14 +36,13 @@ context = "wait_time"
3336
metricsdesc = { value="Generic counter metric from v$waitclassmetric view in Oracle." }
3437
fieldtoappend= "wait_class"
3538
request = '''
36-
SELECT
37-
n.wait_class as WAIT_CLASS,
38-
round(m.time_waited/m.INTSIZE_CSEC,3) as VALUE
39-
FROM
40-
v$waitclassmetric m, v$system_wait_class n
41-
WHERE
42-
m.wait_class_id=n.wait_class_id AND n.wait_class != 'Idle'
39+
SELECT wait_class as WAIT_CLASS, sum(time_waited) as VALUE
40+
FROM gv$active_session_history
41+
where wait_class is not null
42+
and sample_time > sysdate - interval '1' hour
43+
GROUP BY wait_class
4344
'''
45+
ignorezeroresult = true
4446

4547
[[metric]]
4648
context = "tablespace"

docker-compose/compose.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ services:
4343
start_period: 30s
4444

4545
exporter:
46-
image: container-registry.oracle.com/database/observability-exporter:1.0.0
46+
image: container-registry.oracle.com/database/observability-exporter:1.1.0
4747
container_name: exporter
4848
ports:
4949
- 9161:9161

0 commit comments

Comments
 (0)