Skip to content

Commit

Permalink
Downgrade error log lines on Windows filesystem access issues (#907)
Browse files Browse the repository at this point in the history
  • Loading branch information
thampiotr authored May 23, 2024
1 parent d018e6e commit bb508af
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ Main (unreleased)

- Add support for configuring CPU profile's duration scraped by `pyroscope.scrape`. (@hainenber)

- Improved filesystem error handling when working with `loki.source.file` and `local.file_match`,
which removes some false-positive error log messages on Windows (@thampiotr)

### Bugfixes

- Fix panic when component ID contains `/` in `otelcomponent.MustNewType(ID)`. (@qclaogui)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ require (
github.com/prometheus/snmp_exporter v0.26.0
github.com/prometheus/statsd_exporter v0.22.8
github.com/richardartoul/molecule v1.0.1-0.20221107223329-32cfee06a052
github.com/rogpeppe/go-internal v1.12.0
github.com/rs/cors v1.10.1
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25
github.com/shirou/gopsutil/v3 v3.24.3
Expand Down
21 changes: 21 additions & 0 deletions internal/component/common/loki/utils/fs_errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package utils

import (
"errors"
"os"
"strings"

"github.com/rogpeppe/go-internal/robustio"
)

// IsEphemeralOrFileClosed reports whether err is an ephemeral error or indicates that the file is already closed.
// This is useful on certain file systems (e.g. on Windows) where in practice reading a file can result in ephemeral
// errors (e.g. due to antivirus scans) or where the file appears as closed when being removed or rotated.
//
// A nil err is never considered ephemeral or closed, so the function returns false for it.
func IsEphemeralOrFileClosed(err error) bool {
	// Guard against nil: the message-based check below calls err.Error(), which would panic on a nil error.
	if err == nil {
		return false
	}
	return robustio.IsEphemeralError(err) ||
		// errors.Is takes the error under inspection first and the target second, so that wrapped errors
		// (e.g. an *os.PathError wrapping os.ErrClosed) are matched as well.
		errors.Is(err, os.ErrClosed) ||
		// The above errors.Is(err, os.ErrClosed) condition doesn't always capture the 'file already closed' error
		// on Windows. Check the error message as well.
		// Inspired by https://github.com/grafana/loki/blob/987e551f9e21b9a612dd0b6a3e60503ce6fe13a8/clients/cmd/docker-driver/driver.go#L145
		strings.Contains(err.Error(), "file already closed")
}
11 changes: 9 additions & 2 deletions internal/component/local/file_match/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import (
"os"
"path/filepath"

"github.com/bmatcuk/doublestar"
"github.com/go-kit/log"

"github.com/bmatcuk/doublestar"
"github.com/grafana/alloy/internal/component/common/loki/utils"
"github.com/grafana/alloy/internal/component/discovery"
"github.com/grafana/alloy/internal/runtime/logging/level"
)
Expand Down Expand Up @@ -38,7 +39,13 @@ func (w *watch) getPaths() ([]discovery.Target, error) {
}
fi, err := os.Stat(abs)
if err != nil {
level.Error(w.log).Log("msg", "error getting os stat", "path", abs, "err", err)
// On some filesystems we can get errors accessing the discovered paths. Don't log these as errors.
// local.file_match will retry on the next sync period if the access is blocked temporarily only.
if utils.IsEphemeralOrFileClosed(err) {
level.Debug(w.log).Log("msg", "I/O error when getting os stat", "path", abs, "err", err)
} else {
level.Error(w.log).Log("msg", "error getting os stat", "path", abs, "err", err)
}
continue
}
if fi.IsDir() {
Expand Down
12 changes: 10 additions & 2 deletions internal/component/loki/source/file/tailer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

"github.com/grafana/alloy/internal/component/common/loki"
"github.com/grafana/alloy/internal/component/common/loki/positions"
"github.com/grafana/alloy/internal/component/common/loki/utils"
"github.com/grafana/alloy/internal/runtime/logging/level"
)

Expand Down Expand Up @@ -309,10 +310,17 @@ func (t *tailer) Stop() {
level.Error(t.logger).Log("msg", "error marking file position when stopping tailer", "path", t.path, "error", err)
}

// Stop the underlying tailer
// Stop the underlying tailer to prevent resource leak.
err = t.tail.Stop()
if err != nil {
level.Error(t.logger).Log("msg", "error stopping tailer", "path", t.path, "error", err)
if utils.IsEphemeralOrFileClosed(err) {
// Don't log as error if the file is already closed, or we got an ephemeral error - it's a common case
// when files are rotating while being read and the tailer would have stopped correctly anyway.
level.Debug(t.logger).Log("msg", "tailer stopped with file I/O error", "path", t.path, "error", err)
} else {
// Log as error for other reasons, as a resource leak may have happened.
level.Error(t.logger).Log("msg", "error stopping tailer", "path", t.path, "error", err)
}
}
// Wait for readLines() to consume all the remaining messages and exit when the channel is closed
<-t.done
Expand Down

0 comments on commit bb508af

Please sign in to comment.