-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
93 changed files
with
21,536 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
.idea | ||
local | ||
dev_notes |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Search For Local Log Files | ||
|
||
![Heaplog logo](Heaplog.jpg) | ||
|
||
// main branch build status | ||
|
||
Heaplog is a program that runs in the background, scans and indexes your log files, and lets you search them via a web UI. | ||
It aims to use little disk space and to allow fast searches using its query language (see below). | ||
|
||
## Installation | ||
|
||
## Configuration | ||
|
||
Configuration can be provided as a YAML file as well as command-line arguments (the latter override the former). | ||
Configurable keys and values can be seen in [config.go](https://github.com/lezhnev74/heaplog/ui/config.go). | ||
To populate a new empty file run `heaplog init > heaplog.yml`. | ||
|
||
Since there are many log file formats, you have to provide two things about your file format: | ||
1. A regular expression (config key `MessageStartRE`) to find individual messages in your files. | ||
2. A date format (config key `DateFormat`) to parse their timestamps. | ||
|
||
### Use Automatic Format Detection Command | ||
|
||
The `heaplog detect` command asks you for a sample log message and tries to detect its date format automatically. | ||
If it succeeds, you can copy the config values from the output and move on to testing your config. | ||
|
||
Sample output: | ||
``` | ||
$ heaplog detect | ||
Enter a sample message line: | ||
[2023-12-31T00:00:03.448201+00:00] production.DEBUG: My message | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
Yay, the date detected above! | ||
Config values: | ||
MessageStartRE: "(?m)^\[(\d{4}\-\d{2}\-\d{2}\w\d{2}:\d{2}:\d{2}\.\d{6}[+-]\d{2}:\d{2})" | ||
DateFormat: "2006-01-02T15:04:05.000000-07:00" | ||
``` | ||
|
||
### Use ChatGPT | ||
|
||
Use the power of AI to do the job for you :) Use this prompt to get Go code from which you can copy the regular | ||
expression as well as the date format for parsing. | ||
|
||
``` | ||
Detect the full timestamp in this log message. | ||
Write Go code to parse this date. | ||
[2023-12-31T00:00:03.448201+00:00] production.DEBUG: My message | ||
``` | ||
|
||
### Provide Format Manually | ||
|
||
The program needs a regular expression that detects the beginning of each message (see [re docs](https://pkg.go.dev/regexp/syntax)). | ||
Its first capturing group must contain the full date of the message. | ||
Below is the regular expression that can recognize messages and dates of this format: | ||
``` | ||
[2023-12-31T00:00:03.448201+00:00] production.DEBUG: My message | ||
(?m)^\[([^\]]+) | ||
``` | ||
|
||
### Test Your Config | ||
Once you have configured the app, run this command to make sure everything is ok: | ||
`heaplog test <path/to/log.file>`. | ||
|
||
## Query Language | ||
|
||
## Design | ||
|
||
See more about design ideas in this blog post. | ||
|
||
## Licence | ||
|
||
MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package common | ||
|
||
import ( | ||
"log" | ||
"slices" | ||
) | ||
|
||
// Location addresses a half-open area of bytes [Min,Max): Min is the first
// offset that belongs to the area and Max is the first offset past it.
type Location struct {
	Min int64 // start offset (inclusive)
	Max int64 // end offset (exclusive)
}
|
||
func (s Location) Intersects(s2 Location) bool { | ||
return s.Min <= s2.Max && s.Max >= s2.Min | ||
} | ||
|
||
// Split slices a segment into many | ||
func (s Location) Split(maxLen int64) (ret []Location) { | ||
|
||
for { | ||
if s.len() <= maxLen { | ||
ret = append(ret, s) | ||
return | ||
} | ||
|
||
ret = append(ret, Location{s.Min, s.Min + maxLen}) | ||
s = Location{s.Min + maxLen, s.Max} | ||
} | ||
} | ||
|
||
func (s Location) len() int64 { return s.Max - s.Min } | ||
|
||
func (s Location) Remove(s2 Location) (ret []Location) { | ||
|
||
// valid locations | ||
if s.len() < 0 || s2.len() < 0 { | ||
log.Panicf("Invalid ranges: %v or %v", s, s2) | ||
} | ||
|
||
intersection := Location{max(s.Min, s2.Min), min(s.Max, s2.Max)} | ||
|
||
// If the intersection is empty, then the difference is the union of the two ranges. | ||
if intersection.len() < 0 { | ||
return []Location{s} | ||
} | ||
|
||
// Otherwise, the difference is the two ranges minus the intersection. | ||
result := Location{Min: s.Min, Max: intersection.Min} | ||
if result.len() > 0 { | ||
ret = append(ret, result) | ||
} | ||
result = Location{Min: intersection.Max, Max: s.Max} | ||
if result.len() > 0 { | ||
ret = append(ret, result) | ||
} | ||
return | ||
} | ||
|
||
func MergeSegmentLocations(src []Location) (ret []Location) { | ||
slices.SortFunc(src, func(a, b Location) int { return int(a.Min - b.Min) }) | ||
|
||
if len(src) < 2 { | ||
return src | ||
} | ||
|
||
cur := src[0] | ||
|
||
for i := 1; i < len(src); i++ { | ||
if src[i].Intersects(cur) { | ||
cur = Location{min(cur.Min, src[i].Min), max(cur.Max, src[i].Max)} | ||
continue | ||
} | ||
|
||
ret = append(ret, cur) | ||
cur = src[i] | ||
} | ||
|
||
ret = append(ret, cur) | ||
|
||
return | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package common | ||
|
||
import ( | ||
"path" | ||
"time" | ||
) | ||
|
||
// DataSourceHash is the hashed identity of a data source. The program works
// with the hashed value only, to keep index files apart.
type DataSourceHash string

// InvertedIndexRoot builds the slash-separated path root/<hash> with path.Join.
func (d DataSourceHash) InvertedIndexRoot(root string) string {
	name := string(d)
	return path.Join(root, name)
}
|
||
func HashFile(filename string) DataSourceHash { | ||
return DataSourceHash(HashString(filename)) | ||
} | ||
|
||
// QuerySummary bundles a query's text and id with its time bounds, completion
// flag and result totals.
type QuerySummary struct {
	Text     string
	QueryId  string
	From     *time.Time // optional (nil-able) time bounds and build time
	To       *time.Time
	BuiltAt  *time.Time
	Complete bool // NOTE(review): original note reads "if the query is still in-flight"; presumably false while the query is running — confirm
	Total    int
	MinDoc   *time.Time // NOTE(review): presumably dates of the earliest/latest matched documents — confirm
	MaxDoc   *time.Time
}
|
||
// MatchedMessage is a message matched the query criteria | ||
type MatchedMessage struct { | ||
Id int64 | ||
Loc Location | ||
Date time.Time | ||
QueryHash string | ||
DataSource DataSourceHash | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package common | ||
|
||
import ( | ||
"time" | ||
) | ||
|
||
// The idea behind indexing source files is simple. | ||
// During the indexing (ingestion) phase we discover all files and split them into fairly large segments. | ||
// In each segment we detect individual messages and save all of that to the db. | ||
// We save segments in the inverted index to save disk space. | ||
// | ||
// Later, during the search phase, we use the inverted index to find relevant segments and then select relevant messages. | ||
// For each message we evaluate the query expression. | ||
// As the last step we put matched messages into the storage using only their ids (quick ingestion via an appender). |
|
||
// IndexedSegment describes a region of a file with where all the messages were indexed | ||
// it contains offsets of all messages found in the segment and dates of the first/last messages | ||
type IndexedSegment struct { | ||
DataSource DataSourceHash | ||
Messages []IndexedMessage | ||
} | ||
|
||
func (is IndexedSegment) Loc() Location { | ||
l := Location{0, 0} | ||
if len(is.Messages) > 0 { | ||
l.Min = is.Messages[0].Loc.Min | ||
l.Max = is.Messages[len(is.Messages)-1].Loc.Max | ||
} | ||
return l | ||
} | ||
func (is IndexedSegment) MinDate() time.Time { return is.Messages[0].Date } | ||
func (is IndexedSegment) MaxDate() time.Time { return is.Messages[len(is.Messages)-1].Date } | ||
|
||
type IndexedMessage struct { | ||
Id int64 // ony filled when read from the storage | ||
Loc Location | ||
Date time.Time | ||
IsTail bool // detect "tail message" | ||
} | ||
|
||
type IndexedSegmentInfo struct { | ||
Id int64 | ||
DataSource DataSourceHash | ||
MinDate, MaxDate time.Time | ||
From, To int64 | ||
Messages int64 | ||
} |
Oops, something went wrong.