diff --git a/bin/usn.go b/bin/usn.go index 385e1e0..f5ceae5 100644 --- a/bin/usn.go +++ b/bin/usn.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "regexp" "strings" kingpin "gopkg.in/alecthomas/kingpin.v2" @@ -19,6 +20,9 @@ var ( usn_command_watch = usn_command.Flag( "watch", "Watch the USN for changes").Bool() + + usn_command_filename_filter = usn_command.Flag( + "file_filter", "Regex to match the filename").Default(".").String() ) const template = ` @@ -61,10 +65,43 @@ func doUSN() { ntfs_ctx, err := parser.GetNTFSContext(reader, 0) kingpin.FatalIfError(err, "Can not open filesystem") + filename_filter, err := regexp.Compile(*usn_command_filename_filter) + kingpin.FatalIfError(err, "Filename filter") + + for record := range parser.ParseUSN(context.Background(), ntfs_ctx, 0) { + mft_id := record.FileReferenceNumberID() + mft_seq := uint16(record.FileReferenceNumberSequence()) + + ntfs_ctx.SetPreload(mft_id, mft_seq, + func(entry *parser.MFTEntrySummary) (*parser.MFTEntrySummary, bool) { + if entry != nil { + return entry, false + } + + // Add a fake entry to resolve the filename + return &parser.MFTEntrySummary{ + Sequence: mft_seq, + Filenames: []parser.FNSummary{{ + Name: record.Filename(), + NameType: "DOS+Win32", + ParentEntryNumber: record.ParentFileReferenceNumberID(), + ParentSequenceNumber: uint16( + record.ParentFileReferenceNumberSequence()), + }}, + }, true + }) + } + for record := range parser.ParseUSN(context.Background(), ntfs_ctx, 0) { + filename := record.Filename() + + if !filename_filter.MatchString(filename) { + continue + } + fmt.Printf(template, record.Usn(), record.Offset, - record.Filename(), - record.FullPath(), record.TimeStamp(), + filename, + record.Links(), record.TimeStamp(), strings.Join(record.Reason(), ", "), strings.Join(record.FileAttributes(), ", "), strings.Join(record.SourceInfo(), ", "), diff --git a/parser/caching.go b/parser/caching.go index f748187..2bd7545 100644 --- a/parser/caching.go +++ b/parser/caching.go 
@@ -23,20 +23,79 @@ type MFTEntryCache struct { ntfs *NTFSContext lru *LRU + + preloaded map[uint64]*MFTEntrySummary } func NewMFTEntryCache(ntfs *NTFSContext) *MFTEntryCache { lru, _ := NewLRU(10000, nil, "MFTEntryCache") return &MFTEntryCache{ - ntfs: ntfs, - lru: lru, + ntfs: ntfs, + lru: lru, + preloaded: make(map[uint64]*MFTEntrySummary), + } +} + +// This function is used to preset persisted information in the cache +// about known MFT entries from other sources than the MFT itself. In +// particular, the USN journal is often a source of additional +// historical information. When resolving an MFT entry summary, we +// first look to the MFT itself, however if the sequence number does +// not match the required entry, we look to the preloaded entry for a +// better match. +// +// This allows us to substitute historical information (from the USN +// journal) while resolving full paths. +func (self *MFTEntryCache) SetPreload(id uint64, seq uint16, + cb func(entry *MFTEntrySummary) (*MFTEntrySummary, bool)) { + key := id | uint64(seq)<<48 + + // Optionally allow the callback to update the preloaded entry. + entry, _ := self.preloaded[key] + new_entry, updated := cb(entry) + if updated { + self.preloaded[key] = new_entry } } -func (self *MFTEntryCache) GetSummary(id uint64) (*MFTEntrySummary, error) { +// GetSummary gets an MFTEntrySummary for the mft id. The sequence +// number is a hint for the required sequence of the entry. This +// function may return an MFTEntrySummary with a different sequence +// than requested. +func (self *MFTEntryCache) GetSummary( + id uint64, seq uint16) (*MFTEntrySummary, error) { self.mu.Lock() defer self.mu.Unlock() + // We prefer to get the real entry from the MFT because it has all + // the short names etc. + res, err := self._GetSummary(id) + if err != nil { + return nil, err + } + + // If the MFT entry is not correct (does not have the required + // sequence number), we check the preloaded set for an approximate + // match. 
+ if res.Sequence != seq { + // Try to get from the preloaded records + key := id | uint64(seq)<<48 + res, ok := self.preloaded[key] + if ok { + // Yep - the sequence number is correct. + return res, nil + } + + // Just return the incorrect entry - callers can add an error + // for incorrect sequence number. + } + + return res, nil +} + +// Get the summary from the underlying MFT itself. +func (self *MFTEntryCache) _GetSummary( + id uint64) (*MFTEntrySummary, error) { res_any, pres := self.lru.Get(int(id)) if pres { res, ok := res_any.(*MFTEntrySummary) diff --git a/parser/context.go b/parser/context.go index 0af33c6..991e3fb 100644 --- a/parser/context.go +++ b/parser/context.go @@ -30,6 +30,8 @@ type NTFSContext struct { mft_entry_lru *LRU mft_summary_cache *MFTEntryCache + + full_path_resolver *FullPathResolver } func newNTFSContext(image io.ReaderAt, name string) *NTFSContext { @@ -43,9 +45,20 @@ func newNTFSContext(image io.ReaderAt, name string) *NTFSContext { } ntfs.mft_summary_cache = NewMFTEntryCache(ntfs) + ntfs.full_path_resolver = &FullPathResolver{ + ntfs: ntfs, + options: ntfs.options, + mft_cache: ntfs.mft_summary_cache, + } + return ntfs } +func (self *NTFSContext) SetPreload(id uint64, seq uint16, + cb func(entry *MFTEntrySummary) (*MFTEntrySummary, bool)) { + self.mft_summary_cache.SetPreload(id, seq, cb) +} + func (self *NTFSContext) Copy() *NTFSContext { self.mu.Lock() defer self.mu.Unlock() @@ -99,10 +112,6 @@ func (self *NTFSContext) GetRecordSize() int64 { return self.RecordSize } -func (self *NTFSContext) GetMFTSummary(id uint64) (*MFTEntrySummary, error) { - return self.mft_summary_cache.GetSummary(id) -} - func (self *NTFSContext) GetMFT(id int64) (*MFT_ENTRY, error) { // Check the cache first cached_any, pres := self.mft_entry_lru.Get(int(id)) diff --git a/parser/easy.go b/parser/easy.go index 0a2d137..d103c93 100644 --- a/parser/easy.go +++ b/parser/easy.go @@ -465,8 +465,12 @@ func findNextVCN(attributes []*attrInfo, selected_attribute 
*attrInfo) (*attrInf // all related attributes and wraps them in a RangeReader to appear as // a single stream. This function is what you need when you want to // read the full file. -func OpenStream(ntfs *NTFSContext, - mft_entry *MFT_ENTRY, attr_type uint64, attr_id uint16, attr_name string) (RangeReaderAt, error) { +func OpenStream( + ntfs *NTFSContext, + mft_entry *MFT_ENTRY, + attr_type uint64, + attr_id uint16, + attr_name string) (RangeReaderAt, error) { result := &RangeReader{} diff --git a/parser/hardlinks.go b/parser/hardlinks.go index 87b6834..de8bf56 100644 --- a/parser/hardlinks.go +++ b/parser/hardlinks.go @@ -55,32 +55,45 @@ func (self *Visitor) Components() [][]string { return result } +// The FullPathResolver resolves an MFT entry into a full path. +// +// This resolver can use information from both the USN journal and the +// MFT to reconstruct the full path of an mft entry. +type FullPathResolver struct { + ntfs *NTFSContext + options Options + + mft_cache *MFTEntryCache +} + // Walks the MFT entry to get all file names to this MFT entry. 
-func GetHardLinks(ntfs *NTFSContext, mft_id uint64, max int) [][]string { +func (self *FullPathResolver) GetHardLinks( + mft_id uint64, seq_number uint16, max int) [][]string { if max == 0 { - max = ntfs.options.MaxLinks + max = self.options.MaxLinks } visitor := &Visitor{ Paths: [][]string{[]string{}}, Max: max, - IncludeShortNames: ntfs.options.IncludeShortNames, - Prefix: ntfs.options.PrefixComponents, + IncludeShortNames: self.options.IncludeShortNames, + Prefix: self.options.PrefixComponents, } - mft_entry_summary, err := ntfs.GetMFTSummary(mft_id) + mft_entry_summary, err := self.mft_cache.GetSummary( + mft_id, seq_number) if err != nil { return nil } - getNames(ntfs, mft_entry_summary, visitor, 0, 0) + self.getNames(mft_entry_summary, visitor, 0, 0) return visitor.Components() } -func getNames(ntfs *NTFSContext, +func (self *FullPathResolver) getNames( mft_entry *MFTEntrySummary, visitor *Visitor, idx, depth int) { - if depth > ntfs.options.MaxDirectoryDepth { + if depth > self.options.MaxDirectoryDepth { visitor.AddComponent(idx, "") visitor.AddComponent(idx, "") return @@ -135,7 +148,8 @@ func getNames(ntfs *NTFSContext, continue } - parent_entry, err := ntfs.GetMFTSummary(fn.ParentEntryNumber) + parent_entry, err := self.mft_cache.GetSummary( + fn.ParentEntryNumber, fn.ParentSequenceNumber) if err != nil { visitor.AddComponent(visitor_idx, err.Error()) visitor.AddComponent(visitor_idx, "") @@ -150,6 +164,6 @@ func getNames(ntfs *NTFSContext, continue } - getNames(ntfs, parent_entry, visitor, visitor_idx, depth+1) + self.getNames(parent_entry, visitor, visitor_idx, depth+1) } } diff --git a/parser/mft.go b/parser/mft.go index 8ad481b..d660b31 100644 --- a/parser/mft.go +++ b/parser/mft.go @@ -371,7 +371,8 @@ func (self *MFTHighlight) FullPath() string { } func (self *MFTHighlight) Links() []string { - components := GetHardLinks(self.ntfs_ctx, uint64(self.EntryNumber), + components := self.ntfs_ctx.full_path_resolver.GetHardLinks( + 
uint64(self.EntryNumber), self.SequenceNumber, DefaultMaxLinks) result := make([]string, 0, len(components)) for _, l := range components { @@ -404,7 +405,8 @@ func (self *MFTHighlight) FileName() string { // so you should consult the Links() to get more info. func (self *MFTHighlight) Components() []string { components := []string{} - links := GetHardLinks(self.ntfs_ctx, uint64(self.EntryNumber), 1) + links := self.ntfs_ctx.full_path_resolver.GetHardLinks( + uint64(self.EntryNumber), self.SequenceNumber, 1) if len(links) > 0 { components = links[0] } diff --git a/parser/model.go b/parser/model.go index 4343706..89ea6c0 100644 --- a/parser/model.go +++ b/parser/model.go @@ -116,7 +116,8 @@ func ModelMFTEntry(ntfs *NTFSContext, mft_entry *MFT_ENTRY) (*NTFSFileInformatio }) } - for _, l := range GetHardLinks(ntfs, uint64(mft_id), DefaultMaxLinks) { + for _, l := range ntfs.full_path_resolver.GetHardLinks( + uint64(mft_id), result.SequenceNumber, DefaultMaxLinks) { result.Hardlinks = append(result.Hardlinks, strings.Join(l, "\\")) } diff --git a/parser/usn.go b/parser/usn.go index 16fe549..c36dcb7 100644 --- a/parser/usn.go +++ b/parser/usn.go @@ -82,29 +82,35 @@ func (self *USN_RECORD) Next(max_offset int64) *USN_RECORD { } func (self *USN_RECORD) Links() []string { - // Since this record could have mean a file deletion event + return self._Links(DefaultMaxLinks) +} + +func (self *USN_RECORD) _Links(depth int) []string { + // Since this record could have meant a file deletion event // then resolving the actual MFT entry to a full path is less // reliable. It is more reliable to resolve the parent path, // and then add the USN record name to it. parent_mft_id := self.USN_RECORD_V2.ParentFileReferenceNumberID() - parent_mft_sequence := self.USN_RECORD_V2.ParentFileReferenceNumberSequence() + parent_mft_sequence := uint16( + self.USN_RECORD_V2.ParentFileReferenceNumberSequence()) // Make sure the parent has the correct sequence to prevent // nonsensical paths. 
- parent_mft_entry, err := self.context.GetMFTSummary(parent_mft_id) + parent_mft_entry, err := self.context.mft_summary_cache.GetSummary( + parent_mft_id, parent_mft_sequence) if err != nil { return []string{fmt.Sprintf("\\\\%v", parent_mft_id, err, self.Filename())} } - if uint64(parent_mft_entry.Sequence) != parent_mft_sequence { + if uint64(parent_mft_entry.Sequence) != uint64(parent_mft_sequence) { return []string{fmt.Sprintf("\\\\%v", parent_mft_id, parent_mft_entry.Sequence, parent_mft_sequence, self.Filename())} } - components := GetHardLinks(self.context, uint64(parent_mft_id), - DefaultMaxLinks) + components := self.context.full_path_resolver.GetHardLinks( + uint64(parent_mft_id), parent_mft_sequence, DefaultMaxLinks) result := make([]string, 0, len(components)) for _, l := range components { l = append(l, self.Filename()) @@ -115,23 +121,11 @@ func (self *USN_RECORD) Links() []string { // Resolve the file to a full path func (self *USN_RECORD) FullPath() string { - // Since this record could have meant a file deletion event - // then resolving the actual MFT entry to a full path is less - // reliable. It is more reliable to resolve the parent path, - // and then add the USN record name to it. 
- parent_mft_id := self.USN_RECORD_V2.ParentFileReferenceNumberID() - parent_mft_entry, err := self.context.GetMFT(int64(parent_mft_id)) - if err != nil { - return "" - } - - file_names := parent_mft_entry.FileName(self.context) - if len(file_names) == 0 { + res := self._Links(1) + if len(res) == 0 { return "" } - - parent_full_path := GetFullPath(self.context, parent_mft_entry) - return parent_full_path + "/" + self.Filename() + return res[0] } func (self *USN_RECORD) Reason() []string { @@ -176,36 +170,34 @@ func getUSNStream(ntfs_ctx *NTFSContext) (mft_id int64, attr_id uint16, attr_nam return 0, 0, "", errors.New("Can not find $Extend\\$UsnJrnl:$J") } +func OpenUSNStream(ntfs_ctx *NTFSContext) (RangeReaderAt, error) { + mft_id, attr_id, attr_name, err := getUSNStream(ntfs_ctx) + if err != nil { + return nil, err + } + + mft_entry, err := ntfs_ctx.GetMFT(mft_id) + if err != nil { + return nil, err + } + + return OpenStream(ntfs_ctx, mft_entry, 128, attr_id, attr_name) +} + // Returns a channel which will send USN records on. We start parsing // at the start of the file and continue until the end. 
-func ParseUSN(ctx context.Context, ntfs_ctx *NTFSContext, starting_offset int64) chan *USN_RECORD { +func ParseUSN(ctx context.Context, + ntfs_ctx *NTFSContext, + usn_stream RangeReaderAt, + starting_offset int64) chan *USN_RECORD { + output := make(chan *USN_RECORD) go func() { defer close(output) - mft_id, attr_id, attr_name, err := getUSNStream(ntfs_ctx) - if err != nil { - DebugPrint("ParseUSN error: %v", err) - return - } - - mft_entry, err := ntfs_ctx.GetMFT(mft_id) - if err != nil { - DebugPrint("ParseUSN error: %v", err) - return - } - - data, err := OpenStream(ntfs_ctx, mft_entry, 128, attr_id, attr_name) - if err != nil { - DebugPrint("ParseUSN error: %v", err) - return - } - count := 0 - defer DebugPrint("Skipped %v entries\n", count) - - for _, rng := range data.Ranges() { + for _, rng := range usn_stream.Ranges() { run_end := rng.Offset + rng.Length if rng.IsSparse { continue @@ -216,7 +208,7 @@ func ParseUSN(ctx context.Context, ntfs_ctx *NTFSContext, starting_offset int64) continue } - for record := NewUSN_RECORD(ntfs_ctx, data, rng.Offset); record != nil; record = record.Next(run_end) { + for record := NewUSN_RECORD(ntfs_ctx, usn_stream, rng.Offset); record != nil; record = record.Next(run_end) { if record.Offset < starting_offset { continue } @@ -268,7 +260,13 @@ func getLastUSN(ctx context.Context, ntfs_ctx *NTFSContext) (record *USN_RECORD, DebugPrint("Staring to parse USN in offset for seek %v\n", last_range.Offset) count := 0 - for record := range ParseUSN(ctx, ntfs_ctx, last_range.Offset) { + usn_stream, err := OpenUSNStream(ntfs_ctx) + if err != nil { + return nil, err + } + + for record := range ParseUSN( + ctx, ntfs_ctx, usn_stream, last_range.Offset) { result = record count++ } @@ -316,7 +314,13 @@ func WatchUSN(ctx context.Context, ntfs_ctx *NTFSContext, period int) chan *USN_ // we always get fresh data. 
ntfs_ctx.Purge() - for record := range ParseUSN(ctx, ntfs_ctx, start_offset) { + usn_stream, err := OpenUSNStream(ntfs_ctx) + if err != nil { + return + } + + for record := range ParseUSN( + ctx, ntfs_ctx, usn_stream, start_offset) { if record.Offset > start_offset { select { case <-ctx.Done(): diff --git a/parser/utils.go b/parser/utils.go index 8e521c5..6f5f68d 100644 --- a/parser/utils.go +++ b/parser/utils.go @@ -23,7 +23,8 @@ func get_display_name(file_names []*FILE_NAME) string { // Traverse the mft entry and attempt to find its owner until the // root. We return the full path of the MFT entry. func GetFullPath(ntfs *NTFSContext, mft_entry *MFT_ENTRY) string { - links := GetHardLinks(ntfs, uint64(mft_entry.Record_number()), 1) + links := ntfs.full_path_resolver.GetHardLinks( + uint64(mft_entry.Record_number()), mft_entry.Sequence_value(), 1) if len(links) == 0 { return "/" }