From 1d756915f847b18a2421e6fbfd838adc46056daf Mon Sep 17 00:00:00 2001 From: Michael Oborne Date: Mon, 24 Jun 2024 16:00:32 +1000 Subject: [PATCH] DFLogBuffer: add index cache if file is over 300mb --- ExtLibs/Utilities/DFLogBuffer.cs | 361 ++++++++++++++++++++----------- 1 file changed, 235 insertions(+), 126 deletions(-) diff --git a/ExtLibs/Utilities/DFLogBuffer.cs b/ExtLibs/Utilities/DFLogBuffer.cs index db786a7d4d..2e02511330 100644 --- a/ExtLibs/Utilities/DFLogBuffer.cs +++ b/ExtLibs/Utilities/DFLogBuffer.cs @@ -4,7 +4,9 @@ using System.Diagnostics; using System.Globalization; using System.IO; +using System.IO.Compression; using System.Linq; +using System.Runtime.Serialization.Formatters.Binary; using System.Text; using System.Text.RegularExpressions; using System.Threading.Tasks; @@ -29,6 +31,7 @@ public class DFLogBuffer : IEnumerable, IDisposable /// Type and offsets /// List[] messageindex = new List[256]; + /// /// Type and line numbers /// @@ -41,7 +44,7 @@ public class DFLogBuffer : IEnumerable, IDisposable long indexcachelineno = -1; String currentindexcache = null; - public DFLogBuffer(string filename) : this(File.Open(filename,FileMode.Open,FileAccess.Read,FileShare.Read)) + public DFLogBuffer(string filename) : this(File.Open(filename, FileMode.Open, FileAccess.Read, FileShare.Read)) { _filename = filename; } @@ -54,10 +57,10 @@ public DFLogBuffer(Stream instream) dflog = new DFLog(this); for (int a = 0; a < messageindex.Length; a++) { - messageindex[a] = new List(); - messageindexline[a] = new List(); + messageindex[a] = new List(0); + messageindexline[a] = new List(0); } - + if (instream.CanSeek) { basestream = instream; @@ -85,131 +88,143 @@ public DFLogBuffer(Stream instream) basestream.Position = 0; DateTime start = DateTime.Now; setlinecount(); - Console.WriteLine("DFLogBuffer-linecount: " + Count + " time(ms): " + (DateTime.Now - start).TotalMilliseconds); + Console.WriteLine("DFLogBuffer-linecount: " + Count + " time(ms): " + + (DateTime.Now - start).TotalMilliseconds); basestream.Position = 0; } void setlinecount() { - int offset = 0; - - byte[] buffer = new byte[1024*1024]; - - var lineCount = 0l; - - if (binary) + if (!LoadCache()) { - long length = basestream.Length; - while (basestream.Position < length) + byte[] buffer = new byte[1024 * 1024]; + + var lineCount = 0l; + if (binary) { - var ans = binlog.ReadMessageTypeOffset(basestream, length); + long length = basestream.Length; + while (basestream.Position < length) + { + var ans = binlog.ReadMessageTypeOffset(basestream, length); - if (ans.MsgType == 0 && ans.Offset == 0) - continue; + if (ans.MsgType == 0 && ans.Offset == 0) + continue; - byte type = ans.Item1; - messageindex[type].Add(ans.Item2); - messageindexline[type].Add(lineCount); + byte type = ans.Item1; + messageindex[type].Add(ans.Item2); + messageindexline[type].Add(lineCount); - linestartoffset.Add(ans.Item2); - lineCount++; + linestartoffset.Add(ans.Item2); + lineCount++; - if (lineCount % 1000000 == 0) - Console.WriteLine("reading lines " + lineCount + " " + ((basestream.Position / (double)length) * 100.0)); - } - - _count = lineCount; + if (lineCount % 1000000 == 0) + Console.WriteLine("reading lines " + lineCount + " " + + ((basestream.Position / (double)length) * 100.0)); + } - // build fmt line database to pre seed the FMT message - messageindexline[128].ForEach(a => dflog.FMTLine(this[(int)a])); + _count = lineCount; - try - { - foreach (var item in dflog.logformat) + // build fmt line database to pre seed the FMT message + messageindexline[128].ForEach(a => dflog.FMTLine(this[(int)a])); + + try + { + foreach (var item in dflog.logformat) + { + var id = item.Value.Id; + var type = item.Value.Name; + if (messageindex[id].Count != 0) + Console.WriteLine("Seen " + type + " count " + messageindex[id].Count); + } + } + catch { - var id = item.Value.Id; - var type = item.Value.Name; - if(messageindex[id].Count != 0) - Console.WriteLine("Seen " + type + " count " + messageindex[id].Count); } } - catch { } - } - else - { - // first line starts at 0 - linestartoffset.Add(0); - - long length = basestream.Length; - while (basestream.Position < length) + else { - offset = 0; + var offset = 0; + // first line starts at 0 + linestartoffset.Add(0); - long startpos = basestream.Position; + long length = basestream.Length; + while (basestream.Position < length) + { + offset = 0; - int read = basestream.Read(buffer, offset, buffer.Length); + long startpos = basestream.Position; - while (read > 0) - { - if (buffer[offset] == '\n') + int read = basestream.Read(buffer, offset, buffer.Length); + + while (read > 0) { - linestartoffset.Add((uint)(startpos + 1 + offset)); - lineCount++; - } + if (buffer[offset] == '\n') + { + linestartoffset.Add((uint)(startpos + 1 + offset)); + lineCount++; + } - offset++; - read--; + offset++; + read--; + } } - } - - _count = lineCount; - // create msg cache - int b = 0; - foreach (var item in this) - { - var idx = item.IndexOf(','); + _count = lineCount; - if (idx <= 0) + // create msg cache + int b = 0; + foreach (var item in this) { - b++; - continue; - } - - var msgtype = item.Substring(0, idx); + var idx = item.IndexOf(','); - if(msgtype == "FMT") - dflog.FMTLine(item); + if (idx <= 0) + { + b++; + continue; + } - if (dflog.logformat.ContainsKey(msgtype)) - { - var type = (byte)dflog.logformat[msgtype].Id; + var msgtype = item.Substring(0, idx); + + if (msgtype == "FMT") + dflog.FMTLine(item); + + if (dflog.logformat.ContainsKey(msgtype)) + { + var type = (byte)dflog.logformat[msgtype].Id; + + messageindex[type].Add(linestartoffset[b]); + messageindexline[type].Add((uint)b); + } - messageindex[type].Add(linestartoffset[b]); - messageindexline[type].Add((uint)b); + b++; } - b++; } + + SaveCache(); } + // build fmt line database using type foreach (var item in GetEnumeratorType("FMT")) { try { - if(item.items == null || item.items.Length == 0) + if (item.items == null || item.items.Length == 0) continue; FMT[int.Parse(item["Type"])] = ( int.Parse(item["Length"].Trim()), item["Name"].Trim(), item["Format"].Trim(), - item.items.Skip(dflog.FindMessageOffset("FMT", "Columns")).Aggregate((s, s1) => s.Trim() + "," + s1.Trim()) + item.items.Skip(dflog.FindMessageOffset("FMT", "Columns")) + .Aggregate((s, s1) => s.Trim() + "," + s1.Trim()) .TrimStart(',')); dflog.FMTLine(this[item.lineno]); } - catch { } + catch + { + } } foreach (var item in GetEnumeratorType("FMTU")) @@ -228,12 +243,14 @@ void setlinecount() (item["UnitIds"].Trim().IndexOf("#"), new List()); } } - catch { } + catch + { + } } foreach (var b in InstanceType) { - if(!FMT.ContainsKey(b.Key)) + if (!FMT.ContainsKey(b.Key)) continue; int a = 0; foreach (var item in GetEnumeratorType(FMT[b.Key].name)) @@ -256,8 +273,11 @@ void setlinecount() { Unit[(char)int.Parse(item["Id"])] = item["Label"].Trim(); } - catch { } + catch + { + } } + if (Mult.Count > 0) foreach (var item in GetEnumeratorType("MULT")) { @@ -265,7 +285,9 @@ void setlinecount() { Mult[(char)int.Parse(item["Id"])] = item["Mult"].Trim(); } - catch { } + catch + { + } } BuildUnitMultiList(); @@ -273,12 +295,12 @@ void setlinecount() int limitcount = 0; // used to set the firmware type foreach (var item in GetEnumeratorType(new[] - { - "MSG", "PARM" - })) + { + "MSG", "PARM" + })) { // must be the string version to do the firmware type detection - binarylog - var line = this[(int) item.lineno]; + var line = this[(int)item.lineno]; //Console.WriteLine(); limitcount++; if (limitcount > 100000) @@ -289,9 +311,9 @@ void setlinecount() // here we just force the parsing of gps messages to get the valid board time to gps time offset int gpsa = 0; foreach (var item in GetEnumeratorType(new[] - { - "GPS", "GPS2", "GPSB" - })) + { + "GPS", "GPS2", "GPSB" + })) { gpsa++; int status = 0; @@ -300,6 +322,7 @@ void setlinecount() if (status >= 3) break; } + // get first gps time if (gpsa > 2000) break; @@ -307,28 +330,103 @@ void setlinecount() indexcachelineno = -1; } - + + [Serializable] + struct cache + { + public List[] messageindex; + public List[] messageindexline; + public List linestartoffset; + public long lineCount; + } + + private string CachePath + { + get + { + try + { + return Path.GetTempPath() + Path.GetFileNameWithoutExtension(_filename) + new FileInfo(_filename).Length; + } + catch + { + return Path.GetTempFileName(); + } + } + } + + private void SaveCache() + { + // save cache if file is over 300mb + if (basestream.Length < 1024 * 1024 * 300) + return; + //save cache + cache cache = new cache(); + cache.messageindex = messageindex; + cache.messageindexline = messageindexline; + cache.linestartoffset = linestartoffset; + cache.lineCount = _count; + + using (var file = File.OpenWrite(CachePath)) + { + using (GZipStream gs = new GZipStream(file, CompressionMode.Compress)) + { + BinaryFormatter serializer = new BinaryFormatter(); + serializer.Serialize(gs, cache); + } + } + } + + private bool LoadCache() + { + if (File.Exists(CachePath)) + { + //load cache + cache cache = new cache(); + BinaryFormatter deserializer = new BinaryFormatter(); + using (var file = File.OpenRead(CachePath)) + { + using (GZipStream gs = new GZipStream(file, CompressionMode.Decompress)) + { + cache = (cache)deserializer.Deserialize(gs); + } + } + + messageindex = cache.messageindex; + messageindexline = cache.messageindexline; + linestartoffset = cache.linestartoffset; + _count = cache.lineCount; + + // build fmt line database to pre seed the FMT message + messageindexline[128].ForEach(a => dflog.FMTLine(this[(int)a])); + return true; + } + + return false; + } + public void SplitLog(int pieces = 0) { long length = basestream.Length; if (pieces > 0) - { + { long sizeofpiece = length / pieces; - for(int i = 0; i < pieces; i++) + for (int i = 0; i < pieces; i++) { long start = i * sizeofpiece; long end = start + sizeofpiece; - using (var file = File.OpenWrite(_filename + "_split" + i + ".bin")) + using (var file = File.OpenWrite(_filename + "_split" + i + ".bin")) { var type = dflog.logformat["FMT"]; - var buffer = new byte[1024*256]; + var buffer = new byte[1024 * 256]; // fmt from entire file - messageindex[type.Id].ForEach(a => { + messageindex[type.Id].ForEach(a => + { basestream.Seek(a, SeekOrigin.Begin); int read = basestream.Read(buffer, 0, type.Length); file.Write(buffer, 0, read); @@ -336,7 +434,8 @@ public void SplitLog(int pieces = 0) type = dflog.logformat["FMTU"]; - messageindex[type.Id].ForEach(a => { + messageindex[type.Id].ForEach(a => + { basestream.Seek(a, SeekOrigin.Begin); int read = basestream.Read(buffer, 0, type.Length); file.Write(buffer, 0, read); @@ -344,7 +443,8 @@ public void SplitLog(int pieces = 0) type = dflog.logformat["UNIT"]; - messageindex[type.Id].ForEach(a => { + messageindex[type.Id].ForEach(a => + { basestream.Seek(a, SeekOrigin.Begin); int read = basestream.Read(buffer, 0, type.Length); file.Write(buffer, 0, read); @@ -352,7 +452,8 @@ public void SplitLog(int pieces = 0) type = dflog.logformat["MULT"]; - messageindex[type.Id].ForEach(a => { + messageindex[type.Id].ForEach(a => + { basestream.Seek(a, SeekOrigin.Begin); int read = basestream.Read(buffer, 0, type.Length); file.Write(buffer, 0, read); @@ -364,7 +465,7 @@ public void SplitLog(int pieces = 0) var max = long.MinValue; // got min and max valid - linestartoffset.ForEach(a => + linestartoffset.ForEach(a => { if (a >= start && a < end) { @@ -397,7 +498,7 @@ private void BuildUnitMultiList() // get unit and mult info var fmtu = FMTU.FirstOrDefault(a => a.Key == msgtype.Key); - if(fmtu.Value == null) + if (fmtu.Value == null) continue; var units = fmtu.Value.Item1.ToCharArray().Select(a => Unit.FirstOrDefault(b => b.Key == a)); @@ -433,12 +534,17 @@ private void BuildUnitMultiList() } } - public List> UnitMultiList = new List>(); - public Dictionary value)> InstanceType = new Dictionary value)>(); - private string _filename; + public List> UnitMultiList = + new List>(); + + public Dictionary value)> InstanceType = + new Dictionary value)>(); + + private string _filename = ""; public Dictionary FMT { get; set; } = new Dictionary(); + public Dictionary> FMTU { get; set; } = new Dictionary>(); public Dictionary Unit { get; set; } = new Dictionary(); @@ -490,7 +596,7 @@ public DFLog.DFItem this[long indexin] return dflog.GetDFItemFromLine(ASCIIEncoding.ASCII.GetString(data), (int)indexin); } - + } } } @@ -511,7 +617,7 @@ public String this[int index] endoffset = linestartoffset[index + 1]; } - int length = (int) (endoffset - startoffset); + int length = (int)(endoffset - startoffset); // prevent multi io to file lock (locker) @@ -556,7 +662,11 @@ public void Clear() public int Count { - get { if (_count > int.MaxValue) Console.WriteLine("log line count is too large"); return (int)_count; } + get + { + if (_count > int.MaxValue) Console.WriteLine("log line count is too large"); + return (int)_count; + } } public bool IsReadOnly @@ -576,7 +686,7 @@ public bool IsReadOnly public IEnumerable GetEnumeratorType(string type) { - return GetEnumeratorType(new string[] {type}); + return GetEnumeratorType(new string[] { type }); } public IEnumerable GetEnumeratorType(string[] types) @@ -584,7 +694,7 @@ public bool IsReadOnly Dictionary> instances = new Dictionary>(); types.ForEach(x => - { + { // match ACC[0] GPS[0] or ACC or GPS var m = Regex.Match(x, @"(\w+)(\[([0-9]+)\])?", RegexOptions.None); if (m.Success) @@ -609,7 +719,8 @@ public bool IsReadOnly if (!instances.ContainsKey(m.Groups[1].ToString())) instances[m.Groups[1].ToString()] = new List(); - instances[m.Groups[1].ToString()].Add(m.Groups[2].Success ? (int.Parse(m.Groups[2].ToString()) - 1).ToString() : ""); + instances[m.Groups[1].ToString()] + .Add(m.Groups[2].Success ? (int.Parse(m.Groups[2].ToString()) - 1).ToString() : ""); } }); @@ -619,7 +730,7 @@ public bool IsReadOnly { if (dflog.logformat.ContainsKey(type)) { - var typeid = (byte) dflog.logformat[type].Id; + var typeid = (byte)dflog.logformat[type].Id; foreach (var item in messageindexline[typeid]) { @@ -628,7 +739,7 @@ public bool IsReadOnly } } - if(types.Length > 1) + if (types.Length > 1) slist.Sort(); int progress = DateTime.Now.Second; @@ -640,7 +751,8 @@ public bool IsReadOnly Console.WriteLine(l); progress = DateTime.Now.Second; } - var ans = this[(long) l]; + + var ans = this[(long)l]; if (!instances.ContainsKey(ans.msgtype)) continue; var inst = instances[ans.msgtype]; @@ -651,12 +763,12 @@ public bool IsReadOnly yield return ans; } } - + public IEnumerator GetEnumerator() { int position = 0; // state while (position < Count) - { + { yield return this[position]; position++; } @@ -676,12 +788,9 @@ public void Dispose() GC.Collect(); } - public bool EndOfStream + public bool EndOfStream { - get - { - return (indexcachelineno >= (linestartoffset.Count-1)); - } + get { return (indexcachelineno >= (linestartoffset.Count - 1)); } } public List SeenMessageTypes @@ -700,7 +809,7 @@ public List SeenMessageTypes } } - public Tuple GetUnit(string type, string header) + public Tuple GetUnit(string type, string header) { var answer = UnitMultiList.Where(tuple => tuple.Item1 == type && tuple.Item2 == header); @@ -709,15 +818,15 @@ public Tuple GetUnit(string type, string header) return new Tuple(answer.First().Item3, answer.First().Item4); } - + public int getInstanceIndex(string type) { - if(!dflog.logformat.ContainsKey(type)) + if (!dflog.logformat.ContainsKey(type)) return -1; var typeno = dflog.logformat[type].Id; - if(!FMTU.ContainsKey(typeno)) + if (!FMTU.ContainsKey(typeno)) return -1; var unittypes = FMTU[typeno].Item1;