Skip to content

Commit

Permalink
Improve symbols chain fetching from files (#21)
Browse files Browse the repository at this point in the history
* Improve symbols chain fetching from files

* Minor change
  • Loading branch information
jhonabreul authored Jan 3, 2025
1 parent 7f9135a commit c67b54f
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 35 deletions.
46 changes: 20 additions & 26 deletions Lean.DataSource.DerivativeUniverseGenerator/ChainSymbolProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ public Dictionary<Symbol, List<Symbol>> GetSymbols()

return result;
}

/// <summary>

/// <summary>
/// Gets the zip file names for the canonical symbols where the contracts or universe constituents will be read from.
/// </summary>
protected virtual IEnumerable<string> GetZipFileNames(DateTime date, Resolution resolution, TickType tickType)
protected virtual IEnumerable<string> GetZipFileNames(DateTime date, Resolution resolution)
{
var tickTypeLower = tickType.TickTypeToLower();
var tickTypesLower = _symbolsDataTickTypes.Select(tickType => tickType.TickTypeToLower()).ToArray();

if (resolution == Resolution.Minute)
{
Expand All @@ -112,8 +112,11 @@ protected virtual IEnumerable<string> GetZipFileNames(DateTime date, Resolution
var optionStyleLower = _securityType.DefaultOptionStyle().OptionStyleToLower();

return directories
.Select(directory => Path.Combine(directory, $"{dateStr}_{tickTypeLower}_{optionStyleLower}.zip"))
.Where(fileName => File.Exists(fileName));
.Select(directory => tickTypesLower
.Select(tickTypeLower => Path.Combine(directory, $"{dateStr}_{tickTypeLower}_{optionStyleLower}.zip"))
.Where(fileName => File.Exists(fileName))
.FirstOrDefault())
.Where(fileName => fileName != null);
}
// Support for resolutions higher than minute, just for Lean local repo data generation
else
Expand All @@ -125,12 +128,20 @@ protected virtual IEnumerable<string> GetZipFileNames(DateTime date, Resolution
Path.Combine(_dataSourceFolder, resolution.ResolutionToLower()),
$"*{dateStr}*.zip",
SearchOption.AllDirectories)
.Where(fileName =>
.Select(fileName =>
{
var fileInfo = new FileInfo(fileName);
var fileNameParts = fileInfo.Name.Split('_');
return fileNameParts.Length == 4 && fileNameParts[1] == dateStr && fileNameParts[2] == tickTypeLower;
});
var tickTypeIndex = Array.IndexOf(tickTypesLower, fileNameParts[2]);

return (fileName, directoryName: fileInfo.DirectoryName, tickTypeIndex);
})
// Get only supported tick type data
.Where(tuple => tuple.tickTypeIndex > -1)
// For each contract get the first matching tick type file
.OrderBy(tuple => tuple.tickTypeIndex)
.GroupBy(tuple => tuple.directoryName)
.Select(group => group.First().fileName);
}
catch (DirectoryNotFoundException)
{
Expand All @@ -139,23 +150,6 @@ protected virtual IEnumerable<string> GetZipFileNames(DateTime date, Resolution
}
}

/// <summary>
/// Resolves the zip files where the contracts or universe constituents will be read from,
/// trying each of the configured symbols data tick types in order.
/// </summary>
/// <param name="date">The date forwarded to the per-tick-type lookup</param>
/// <param name="resolution">The resolution forwarded to the per-tick-type lookup</param>
/// <returns>
/// The file names found for the first tick type that yields at least one file,
/// or an empty sequence when none of the tick types yields any
/// </returns>
private IEnumerable<string> GetZipFileNames(DateTime date, Resolution resolution)
{
    // The projection is lazy, so the lookup stops at the first tick type that produces files
    var firstNonEmpty = _symbolsDataTickTypes
        .Select(tickType => GetZipFileNames(date, resolution, tickType).ToList())
        .FirstOrDefault(candidates => candidates.Count > 0);

    if (firstNonEmpty != null)
    {
        return firstNonEmpty;
    }

    return Enumerable.Empty<string>();
}

/// <summary>
/// Reads the symbols from the zip entry names for the given canonical symbol.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,31 +36,42 @@ public FutureChainSymbolProvider(IDataCacheProvider dataCacheProvider, DateTime
{
}

protected override IEnumerable<string> GetZipFileNames(DateTime date, Resolution resolution, TickType tickType)
protected override IEnumerable<string> GetZipFileNames(DateTime date, Resolution resolution)
{
var tickTypeLower = tickType.TickTypeToLower();
var tickTypesLower = _symbolsDataTickTypes.Select(tickType => tickType.TickTypeToLower()).ToArray();

if (resolution == Resolution.Minute)
{
var basePath = Path.Combine(_dataSourceFolder, resolution.ResolutionToLower());
var dateStr = date.ToString("yyyyMMdd");

return Directory.EnumerateDirectories(basePath, "*", new EnumerationOptions() { RecurseSubdirectories = true, MaxRecursionDepth = 1 })
.Select(directory => Path.Combine(directory, $"{dateStr}_{tickTypeLower}.zip"))
.Where(fileName => File.Exists(fileName));
return Directory.EnumerateDirectories(basePath)
.Select(directory => tickTypesLower
.Select(tickTypeLower => Path.Combine(directory, $"{dateStr}_{tickTypeLower}.zip"))
.Where(fileName => File.Exists(fileName))
.FirstOrDefault())
.Where(fileName => fileName != null);
}
// Support for resolutions higher than minute, just for Lean local repo data generation
else
{
try
{
return Directory.EnumerateFiles(Path.Combine(_dataSourceFolder, resolution.ResolutionToLower()), $"*_{tickTypeLower}.zip")
.Where(fileName =>
return Directory.EnumerateFiles(Path.Combine(_dataSourceFolder, resolution.ResolutionToLower()), $"*.zip")
.Select(fileName =>
{
var fileInfo = new FileInfo(fileName);
var fileNameParts = Path.GetFileNameWithoutExtension(fileInfo.Name).Split('_');
return fileNameParts.Length == 2 && fileNameParts[1] == tickTypeLower;
});
var tickTypeIndex = Array.IndexOf(tickTypesLower, fileNameParts[1]);

return (fileName, ticker: fileNameParts[0], tickTypeIndex);
})
// Get only supported tick type data
.Where(tuple => tuple.tickTypeIndex > -1)
// For each ticker get the first matching tick type file
.OrderBy(tuple => tuple.tickTypeIndex)
.GroupBy(tuple => tuple.ticker)
.Select(group => group.First().fileName);
}
catch (DirectoryNotFoundException)
{
Expand Down

0 comments on commit c67b54f

Please sign in to comment.