Skip to content

Commit

Permalink
CSCTTV-3773 Check that database is in valid state before indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
sarkikos committed Mar 27, 2024
1 parent 422b535 commit 3685c5f
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 0 deletions.
99 changes: 99 additions & 0 deletions aspnetcore/src/Indexer/DatabasePreflightCheck.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
using CSC.PublicApi.DatabaseContext;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;

namespace CSC.PublicApi.Indexer;

/// <summary>
/// Checks that the database tables required by the indexer contain data,
/// so that indexing is not started against an empty or half-populated database.
/// </summary>
public class DatabasePreflightCheck
{
    // Prefix shared by every log message of this check. It is a constant so that the
    // concatenated message templates stay compile-time constants.
    private const string LogPrefix = "Database preflight check: ";

    // Minimum share of publications that must be referenced from fact_contribution.
    // The exact ratio cannot be determined, 80% is used as a baseline.
    private const double MinimumPublicationReferenceRatio = 0.8;

    private readonly ApiDbContext? _context;
    private readonly ILogger<DatabasePreflightCheck>? _logger;

    /// <summary>
    /// Creates a check that queries the given database context and reports via the given logger.
    /// </summary>
    /// <param name="context">Database context used for the count queries.</param>
    /// <param name="logger">Logger receiving the counts and the final verdict.</param>
    public DatabasePreflightCheck(ApiDbContext context, ILogger<DatabasePreflightCheck> logger)
    {
        _context = context;
        _logger = logger;
    }

    /// <summary>
    /// Constructor without dependencies for unit testing. An instance created this way can
    /// evaluate the pure ratio rule, but <see cref="IsGood"/> has nothing to query and returns false.
    /// </summary>
    public DatabasePreflightCheck()
    {
    }

    /// <summary>
    /// Most publications should have author information linked via fact_contribution.
    /// Tells whether enough distinct publications are referenced from fact_contribution.
    /// </summary>
    /// <param name="dimPublicationCount">Number of rows counted in dim_publication.</param>
    /// <param name="factContributionDistinctReferencesToDimPublicationCount">
    /// Number of distinct dim_publication references counted in fact_contribution.
    /// </param>
    /// <returns>
    /// True when the reference count is at least 80% of the publication count, otherwise false.
    /// </returns>
    public bool FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(int dimPublicationCount, int factContributionDistinctReferencesToDimPublicationCount)
    {
        return factContributionDistinctReferencesToDimPublicationCount >= dimPublicationCount * MinimumPublicationReferenceRatio;
    }

    /// <summary>
    /// Runs all preflight checks and logs each count together with the final verdict.
    /// </summary>
    /// <returns>
    /// True when every checked table contains rows and the publication reference ratio is
    /// acceptable. False when any check fails, or when the instance has no database context
    /// or logger and therefore could not check anything.
    /// </returns>
    public bool IsGood()
    {
        ApiDbContext? context = _context;
        ILogger<DatabasePreflightCheck>? logger = _logger;

        // Fail safe: a check that could not look at the database must not report success.
        if (context is null || logger is null)
        {
            logger?.LogError(LogPrefix + "database context is not available, nothing was checked");
            return false;
        }

        bool isGood = true;
        logger.LogInformation(LogPrefix + "Check that required database tables contain data for indexing");

        // Publication count
        int publicationCount = context.DimPublications.AsNoTracking().Count(dp => dp.Id > 0);
        logger.LogInformation(LogPrefix + "publications: dim_publication count = {DimPublicationCount}", publicationCount);
        if (publicationCount == 0)
        {
            logger.LogError(LogPrefix + "publications: Table dim_publication is empty");
            isGood = false;
        }

        // Funding call count (dim_call_programme in database)
        int callProgrammeCount = context.DimCallProgrammes.AsNoTracking().Count(dcp => dcp.Id > 0);
        logger.LogInformation(LogPrefix + "funding calls: dim_call_programme count = {DimCallProgramme}", callProgrammeCount);
        if (callProgrammeCount == 0)
        {
            logger.LogError(LogPrefix + "funding calls: Table dim_call_programme is empty");
            isGood = false;
        }

        // Funding decision count
        int fundingDecisionCount = context.DimFundingDecisions.AsNoTracking().Count(dfd => dfd.Id > 0);
        logger.LogInformation(LogPrefix + "funding decisions: dim_funding_decision count = {DimFundingDecision}", fundingDecisionCount);
        if (fundingDecisionCount == 0)
        {
            logger.LogError(LogPrefix + "funding decisions: Table dim_funding_decision is empty");
            isGood = false;
        }

        // Research dataset count
        int researchDatasetCount = context.DimResearchDatasets.AsNoTracking().Count(drd => drd.Id > 0);
        logger.LogInformation(LogPrefix + "research datasets: dim_research_dataset count = {DimResearchDataset}", researchDatasetCount);
        if (researchDatasetCount == 0)
        {
            logger.LogError(LogPrefix + "research datasets: Table dim_research_dataset is empty");
            isGood = false;
        }

        // Publication related fact_contribution count.
        // Count distinct dim_publication references in fact_contribution.
        int distinctPublicationReferenceCount = context.FactContributions
            .AsNoTracking()
            .Where(fc => fc.DimPublicationId > 0)
            .Select(fc => fc.DimPublicationId)
            .Distinct()
            .Count();
        logger.LogInformation(LogPrefix + "publications: Number of distinct dim_publication references in fact_contribution = {DistinctDimPublicationReferencesInFactContributionCount}", distinctPublicationReferenceCount);
        if (!FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(publicationCount, distinctPublicationReferenceCount))
        {
            logger.LogError(LogPrefix + "publications: Possibly too few of dim_publication references in fact_contribution");
            isGood = false;
        }

        if (isGood)
        {
            logger.LogInformation(LogPrefix + "status OK");
        }
        else
        {
            logger.LogError(LogPrefix + "indexing aborted");
        }

        return isGood;
    }
}
10 changes: 10 additions & 0 deletions aspnetcore/src/Indexer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ public static async Task Main(string[] args)
// Create and configure the host to support dependency injection, configuration, etc.
var consoleHost = CreateHostBuilder(args).Build();

// Check if the database is ready for indexing.
var databasePreflightCheck = consoleHost.Services.GetRequiredService<DatabasePreflightCheck>();
if (!databasePreflightCheck.IsGood())
{
return;
}

// Get the "Main" service which handles the indexing.
var indexer = consoleHost.Services.GetRequiredService<Indexer>();

Expand All @@ -51,6 +58,9 @@ private static IHostBuilder CreateHostBuilder(string[] args) => Host
// Register the "Main" service.
services.AddTransient<Indexer>();

// Register the database checker service.
services.AddTransient<DatabasePreflightCheck>();

// Register settings.
services.AddSettings(hostContext.Configuration);

Expand Down
5 changes: 5 additions & 0 deletions aspnetcore/test/Indexer.Tests/Indexer.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
<ItemGroup>
<ProjectReference Include="..\..\src\DatabaseContext\DatabaseContext.csproj" />
<ProjectReference Include="..\..\src\Repositories\Repositories.csproj" />
<ProjectReference Include="..\..\src\Indexer\Indexer.csproj" />
</ItemGroup>

<ItemGroup>
<Folder Include="Preflight/" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

using FluentAssertions;
using Xunit;

namespace CSC.PublicApi.Indexer.Tests.Preflight;

/// <summary>
/// Unit tests for the publication reference ratio rule of <see cref="DatabasePreflightCheck"/>.
/// </summary>
public class DatabasePreflightCheckTest
{
    // 7 referenced publications out of 10 is below the 80% baseline, so the rule must reject it.
    [Fact]
    public void FactContributionNumberOfDistinctReferencesToDimPublicationIsGood_01()
    {
        // Arrange
        var preflightCheck = new DatabasePreflightCheck();

        // Act
        var isGood = preflightCheck.FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(
            dimPublicationCount: 10,
            factContributionDistinctReferencesToDimPublicationCount: 7);

        // Assert
        Assert.False(isGood);
    }

    // 9 referenced publications out of 10 is above the 80% baseline, so the rule must accept it.
    [Fact]
    public void FactContributionNumberOfDistinctReferencesToDimPublicationIsGood_02()
    {
        // Arrange
        var preflightCheck = new DatabasePreflightCheck();

        // Act
        var isGood = preflightCheck.FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(
            dimPublicationCount: 10,
            factContributionDistinctReferencesToDimPublicationCount: 9);

        // Assert
        Assert.True(isGood);
    }
}

0 comments on commit 3685c5f

Please sign in to comment.