From 3685c5f132048072e168d1089f9e7371b78e743b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Sa=CC=88rkikoski?=
Date: Wed, 27 Mar 2024 15:12:52 +0200
Subject: [PATCH] CSCTTV-3773 Check that database is in valid state before indexing

---
 .../src/Indexer/DatabasePreflightCheck.cs     | 119 ++++++++++++++++++
 aspnetcore/src/Indexer/Program.cs             |  10 ++
 .../test/Indexer.Tests/Indexer.Tests.csproj   |   5 +
 .../Preflight/DatabasePreflightCheckTest.cs   |  41 +++++++
 4 files changed, 175 insertions(+)
 create mode 100644 aspnetcore/src/Indexer/DatabasePreflightCheck.cs
 create mode 100644 aspnetcore/test/Indexer.Tests/Preflight/DatabasePreflightCheckTest.cs

diff --git a/aspnetcore/src/Indexer/DatabasePreflightCheck.cs b/aspnetcore/src/Indexer/DatabasePreflightCheck.cs
new file mode 100644
index 0000000..c811e1d
--- /dev/null
+++ b/aspnetcore/src/Indexer/DatabasePreflightCheck.cs
@@ -0,0 +1,119 @@
+using CSC.PublicApi.DatabaseContext;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Logging;
+
+namespace CSC.PublicApi.Indexer;
+
+/// <summary>
+/// Checks that the database contains the data needed for indexing before indexing is started.
+/// </summary>
+public class DatabasePreflightCheck
+{
+    // Compile-time constant, so the log message templates built from it stay constant.
+    private const string LogPrefix = "Database preflight check: ";
+
+    // Most publications should have author information linked via fact_contribution.
+    // The exact ratio cannot be determined, 80% is used as a baseline.
+    private const double MinimumReferencedPublicationRatio = 0.8;
+
+    private readonly ApiDbContext? _context;
+    private readonly ILogger<DatabasePreflightCheck>? _logger;
+
+    public DatabasePreflightCheck(ApiDbContext context, ILogger<DatabasePreflightCheck> logger)
+    {
+        _context = context;
+        _logger = logger;
+    }
+
+    // Constructor without dependencies for unit testing.
+    // An instance created this way has no database access, so IsGood() throws.
+    public DatabasePreflightCheck()
+    {
+    }
+
+    /// <summary>
+    /// Checks that enough publications are referenced from fact_contribution.
+    /// </summary>
+    /// <param name="dimPublicationCount">Number of rows in dim_publication.</param>
+    /// <param name="factContributionDistinctReferencesToDimPublicationCount">Number of distinct dim_publication references in fact_contribution.</param>
+    /// <returns>True when at least 80% of the publications are referenced, otherwise false.</returns>
+    public bool FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(int dimPublicationCount, int factContributionDistinctReferencesToDimPublicationCount)
+    {
+        return factContributionDistinctReferencesToDimPublicationCount >= dimPublicationCount * MinimumReferencedPublicationRatio;
+    }
+
+    /// <summary>
+    /// Runs every preflight check and logs the outcome of each one.
+    /// </summary>
+    /// <returns>True when all checks pass, otherwise false.</returns>
+    /// <exception cref="InvalidOperationException">The instance was created without a database context and a logger.</exception>
+    public bool IsGood()
+    {
+        // Fail loudly instead of reporting an unchecked database as good.
+        if (_context is null || _logger is null)
+        {
+            throw new InvalidOperationException("Database preflight check requires a database context and a logger.");
+        }
+
+        bool isGood = true;
+        _logger.LogInformation(LogPrefix + "Check that required database tables contain data for indexing");
+
+        // Publication count
+        int dimPublicationCount = _context.DimPublications.AsNoTracking().Count(dp => dp.Id > 0);
+        _logger.LogInformation(LogPrefix + "publications: dim_publication count = {DimPublicationCount}", dimPublicationCount);
+        if (dimPublicationCount == 0)
+        {
+            _logger.LogError(LogPrefix + "publications: Table dim_publication is empty");
+            isGood = false;
+        }
+
+        // Funding call count (dim_call_programme in database)
+        int dimCallProgrammeCount = _context.DimCallProgrammes.AsNoTracking().Count(dcp => dcp.Id > 0);
+        _logger.LogInformation(LogPrefix + "funding calls: dim_call_programme count = {DimCallProgramme}", dimCallProgrammeCount);
+        if (dimCallProgrammeCount == 0)
+        {
+            _logger.LogError(LogPrefix + "funding calls: Table dim_call_programme is empty");
+            isGood = false;
+        }
+
+        // Funding decision count
+        int dimFundingDecisionCount = _context.DimFundingDecisions.AsNoTracking().Count(dfd => dfd.Id > 0);
+        _logger.LogInformation(LogPrefix + "funding decisions: dim_funding_decision count = {DimFundingDecision}", dimFundingDecisionCount);
+        if (dimFundingDecisionCount == 0)
+        {
+            _logger.LogError(LogPrefix + "funding decisions: Table dim_funding_decision is empty");
+            isGood = false;
+        }
+
+        // Research dataset count
+        int dimResearchDatasetCount = _context.DimResearchDatasets.AsNoTracking().Count(drd => drd.Id > 0);
+        _logger.LogInformation(LogPrefix + "research datasets: dim_research_dataset count = {DimResearchDataset}", dimResearchDatasetCount);
+        if (dimResearchDatasetCount == 0)
+        {
+            _logger.LogError(LogPrefix + "research datasets: Table dim_research_dataset is empty");
+            isGood = false;
+        }
+
+        // Publication related fact_contribution count.
+        // Count distinct dim_publication references in fact_contribution.
+        int distinctDimPublicationReferenceCount =
+            _context.FactContributions.AsNoTracking().Where(fc => fc.DimPublicationId > 0).Select(fc => fc.DimPublicationId).Distinct().Count();
+        _logger.LogInformation(LogPrefix + "publications: Number of distinct dim_publication references in fact_contribution = {DistinctDimPublicationReferencesInFactContributionCount}", distinctDimPublicationReferenceCount);
+        if (!FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(dimPublicationCount, distinctDimPublicationReferenceCount))
+        {
+            _logger.LogError(LogPrefix + "publications: Possibly too few of dim_publication references in fact_contribution");
+            isGood = false;
+        }
+
+        if (isGood)
+        {
+            _logger.LogInformation(LogPrefix + "status OK");
+        }
+        else
+        {
+            _logger.LogError(LogPrefix + "indexing aborted");
+        }
+
+        return isGood;
+    }
+}
\ No newline at end of file
diff --git a/aspnetcore/src/Indexer/Program.cs b/aspnetcore/src/Indexer/Program.cs
index 71d883a..95f88a3 100644
--- a/aspnetcore/src/Indexer/Program.cs
+++ b/aspnetcore/src/Indexer/Program.cs
@@ -33,6 +33,13 @@ public static async Task Main(string[] args)
         // Create and configure the host to support dependency injection, configuration, etc.
         var consoleHost = CreateHostBuilder(args).Build();
 
+        // Check if the database is ready for indexing.
+        var databasePreflightCheck = consoleHost.Services.GetRequiredService<DatabasePreflightCheck>();
+        if (!databasePreflightCheck.IsGood())
+        {
+            return;
+        }
+
         // Get the "Main" service which handles the indexing.
         var indexer = consoleHost.Services.GetRequiredService();
 
@@ -51,6 +58,9 @@ private static IHostBuilder CreateHostBuilder(string[] args) => Host
                 // Register the "Main" service.
                 services.AddTransient();
 
+                // Register the database checker service.
+                services.AddTransient<DatabasePreflightCheck>();
+
                 // Register settings.
                 services.AddSettings(hostContext.Configuration);
 
diff --git a/aspnetcore/test/Indexer.Tests/Indexer.Tests.csproj b/aspnetcore/test/Indexer.Tests/Indexer.Tests.csproj
index 2300af5..962c00b 100644
--- a/aspnetcore/test/Indexer.Tests/Indexer.Tests.csproj
+++ b/aspnetcore/test/Indexer.Tests/Indexer.Tests.csproj
@@ -29,6 +29,11 @@
 
 
 
+
+
+
+
+
 
 
 
diff --git a/aspnetcore/test/Indexer.Tests/Preflight/DatabasePreflightCheckTest.cs b/aspnetcore/test/Indexer.Tests/Preflight/DatabasePreflightCheckTest.cs
new file mode 100644
index 0000000..f78d4bd
--- /dev/null
+++ b/aspnetcore/test/Indexer.Tests/Preflight/DatabasePreflightCheckTest.cs
@@ -0,0 +1,41 @@
+using FluentAssertions;
+using Xunit;
+
+namespace CSC.PublicApi.Indexer.Tests.Preflight;
+
+public class DatabasePreflightCheckTest
+{
+    // Less than 80% of the publications referenced fails the check; exactly 80% or more passes it.
+    [Theory]
+    [InlineData(10, 7, false)]
+    [InlineData(10, 8, true)]
+    [InlineData(10, 9, true)]
+    [InlineData(0, 0, true)]
+    public void FactContributionNumberOfDistinctReferencesToDimPublicationIsGood_ReturnsExpectedResult(
+        int dimPublicationCount,
+        int factContributionDistinctReferencesToDimPublicationCount,
+        bool expectedResult)
+    {
+        // Arrange
+        DatabasePreflightCheck databasePreflightCheck = new DatabasePreflightCheck();
+
+        // Act
+        bool actualResult = databasePreflightCheck.FactContributionNumberOfDistinctReferencesToDimPublicationIsGood(
+            dimPublicationCount,
+            factContributionDistinctReferencesToDimPublicationCount);
+
+        // Assert
+        Assert.Equal(expectedResult, actualResult);
+    }
+
+    // The parameterless constructor provides no database access, so the full check must not report success.
+    [Fact]
+    public void IsGood_WithoutDependencies_Throws()
+    {
+        // Arrange
+        DatabasePreflightCheck databasePreflightCheck = new DatabasePreflightCheck();
+
+        // Act & Assert
+        Assert.Throws<System.InvalidOperationException>(() => databasePreflightCheck.IsGood());
+    }
+}
\ No newline at end of file