From 93ca48536f7948e114bae68f68d5d0f4dfef7f32 Mon Sep 17 00:00:00 2001 From: "Eli C. Lowry" <83078660+Enkidu93@users.noreply.github.com> Date: Wed, 9 Oct 2024 17:23:27 -0400 Subject: [PATCH] Serval-side mixed source support (#497) Mixed source support --- .../TranslationEngineServiceV1.cs | 21 +- .../Serval.Machine.Shared/Models/Corpus.cs | 14 - .../Models/MonolingualCorpus.cs | 12 + .../Models/ParallelCorpus.cs | 8 + .../Services/ITranslationEngineService.cs | 2 +- .../Services/NmtEngineService.cs | 2 +- .../Services/NmtHangfireBuildJobFactory.cs | 2 +- .../Services/PreprocessBuildJob.cs | 168 +- .../ServalTranslationEngineServiceV1.cs | 52 +- .../Services/SmtTransferEngineService.cs | 2 +- .../SmtTransferHangfireBuildJobFactory.cs | 2 +- .../Services/SmtTransferPreprocessBuildJob.cs | 2 +- .../Services/NmtEngineServiceTests.cs | 6 +- .../Services/PreprocessBuildJobTests.cs | 518 ++- .../Services/SmtTransferEngineServiceTests.cs | 37 +- .../Services/data/pt-target2/04LEVTe4.SFM | 7 + .../Services/data/pt-target2/41MATTe4.SFM | 18 + .../Services/data/pt-target2/42MRKTe4.SFM | 4 + .../Services/data/pt-target2/Settings.xml | 33 + .../data/pt-target2/TermRenderings.xml | 9 + .../Services/data/pt-target2/custom.vrs | 31 + src/Serval/src/Serval.Client/Client.g.cs | 3319 ++++++++++++----- ...iatorRegistrationConfiguratorExtensions.cs | 1 + .../IMongoDataAccessConfiguratorExtensions.cs | 7 + .../Configuration/IServalBuilderExtensions.cs | 3 + .../Consumers/GetCorpusConsumer.cs | 45 + .../Contracts/CorpusConfigDto.cs | 10 + .../Serval.DataFiles/Contracts/CorpusDto.cs | 11 + .../Contracts/CorpusFileConfigDto.cs | 7 + .../Contracts/CorpusFileDto.cs | 7 + .../Controllers/CorporaController.cs | 214 ++ .../Controllers/DataFilesController.cs | 2 +- .../src/Serval.DataFiles/Models/Corpus.cs | 11 + .../src/Serval.DataFiles/Models/CorpusFile.cs | 7 + .../Serval.DataFiles/Models/DeletedFile.cs | 2 +- .../Services/CorpusService.cs | 28 + .../Services/ICorpusService.cs | 11 + 
.../Protos/serval/translation/v1/engine.proto | 28 +- .../Contracts/CorpusFileResult.cs | 7 + .../Serval.Shared/Contracts/CorpusNotFound.cs | 7 + .../Serval.Shared/Contracts/CorpusResult.cs | 9 + .../src/Serval.Shared/Contracts/GetCorpus.cs | 7 + .../Serval.Shared/Controllers/Endpoints.cs | 3 + .../ParallelCorpusFilterConfigDto.cs | 8 + .../Contracts/ParallelCorpusFilterDto.cs | 8 + .../Contracts/PretranslateCorpusConfigDto.cs | 5 +- .../Contracts/PretranslateCorpusDto.cs | 5 +- .../Contracts/TrainingCorpusConfigDto.cs | 6 +- .../Contracts/TrainingCorpusDto.cs | 8 +- .../TranslationParallelCorpusConfigDto.cs | 12 + .../Contracts/TranslationParallelCorpusDto.cs | 10 + .../TranslationParallelCorpusUpdateDto.cs | 23 + .../TranslationEnginesController.cs | 488 ++- .../src/Serval.Translation/Models/Engine.cs | 3 +- .../Models/MonolingualCorpus.cs | 9 + .../Models/ParallelCorpus.cs | 8 + .../Models/ParallelCorpusFilter.cs | 8 + .../Models/PretranslateCorpus.cs | 5 +- .../Models/TrainingCorpus.cs | 6 +- .../Services/EngineService.cs | 414 +- .../Services/IEngineService.cs | 14 + .../TranslationEngineTests.cs | 535 ++- .../Services/CorpusServiceTests.cs | 57 + .../Services/EngineServiceTests.cs | 1314 ++++++- 64 files changed, 6257 insertions(+), 1385 deletions(-) delete mode 100644 src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Models/MonolingualCorpus.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Models/ParallelCorpus.cs create mode 100644 src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/04LEVTe4.SFM create mode 100644 src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/41MATTe4.SFM create mode 100644 src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/42MRKTe4.SFM create mode 100644 src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/Settings.xml create mode 100644 
src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/TermRenderings.xml create mode 100644 src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/custom.vrs create mode 100644 src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs create mode 100644 src/Serval/src/Serval.DataFiles/Contracts/CorpusConfigDto.cs create mode 100644 src/Serval/src/Serval.DataFiles/Contracts/CorpusDto.cs create mode 100644 src/Serval/src/Serval.DataFiles/Contracts/CorpusFileConfigDto.cs create mode 100644 src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs create mode 100644 src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs create mode 100644 src/Serval/src/Serval.DataFiles/Models/Corpus.cs create mode 100644 src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs create mode 100644 src/Serval/src/Serval.DataFiles/Services/CorpusService.cs create mode 100644 src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs create mode 100644 src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs create mode 100644 src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs create mode 100644 src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs create mode 100644 src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs create mode 100644 src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterConfigDto.cs create mode 100644 src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterDto.cs create mode 100644 src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusConfigDto.cs create mode 100644 src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs create mode 100644 src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs create mode 100644 src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs create mode 100644 src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs create mode 100644 src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs create 
mode 100644 src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs diff --git a/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs b/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs index 8a348c8d..67779bc0 100644 --- a/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs +++ b/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs @@ -80,21 +80,22 @@ await client.BuildStartedAsync( client.InsertPretranslations(cancellationToken: cancellationToken) ) { - foreach (Corpus corpus in request.Corpora) + foreach (ParallelCorpus corpus in request.Corpora) { - if (!corpus.PretranslateAll && corpus.PretranslateTextIds.Count == 0) - continue; - var sourceFiles = corpus - .SourceFiles.Where(f => - (corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(f.TextId)) - && f.Format == FileFormat.Text + .SourceCorpora.SelectMany(sc => + sc.Files.Where(f => + (sc.PretranslateTextIds is null || sc.PretranslateTextIds.Contains(f.TextId)) + && f.Format == FileFormat.Text + ) ) .ToDictionary(f => f.TextId, f => f.Location); var targetFiles = corpus - .TargetFiles.Where(f => - (corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(f.TextId)) - && f.Format == FileFormat.Text + .TargetCorpora.SelectMany(tc => + tc.Files.Where(f => + (tc.PretranslateTextIds is null || tc.PretranslateTextIds.Contains(f.TextId)) + && f.Format == FileFormat.Text + ) ) .ToDictionary(f => f.TextId, f => f.Location); diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs b/src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs deleted file mode 100644 index 9145e90d..00000000 --- a/src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Serval.Machine.Shared.Models; - -public record Corpus -{ - public required string Id { get; init; } - public required string SourceLanguage { get; init; } - public required string TargetLanguage { get; init; } - public IReadOnlyDictionary>? 
TrainOnChapters { get; init; } - public IReadOnlyDictionary>? PretranslateChapters { get; init; } - public required HashSet? TrainOnTextIds { get; init; } - public required HashSet? PretranslateTextIds { get; init; } - public required IReadOnlyList SourceFiles { get; init; } - public required IReadOnlyList TargetFiles { get; init; } -} diff --git a/src/Machine/src/Serval.Machine.Shared/Models/MonolingualCorpus.cs b/src/Machine/src/Serval.Machine.Shared/Models/MonolingualCorpus.cs new file mode 100644 index 00000000..2b4a1612 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Models/MonolingualCorpus.cs @@ -0,0 +1,12 @@ +namespace Serval.Machine.Shared.Models; + +public record MonolingualCorpus +{ + public required string Id { get; set; } + public required string Language { get; set; } + public required IReadOnlyList Files { get; set; } + public HashSet? TrainOnTextIds { get; set; } + public Dictionary>? TrainOnChapters { get; set; } + public HashSet? PretranslateTextIds { get; set; } + public Dictionary>? 
PretranslateChapters { get; set; } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Models/ParallelCorpus.cs b/src/Machine/src/Serval.Machine.Shared/Models/ParallelCorpus.cs new file mode 100644 index 00000000..a28dfc14 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Models/ParallelCorpus.cs @@ -0,0 +1,8 @@ +namespace Serval.Machine.Shared.Models; + +public record ParallelCorpus +{ + public required string Id { get; set; } + public IReadOnlyList SourceCorpora { get; set; } = new List(); + public IReadOnlyList TargetCorpora { get; set; } = new List(); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs index 71ed5d94..b9e64472 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs @@ -35,7 +35,7 @@ Task StartBuildAsync( string engineId, string buildId, string? buildOptions, - IReadOnlyList corpora, + IReadOnlyList corpora, CancellationToken cancellationToken = default ); diff --git a/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs index fc1c2c95..0dd66544 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs @@ -69,7 +69,7 @@ public async Task StartBuildAsync( string engineId, string buildId, string? 
buildOptions, - IReadOnlyList corpora, + IReadOnlyList corpora, CancellationToken cancellationToken = default ) { diff --git a/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs index a8b3d52f..4d250188 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs @@ -11,7 +11,7 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object? return stage switch { BuildStage.Preprocess - => CreateJob>( + => CreateJob>( engineId, buildId, "nmt", diff --git a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs index 97e5fc77..d9e433ce 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs @@ -1,6 +1,6 @@ namespace Serval.Machine.Shared.Services; -public class PreprocessBuildJob : HangfireBuildJob> +public class PreprocessBuildJob : HangfireBuildJob> { private static readonly JsonWriterOptions PretranslateWriterOptions = new() { Indented = true }; @@ -43,7 +43,7 @@ internal int Seed protected override async Task DoWorkAsync( string engineId, string buildId, - IReadOnlyList data, + IReadOnlyList data, string? buildOptions, CancellationToken cancellationToken ) @@ -99,7 +99,7 @@ CancellationToken cancellationToken private async Task<(int TrainCount, int PretranslateCount)> WriteDataFilesAsync( string buildId, - IReadOnlyList corpora, + IReadOnlyList corpora, string? 
buildOptions, CancellationToken cancellationToken ) @@ -121,17 +121,63 @@ CancellationToken cancellationToken int trainCount = 0; int pretranslateCount = 0; pretranslateWriter.WriteStartArray(); - foreach (Corpus corpus in corpora) + foreach (ParallelCorpus corpus in corpora) { - ITextCorpus[] sourceTextCorpora = _corpusService.CreateTextCorpora(corpus.SourceFiles).ToArray(); - ITextCorpus targetTextCorpus = - _corpusService.CreateTextCorpora(corpus.TargetFiles).FirstOrDefault() ?? new DictionaryTextCorpus(); + (MonolingualCorpus Corpus, ITextCorpus TextCorpus)[] sourceCorpora = corpus + .SourceCorpora.SelectMany(c => _corpusService.CreateTextCorpora(c.Files).Select(tc => (c, tc))) + .ToArray(); + ITextCorpus[] sourceTrainingCorpora = sourceCorpora + .Select(sc => + { + ITextCorpus textCorpus = sc.TextCorpus; + if (sc.Corpus.TrainOnTextIds is not null) + textCorpus = textCorpus.FilterTexts(sc.Corpus.TrainOnTextIds); + return textCorpus.Where(row => + row.Ref is not ScriptureRef sr + || sc.Corpus.TrainOnChapters is null + || IsInChapters(sr, sc.Corpus.TrainOnChapters) + ); + }) + .ToArray(); + ITextCorpus[] sourcePretranslateCorpora = sourceCorpora + .Select(sc => + { + ITextCorpus textCorpus = sc.TextCorpus; + if (sc.Corpus.PretranslateTextIds is not null) + textCorpus = textCorpus.FilterTexts(sc.Corpus.PretranslateTextIds); + return textCorpus.Where(row => + row.Ref is not ScriptureRef sr + || sc.Corpus.PretranslateChapters is null + || ( + IsInChapters(sr, sc.Corpus.PretranslateChapters) + && !IsInChapters(sr, sc.Corpus.TrainOnChapters ?? 
new()) + ) + ); + }) + .ToArray(); + + (MonolingualCorpus Corpus, ITextCorpus TextCorpus)[] targetCorpora = corpus + .TargetCorpora.SelectMany(c => _corpusService.CreateTextCorpora(c.Files).Select(tc => (c, tc))) + .ToArray(); + ITextCorpus[] targetTrainingCorpora = targetCorpora + .Select(tc => + { + ITextCorpus textCorpus = tc.TextCorpus; + if (tc.Corpus.TrainOnTextIds is not null) + textCorpus = textCorpus.FilterTexts(tc.Corpus.TrainOnTextIds); + return textCorpus.Where(row => + row.Ref is not ScriptureRef sr + || tc.Corpus.TrainOnChapters is null + || IsInChapters(sr, tc.Corpus.TrainOnChapters) + ); + }) + .ToArray(); - if (sourceTextCorpora.Length == 0) + if (sourceCorpora.Length == 0) continue; int skipCount = 0; - foreach (Row?[] rows in AlignTrainCorpus(corpus, sourceTextCorpora, targetTextCorpus)) + foreach (Row?[] rows in AlignTrainCorpus(sourceTrainingCorpora, targetTrainingCorpora)) { if (skipCount > 0) { @@ -139,15 +185,30 @@ CancellationToken cancellationToken continue; } - Row[] trainRows = rows.Where(r => r is not null && IsInTrain(r, corpus)).Cast().ToArray(); + Row[] trainRows = rows.Where(r => r is not null).Cast().ToArray(); if (trainRows.Length > 0) { Row row = trainRows[0]; if (rows.Length > 1) { Row[] nonEmptyRows = trainRows.Where(r => r.SourceSegment.Length > 0).ToArray(); + Row[] targetNonEmptyRows = nonEmptyRows.Where(r => r.TargetSegment.Length > 0).ToArray(); + if (targetNonEmptyRows.Length > 0) + nonEmptyRows = targetNonEmptyRows; if (nonEmptyRows.Length > 0) - row = nonEmptyRows[_random.Next(nonEmptyRows.Length)]; + { + nonEmptyRows = nonEmptyRows + .GroupBy(r => r.SourceSegment) + .Select(group => group.First()) + .ToArray(); + { + nonEmptyRows = nonEmptyRows + .GroupBy(r => r.SourceSegment) + .Select(group => group.First()) + .ToArray(); + row = nonEmptyRows[_random.Next(nonEmptyRows.Length)]; + } + } } await sourceTrainWriter.WriteAsync($"{row.SourceSegment}\n"); @@ -160,8 +221,12 @@ CancellationToken cancellationToken if 
((bool?)buildOptionsObject?["use_key_terms"] ?? true) { - ITextCorpus? sourceTermCorpus = _corpusService.CreateTermCorpora(corpus.SourceFiles).FirstOrDefault(); - ITextCorpus? targetTermCorpus = _corpusService.CreateTermCorpora(corpus.TargetFiles).FirstOrDefault(); + ITextCorpus? sourceTermCorpus = _corpusService + .CreateTermCorpora(corpus.SourceCorpora.SelectMany(sc => sc.Files).ToList()) + .FirstOrDefault(); + ITextCorpus? targetTermCorpus = _corpusService + .CreateTermCorpora(corpus.TargetCorpora.SelectMany(tc => tc.Files).ToList()) + .FirstOrDefault(); if (sourceTermCorpus is not null && targetTermCorpus is not null) { IParallelTextCorpus parallelKeyTermsCorpus = sourceTermCorpus.AlignRows(targetTermCorpus); @@ -174,13 +239,9 @@ CancellationToken cancellationToken } } - foreach (Row row in AlignPretranslateCorpus(corpus, sourceTextCorpora[0], targetTextCorpus)) + foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpora[0].TextCorpus)) { - if ( - IsInPretranslate(row, corpus) - && row.SourceSegment.Length > 0 - && (row.TargetSegment.Length == 0 || !IsInTrain(row, corpus)) - ) + if (row.SourceSegment.Length > 0) { pretranslateWriter.WriteStartObject(); pretranslateWriter.WriteString("corpusId", corpus.Id); @@ -201,10 +262,17 @@ CancellationToken cancellationToken return (trainCount, pretranslateCount); } + private static bool IsInChapters(ScriptureRef sr, Dictionary> selection) + { + return selection.TryGetValue(sr.Book, out HashSet? 
chapters) + && chapters != null + && (chapters.Count == 0 || chapters.Contains(sr.ChapterNum)); + } + protected override async Task CleanupAsync( string engineId, string buildId, - IReadOnlyList data, + IReadOnlyList data, JobCompletionStatus completionStatus ) { @@ -221,62 +289,25 @@ JobCompletionStatus completionStatus } } - private static bool IsInTrain(Row row, Corpus corpus) - { - return IsIncluded(row, corpus.TrainOnTextIds, corpus.TrainOnChapters); - } - - private static bool IsInPretranslate(Row row, Corpus corpus) - { - return IsIncluded(row, corpus.PretranslateTextIds, corpus.PretranslateChapters); - } - - private static bool IsIncluded( - Row? row, - IReadOnlySet? textIds, - IReadOnlyDictionary>? chapters - ) - { - if (row is null) - return false; - if (chapters is not null) - return row.Refs.Any(r => IsInChapters(chapters, r)); - if (textIds is not null) - return textIds.Contains(row.TextId); - return true; - } - - private static bool IsInChapters(IReadOnlyDictionary> bookChapters, object rowRef) - { - if (rowRef is not ScriptureRef sr) - return false; - return bookChapters.TryGetValue(sr.Book, out HashSet? chapters) - && (chapters.Contains(sr.ChapterNum) || chapters.Count == 0); - } - private static IEnumerable AlignTrainCorpus( - Corpus corpus, IReadOnlyList srcCorpora, - ITextCorpus trgCorpus + IReadOnlyList trgCorpora ) { - IEnumerable? textIds = corpus.TrainOnChapters is not null - ? 
corpus.TrainOnChapters.Keys - : corpus.TrainOnTextIds; - srcCorpora = srcCorpora.Select(sc => sc.FilterTexts(textIds).Transform(CleanSegment)).ToArray(); - trgCorpus = trgCorpus.FilterTexts(textIds).Transform(CleanSegment); + srcCorpora = srcCorpora.Select(sc => sc.Transform(CleanSegment)).ToArray(); + trgCorpora = trgCorpora.Select(tc => tc.Transform(CleanSegment)).ToArray(); - if (trgCorpus.IsScripture()) + if (trgCorpora.All(tc => tc.IsScripture())) { return srcCorpora - .Select(sc => AlignScripture(sc, trgCorpus)) + .SelectMany(sc => trgCorpora.Select(tc => AlignScripture(sc, tc))) .ZipMany(rows => rows.ToArray()) // filter out every list that only contains completely empty rows .Where(rows => rows.Any(r => r is null || r.SourceSegment.Length > 0 || r.TargetSegment.Length > 0)); } IEnumerable sourceOnlyRows = srcCorpora - .Select(sc => sc.AlignRows(trgCorpus, allSourceRows: true)) + .SelectMany(sc => trgCorpora.Select(tc => sc.AlignRows(tc, allSourceRows: true))) .ZipMany(rows => rows.Where(r => r.TargetSegment.Count == 0) .Select(r => new Row(r.TextId, r.Refs, r.SourceText, r.TargetText, 1)) @@ -284,7 +315,7 @@ ITextCorpus trgCorpus ); IEnumerable targetRows = srcCorpora - .Select(sc => sc.AlignRows(trgCorpus, allTargetRows: true)) + .SelectMany(sc => trgCorpora.Select(tc => sc.AlignRows(tc, allTargetRows: true))) .ZipMany(rows => rows.Where(r => r.TargetSegment.Count > 0) .Select(r => new Row(r.TextId, r.Refs, r.SourceText, r.TargetText, 1)) @@ -379,19 +410,14 @@ ITextCorpus trgCorpus } } - private static IEnumerable AlignPretranslateCorpus(Corpus corpus, ITextCorpus srcCorpus, ITextCorpus trgCorpus) + private static IEnumerable AlignPretranslateCorpus(ITextCorpus[] srcCorpora, ITextCorpus trgCorpus) { - IEnumerable? textIds = corpus.PretranslateChapters is not null - ? 
corpus.PretranslateChapters.Keys - : corpus.PretranslateTextIds; - srcCorpus = srcCorpus.FilterTexts(textIds).Transform(CleanSegment); - trgCorpus = trgCorpus.FilterTexts(textIds).Transform(CleanSegment); int rowCount = 0; StringBuilder srcSegBuffer = new(); StringBuilder trgSegBuffer = new(); List refs = []; string textId = ""; - foreach (ParallelTextRow row in srcCorpus.AlignRows(trgCorpus, allSourceRows: true)) + foreach (ParallelTextRow row in srcCorpora.SelectMany(sc => sc.AlignRows(trgCorpus, allSourceRows: true))) { if (!row.IsTargetRangeStart && row.IsTargetInRange) { diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs index 049889b9..bced613b 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs @@ -91,7 +91,7 @@ await engineService.TrainSegmentPairAsync( public override async Task StartBuild(StartBuildRequest request, ServerCallContext context) { ITranslationEngineService engineService = GetEngineService(request.EngineType); - Models.Corpus[] corpora = request.Corpora.Select(Map).ToArray(); + Models.ParallelCorpus[] corpora = request.Corpora.Select(Map).ToArray(); try { await engineService.StartBuildAsync( @@ -269,32 +269,41 @@ private static Translation.V1.Phrase Map(SIL.Machine.Translation.Phrase source) }; } - private static Models.Corpus Map(Translation.V1.Corpus source) + private static Models.ParallelCorpus Map(Translation.V1.ParallelCorpus source) { - var pretranslateChapters = source.PretranslateChapters.ToDictionary( + return new Models.ParallelCorpus + { + Id = source.Id, + SourceCorpora = source.SourceCorpora.Select(Map).ToList(), + TargetCorpora = source.TargetCorpora.Select(Map).ToList() + }; + } + + private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus source) + { 
+ var trainOnChapters = source.TrainOnChapters.ToDictionary( kvp => kvp.Key, kvp => kvp.Value.Chapters.ToHashSet() ); - FilterChoice pretranslateFilter = GetFilterChoice(source.PretranslateAll, pretranslateChapters); + var trainOnTextIds = source.TrainOnTextIds.ToHashSet(); + FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds); - var trainOnChapters = source.TrainOnChapters.ToDictionary( + var pretranslateChapters = source.PretranslateChapters.ToDictionary( kvp => kvp.Key, kvp => kvp.Value.Chapters.ToHashSet() ); - FilterChoice trainingFilter = GetFilterChoice(source.TrainOnAll, trainOnChapters); + var pretranslateTextIds = source.PretranslateTextIds.ToHashSet(); + FilterChoice pretranslateFilter = GetFilterChoice(pretranslateChapters, pretranslateTextIds); - return new Models.Corpus + return new Models.MonolingualCorpus { Id = source.Id, - SourceLanguage = source.SourceLanguage, - TargetLanguage = source.TargetLanguage, + Language = source.Language, + Files = source.Files.Select(Map).ToList(), TrainOnChapters = trainingFilter == FilterChoice.Chapters ? trainOnChapters : null, + TrainOnTextIds = trainingFilter == FilterChoice.TextIds ? trainOnTextIds : null, PretranslateChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null, - TrainOnTextIds = trainingFilter == FilterChoice.TextIds ? source.TrainOnTextIds.ToHashSet() : null, - PretranslateTextIds = - pretranslateFilter == FilterChoice.TextIds ? source.PretranslateTextIds.ToHashSet() : null, - SourceFiles = source.SourceFiles.Select(Map).ToList(), - TargetFiles = source.TargetFiles.Select(Map).ToList() + PretranslateTextIds = pretranslateFilter == FilterChoice.TextIds ? 
pretranslateTextIds : null }; } @@ -315,16 +324,17 @@ private enum FilterChoice None } - private static FilterChoice GetFilterChoice(bool all, IReadOnlyDictionary> chapters) + private static FilterChoice GetFilterChoice( + IReadOnlyDictionary> chapters, + HashSet textIds + ) { - if (all) - return FilterChoice.None; - // Only either textIds or Scripture Range will be used at a time // TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text) - if (chapters.Count == 0) + if (chapters is null && textIds is null) + return FilterChoice.None; + if (chapters is null || chapters.Count == 0) return FilterChoice.TextIds; - else - return FilterChoice.Chapters; + return FilterChoice.Chapters; } } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs index 5789d67d..7c4f10b4 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs @@ -180,7 +180,7 @@ public async Task StartBuildAsync( string engineId, string buildId, string? buildOptions, - IReadOnlyList corpora, + IReadOnlyList corpora, CancellationToken cancellationToken = default ) { diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs index 2d9bf00c..71f2d09a 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs @@ -11,7 +11,7 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object? 
return stage switch { BuildStage.Preprocess - => CreateJob>( + => CreateJob>( engineId, buildId, "smt_transfer", diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs index 9e14037a..b9393e9b 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs @@ -27,7 +27,7 @@ IRepository trainSegmentPairs protected override async Task InitializeAsync( string engineId, string buildId, - IReadOnlyList data, + IReadOnlyList data, CancellationToken cancellationToken ) { diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs index 19a15a25..67145c01 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs @@ -9,7 +9,7 @@ public async Task StartBuildAsync() using var env = new TestEnvironment(); TranslationEngine engine = env.Engines.Get("engine1"); Assert.That(engine.BuildRevision, Is.EqualTo(1)); - await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty()); + await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty()); await env.WaitForBuildToFinishAsync(); engine = env.Engines.Get("engine1"); Assert.Multiple(() => @@ -28,7 +28,7 @@ public async Task CancelBuildAsync_Building() TranslationEngine engine = env.Engines.Get("engine1"); Assert.That(engine.BuildRevision, Is.EqualTo(1)); - await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty()); + await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty()); await env.WaitForBuildToStartAsync(); engine = env.Engines.Get("engine1"); Assert.That(engine.CurrentBuild, Is.Not.Null); @@ -55,7 +55,7 @@ 
public async Task DeleteAsync_WhileBuilding() TranslationEngine engine = env.Engines.Get("engine1"); Assert.That(engine.BuildRevision, Is.EqualTo(1)); - await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty()); + await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty()); await env.WaitForBuildToStartAsync(); engine = env.Engines.Get("engine1"); Assert.That(engine.CurrentBuild, Is.Not.Null); diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs index df7498ee..539b9c4c 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs @@ -7,7 +7,7 @@ public class PreprocessBuildJobTests public async Task RunAsync_FilterOutEverything() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { }; + ParallelCorpus corpus1 = env.DefaultTextFileCorpus with { }; await env.RunBuildJobAsync(corpus1); @@ -25,7 +25,7 @@ public async Task RunAsync_FilterOutEverything() public async Task RunAsync_TrainOnAll() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { TrainOnTextIds = null }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: null, pretranslateTextIds: []); await env.RunBuildJobAsync(corpus1); @@ -43,7 +43,7 @@ public async Task RunAsync_TrainOnAll() public async Task RunAsync_TrainOnTextIds() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { TrainOnTextIds = ["textId1"] }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: ["textId1"], pretranslateTextIds: []); await env.RunBuildJobAsync(corpus1); @@ -61,18 +61,18 @@ public async Task RunAsync_TrainOnTextIds() public async Task RunAsync_TrainAndPretranslateAll() { using TestEnvironment env = new(); 
- Corpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = null, TrainOnTextIds = null }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: null, pretranslateTextIds: null); await env.RunBuildJobAsync(corpus1); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4)); } [Test] public async Task RunAsync_PretranslateAll() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = null }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: [], pretranslateTextIds: null); await env.RunBuildJobAsync(corpus1); @@ -83,18 +83,18 @@ public async Task RunAsync_PretranslateAll() public async Task RunAsync_PretranslateTextIds() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = ["textId1"], TrainOnTextIds = null }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(pretranslateTextIds: ["textId1"], trainOnTextIds: null); await env.RunBuildJobAsync(corpus1); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4)); } [Test] public async Task RunAsync_EnableKeyTerms() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultParatextCorpus with { }; + ParallelCorpus corpus1 = env.DefaultParatextCorpus; await env.RunBuildJobAsync(corpus1, useKeyTerms: true); @@ -112,7 +112,7 @@ public async Task RunAsync_EnableKeyTerms() public async Task RunAsync_DisableKeyTerms() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultParatextCorpus with { }; + ParallelCorpus corpus1 = env.DefaultParatextCorpus; await env.RunBuildJobAsync(corpus1, useKeyTerms: false); @@ -130,16 +130,16 @@ public async Task RunAsync_DisableKeyTerms() public async Task RunAsync_PretranslateChapters() { using TestEnvironment env = new(); - Corpus corpus1 = 
env.DefaultParatextCorpus with - { - PretranslateChapters = new Dictionary> + ParallelCorpus corpus1 = env.ParatextCorpus( + trainOnChapters: [], + pretranslateChapters: new Dictionary> { { "1CH", new HashSet { 12 } } } - }; + ); await env.RunBuildJobAsync(corpus1); @@ -150,16 +150,16 @@ public async Task RunAsync_PretranslateChapters() public async Task RunAsync_TrainOnChapters() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultParatextCorpus with - { - TrainOnChapters = new Dictionary> + ParallelCorpus corpus1 = env.ParatextCorpus( + trainOnChapters: new Dictionary> { { "MAT", new HashSet { 1 } } - } - }; + }, + pretranslateChapters: [] + ); await env.RunBuildJobAsync(corpus1, useKeyTerms: false); @@ -177,11 +177,7 @@ public async Task RunAsync_TrainOnChapters() public async Task RunAsync_MixedSource_Paratext() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultMixedSourceParatextCorpus with - { - TrainOnTextIds = null, - PretranslateTextIds = null - }; + ParallelCorpus corpus1 = env.DefaultMixedSourceParatextCorpus; await env.RunBuildJobAsync(corpus1, useKeyTerms: false); @@ -193,20 +189,14 @@ public async Task RunAsync_MixedSource_Paratext() Assert.That(trgCount, Is.EqualTo(1)); Assert.That(termCount, Is.EqualTo(0)); }); - Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(13)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(56)); } [Test] public async Task RunAsync_MixedSource_Text() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultMixedSourceTextFileCorpus with - { - TrainOnTextIds = null, - PretranslateTextIds = null, - TrainOnChapters = null, - PretranslateChapters = null - }; + ParallelCorpus corpus1 = env.DefaultMixedSourceTextFileCorpus; await env.RunBuildJobAsync(corpus1); @@ -218,14 +208,14 @@ public async Task RunAsync_MixedSource_Text() Assert.That(trgCount, Is.EqualTo(1)); Assert.That(termCount, Is.EqualTo(0)); }); - Assert.That(await env.GetPretranslateCountAsync(), 
Is.EqualTo(2)); + Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(9)); } [Test] public void RunAsync_UnknownLanguageTagsNoData() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(sourceLanguage: "xxx", targetLanguage: "zzz"); Assert.ThrowsAsync(async () => { @@ -237,7 +227,7 @@ public void RunAsync_UnknownLanguageTagsNoData() public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" }; + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(sourceLanguage: "xxx", targetLanguage: "zzz"); await env.RunBuildJobAsync(corpus1, engineId: "engine2", engineType: TranslationEngineType.SmtTransfer); } @@ -246,23 +236,22 @@ public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer() public async Task RunAsync_RemoveFreestandingEllipses() { using TestEnvironment env = new(); - Corpus corpus1 = env.DefaultParatextCorpus with - { - TrainOnChapters = new Dictionary> + ParallelCorpus corpus1 = env.ParatextCorpus( + trainOnChapters: new Dictionary> { { "MAT", new HashSet() { 2 } } }, - PretranslateChapters = new Dictionary> + pretranslateChapters: new Dictionary> { { "MAT", new HashSet() { 2 } } } - }; + ); await env.RunBuildJobAsync(corpus1, useKeyTerms: false); string sourceExtract = await env.GetSourceExtractAsync(); Assert.That( @@ -276,21 +265,19 @@ public async Task RunAsync_RemoveFreestandingEllipses() Is.EqualTo("Target one, chapter two, verse one.\n\nTarget one, chapter two, verse three.\n"), targetExtract ); - JsonArray? pretranslations = await env.GetPretranslationAsync(); + JsonArray? 
pretranslations = await env.GetPretranslationsAsync(); Assert.That(pretranslations, Is.Not.Null); - Assert.That(pretranslations.Count, Is.EqualTo(1)); - Assert.That(pretranslations[0]!["translation"]!.ToString(), Is.EqualTo("Source one, chapter two, verse two.")); + Assert.That(pretranslations.Count, Is.EqualTo(0)); } [Test] public void RunAsync_OnlyParseSelectedBooks_NoBadBooks() { using TestEnvironment env = new(); - Corpus corpus = env.DefaultParatextCorpus with - { - TrainOnTextIds = new() { "LEV" }, - PretranslateTextIds = new() { "MRK" } - }; + ParallelCorpus corpus = env.ParatextCorpus( + trainOnTextIds: new() { "LEV" }, + pretranslateTextIds: new() { "MRK" } + ); env.CorpusService = Substitute.For(); env.CorpusService.CreateTextCorpora(Arg.Any>()) @@ -310,11 +297,10 @@ public void RunAsync_OnlyParseSelectedBooks_NoBadBooks() public void RunAsync_OnlyParseSelectedBooks_TrainOnBadBook() { using TestEnvironment env = new(); - Corpus corpus = env.DefaultParatextCorpus with - { - TrainOnTextIds = new() { "MAT" }, - PretranslateTextIds = new() { "MRK" } - }; + ParallelCorpus corpus = env.ParatextCorpus( + trainOnTextIds: new() { "MAT" }, + pretranslateTextIds: new() { "MRK" } + ); env.CorpusService = Substitute.For(); env.CorpusService.CreateTextCorpora(Arg.Any>()) .Returns( @@ -333,11 +319,10 @@ public void RunAsync_OnlyParseSelectedBooks_TrainOnBadBook() public void RunAsync_OnlyParseSelectedBooks_PretranslateOnBadBook() { using TestEnvironment env = new(); - Corpus corpus = env.DefaultParatextCorpus with - { - TrainOnTextIds = new() { "LEV" }, - PretranslateTextIds = new() { "MAT" } - }; + ParallelCorpus corpus = env.ParatextCorpus( + trainOnTextIds: new() { "LEV" }, + pretranslateTextIds: new() { "MAT" } + ); env.CorpusService = Substitute.For(); env.CorpusService.CreateTextCorpora(Arg.Any>()) .Returns( @@ -352,6 +337,147 @@ public void RunAsync_OnlyParseSelectedBooks_PretranslateOnBadBook() }); } + [Test] + public async Task ParallelCorpusLogic() + { + using 
TestEnvironment env = new(); + var corpora = new List() + { + new ParallelCorpus() + { + Id = "1", + SourceCorpora = new List() + { + new() + { + Id = "_1", + Language = "en", + Files = new List { env.ParatextFile("pt-source1") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "LEV", + new() { } + } + }, + PretranslateChapters = new() + { + { + "1CH", + new() { } + } + } + }, + new() + { + Id = "_1", + Language = "en", + Files = new List { env.ParatextFile("pt-source2") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "MRK", + new() { } + } + }, + }, + }, + TargetCorpora = new List() + { + new() + { + Id = "_1", + Language = "en", + Files = new List { env.ParatextFile("pt-target1") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "MRK", + new() { } + } + } + }, + new() + { + Id = "_2", + Language = "en", + Files = new List { env.ParatextFile("pt-target2") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "MRK", + new() { } + }, + { + "LEV", + new() { } + } + } + } + } + } + }; + await env.RunBuildJobAsync(corpora, useKeyTerms: false); + Assert.Multiple(async () => + { + Assert.That( + await env.GetSourceExtractAsync(), + Is.EqualTo( + @"Source one, chapter fourteen, verse fifty-five. Segment b. +Source one, chapter fourteen, verse fifty-six. +Source one, chapter one, verse one. +Source two, chapter one, verse two. +Source two, chapter one, verse three. +Source two, chapter one, verse four. +Source two, chapter one, verse five. Source two, chapter one, verse six. +Source two, chapter one, verse seven. Source two, chapter one, verse eight. +Source two, chapter one, verse nine. Source two, chapter one, verse ten. +Source two, chapter one, verse one. +" + ) + ); + Assert.That( + await env.GetTargetExtractAsync(), + Is.EqualTo( + @"Target two, chapter fourteen, verse fifty-five. +Target two, chapter fourteen, verse fifty-six. +Target one, chapter one, verse one. 
+Target one, chapter one, verse two. +Target one, chapter one, verse three. + +Target one, chapter one, verse five and six. +Target one, chapter one, verse seven and eight. +Target one, chapter one, verse nine and ten. + +" + ) + ); + }); + JsonArray? pretranslations = await env.GetPretranslationsAsync(); + Assert.That(pretranslations, Is.Not.Null); + Assert.That(pretranslations!.Count, Is.EqualTo(37), pretranslations.ToJsonString()); + Assert.That( + pretranslations[2]!["translation"]!.ToString(), + Is.EqualTo("Source one, chapter twelve, verse one.") + ); + } + private class TestEnvironment : DisposableBase { private static readonly string TestDataPath = Path.Combine( @@ -375,10 +501,10 @@ private class TestEnvironment : DisposableBase public IClearMLService ClearMLService { get; } public IOptionsMonitor BuildJobOptions { get; } - public Corpus DefaultTextFileCorpus { get; } - public Corpus DefaultMixedSourceTextFileCorpus { get; } - public Corpus DefaultParatextCorpus { get; } - public Corpus DefaultMixedSourceParatextCorpus { get; } + public ParallelCorpus DefaultTextFileCorpus { get; } + public ParallelCorpus DefaultMixedSourceTextFileCorpus { get; } + public ParallelCorpus DefaultParatextCorpus { get; } + public ParallelCorpus DefaultMixedSourceParatextCorpus { get; } public TestEnvironment() { @@ -390,49 +516,121 @@ public TestEnvironment() ZipParatextProject("pt-source1"); ZipParatextProject("pt-source2"); ZipParatextProject("pt-target1"); + ZipParatextProject("pt-target2"); DefaultTextFileCorpus = new() { Id = "corpusId1", - SourceLanguage = "es", - TargetLanguage = "en", - PretranslateTextIds = [], - TrainOnTextIds = [], - SourceFiles = [TextFile("source1")], - TargetFiles = [TextFile("target1")] + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [TextFile("source1")], + TrainOnTextIds = [], + PretranslateTextIds = [] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = "en", + Files = 
[TextFile("target1")], + TrainOnTextIds = [] + } + } }; DefaultMixedSourceTextFileCorpus = new() { Id = "corpusId1", - SourceLanguage = "es", - TargetLanguage = "en", - PretranslateTextIds = [], - TrainOnTextIds = [], - SourceFiles = [TextFile("source1"), TextFile("source2")], - TargetFiles = [TextFile("target1")] + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [TextFile("source1"), TextFile("source2")], + TrainOnTextIds = null, + TrainOnChapters = null, + PretranslateTextIds = null, + PretranslateChapters = null, + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = "en", + Files = [TextFile("target1")], + TrainOnChapters = null, + TrainOnTextIds = null + } + } }; DefaultParatextCorpus = new() { Id = "corpusId1", - SourceLanguage = "es", - TargetLanguage = "en", - PretranslateTextIds = [], - TrainOnTextIds = [], - SourceFiles = [ParatextFile("pt-source1")], - TargetFiles = [ParatextFile("pt-target1")] + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [ParatextFile("pt-source1")], + TrainOnTextIds = [], + PretranslateTextIds = [] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = "en", + Files = [ParatextFile("pt-target1")], + TrainOnTextIds = [] + } + } }; DefaultMixedSourceParatextCorpus = new() { Id = "corpusId1", - SourceLanguage = "es", - TargetLanguage = "en", - PretranslateTextIds = [], - TrainOnTextIds = [], - SourceFiles = [ParatextFile("pt-source1"), ParatextFile("pt-source2")], - TargetFiles = [ParatextFile("pt-target1")] + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [ParatextFile("pt-source1")], + TrainOnTextIds = null, + PretranslateTextIds = null + }, + new() + { + Id = "src_1", + Language = "es", + Files = [ParatextFile("pt-source2")], + TrainOnTextIds = null, + PretranslateTextIds = null + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + 
Language = "en", + Files = [ParatextFile("pt-target1")], + TrainOnTextIds = null + } + } }; Engines = new MemoryRepository(); @@ -613,15 +811,153 @@ public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType) ; } + public static ParallelCorpus TextFileCorpus( + HashSet? trainOnTextIds, + HashSet? pretranslateTextIds + ) + { + return new() + { + Id = "corpusId1", + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [TextFile("source1")], + TrainOnTextIds = trainOnTextIds, + PretranslateTextIds = pretranslateTextIds + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = "en", + Files = [TextFile("target1")], + TrainOnTextIds = trainOnTextIds + } + } + }; + } + + public static ParallelCorpus TextFileCorpus(string sourceLanguage, string targetLanguage) + { + return new() + { + Id = "corpusId1", + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = sourceLanguage, + Files = [TextFile("source1")], + TrainOnTextIds = [], + PretranslateTextIds = [] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = targetLanguage, + Files = [TextFile("target1")], + TrainOnTextIds = [] + } + } + }; + } + + public ParallelCorpus ParatextCorpus( + Dictionary>? trainOnChapters, + Dictionary>? pretranslateChapters + ) + { + return new() + { + Id = "corpusId1", + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [ParatextFile("pt-source1")], + TrainOnChapters = trainOnChapters, + PretranslateChapters = pretranslateChapters + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = "en", + Files = [ParatextFile("pt-target1")], + TrainOnChapters = trainOnChapters + } + } + }; + } + + public ParallelCorpus ParatextCorpus(HashSet? trainOnTextIds, HashSet? 
pretranslateTextIds) + { + return new() + { + Id = "corpusId1", + SourceCorpora = new List() + { + new() + { + Id = "src_1", + Language = "es", + Files = [ParatextFile("pt-source1")], + TrainOnTextIds = trainOnTextIds, + PretranslateTextIds = pretranslateTextIds + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg_1", + Language = "en", + Files = [ParatextFile("pt-target1")], + TrainOnTextIds = trainOnTextIds + } + } + }; + } + public Task RunBuildJobAsync( - Corpus corpus, + ParallelCorpus corpus, + bool useKeyTerms = true, + string engineId = "engine1", + TranslationEngineType engineType = TranslationEngineType.Nmt + ) + { + return RunBuildJobAsync([corpus], useKeyTerms, engineId, engineType); + } + + public Task RunBuildJobAsync( + IEnumerable corpora, bool useKeyTerms = true, string engineId = "engine1", TranslationEngineType engineType = TranslationEngineType.Nmt ) { return GetBuildJob(engineType) - .RunAsync(engineId, "build1", [corpus], useKeyTerms ? null : "{\"use_key_terms\":false}", default); + .RunAsync( + engineId, + "build1", + corpora.ToList(), + useKeyTerms ? null : "{\"use_key_terms\":false}", + default + ); } public async Task GetSourceExtractAsync() @@ -665,7 +1001,7 @@ public async Task GetTargetExtractAsync() return (src1Count, src2Count, trgCount, termCount); } - public async Task GetPretranslationAsync() + public async Task GetPretranslationsAsync() { using StreamReader reader = new(await SharedFileService.OpenReadAsync("builds/build1/pretranslate.src.json")); @@ -674,7 +1010,7 @@ public async Task GetTargetExtractAsync() public async Task GetPretranslateCountAsync() { - return (await GetPretranslationAsync())?.Count ?? 0; + return (await GetPretranslationsAsync())?.Count ?? 
0; } private void ZipParatextProject(string name) @@ -682,7 +1018,7 @@ private void ZipParatextProject(string name) ZipFile.CreateFromDirectory(Path.Combine(TestDataPath, name), Path.Combine(_tempDir.Path, $"{name}.zip")); } - private CorpusFile ParatextFile(string name) + public CorpusFile ParatextFile(string name) { return new() { diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs index 1a798d97..6b888794 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs @@ -40,15 +40,30 @@ await env.Service.StartBuildAsync( BuildId1, null, [ - new Corpus() + new ParallelCorpus() { Id = CorpusId1, - SourceLanguage = "es", - TargetLanguage = "en", - SourceFiles = [], - TargetFiles = [], - TrainOnTextIds = null, - PretranslateTextIds = null + SourceCorpora = new List() + { + new() + { + Id = "src", + Language = "es", + Files = [], + TrainOnTextIds = null, + PretranslateTextIds = null + } + }, + TargetCorpora = new List() + { + new() + { + Id = "trg", + Language = "en", + Files = [], + TrainOnTextIds = null + } + }, } ] ); @@ -77,7 +92,7 @@ public async Task CancelBuildAsync_Building(BuildJobRunnerType trainJobRunnerTyp using var env = new TestEnvironment(trainJobRunnerType); env.UseInfiniteTrainJob(); - await env.Service.StartBuildAsync(EngineId1, BuildId1, buildOptions: "{}", corpora: []); + await env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); await env.WaitForTrainingToStartAsync(); TranslationEngine engine = env.Engines.Get(EngineId1); Assert.That(engine.CurrentBuild, Is.Not.Null); @@ -103,7 +118,7 @@ public async Task StartBuildAsync_RestartUnfinishedBuild() using var env = new TestEnvironment(BuildJobRunnerType.Hangfire); env.UseInfiniteTrainJob(); - await 
env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); + await env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); await env.WaitForTrainingToStartAsync(); TranslationEngine engine = env.Engines.Get(EngineId1); Assert.That(engine.CurrentBuild, Is.Not.Null); @@ -128,7 +143,7 @@ public async Task DeleteAsync_WhileBuilding(BuildJobRunnerType trainJobRunnerTyp using var env = new TestEnvironment(trainJobRunnerType); env.UseInfiniteTrainJob(); - await env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); + await env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); await env.WaitForTrainingToStartAsync(); TranslationEngine engine = env.Engines.Get(EngineId1); Assert.That(engine.CurrentBuild, Is.Not.Null); @@ -148,7 +163,7 @@ public async Task TrainSegmentPairAsync(BuildJobRunnerType trainJobRunnerType) using var env = new TestEnvironment(trainJobRunnerType); env.UseInfiniteTrainJob(); - await env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); + await env.Service.StartBuildAsync(EngineId1, BuildId1, "{}", Array.Empty()); await env.WaitForBuildToStartAsync(); TranslationEngine engine = env.Engines.Get(EngineId1); Assert.That(engine.CurrentBuild, Is.Not.Null); diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/04LEVTe4.SFM b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/04LEVTe4.SFM new file mode 100644 index 00000000..323ecf27 --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/04LEVTe4.SFM @@ -0,0 +1,7 @@ +\id LEV - Test +\h Leviticus +\mt Leviticus +\c 14 +\p +\v 55 Target two, chapter fourteen, verse fifty-five. +\v 56 Target two, chapter fourteen, verse fifty-six. 
\ No newline at end of file diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/41MATTe4.SFM b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/41MATTe4.SFM new file mode 100644 index 00000000..9119285f --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/41MATTe4.SFM @@ -0,0 +1,18 @@ +\id MAT - Test +\h Matthew +\mt Matthew +\ip An introduction to Matthew +\c 1 +\p +\v 1 Target two, chapter one, verse one. +\v 2 Target two, chapter one, verse two. +\v 3 Target two, chapter one, verse three. +\v 4 +\v 5-6 Target two, chapter one, verse five and six. +\v 7-8 Target two, chapter one, verse seven and eight. +\v 9-10 Target two, chapter one, verse nine and ten. +\c 2 +\p +\v 1 Target two, chapter two, verse one. +\v 2 ... +\v 3 Target two, chapter two, verse three. \ No newline at end of file diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/42MRKTe4.SFM b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/42MRKTe4.SFM new file mode 100644 index 00000000..46000963 --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/42MRKTe4.SFM @@ -0,0 +1,4 @@ +\id MRK - Test +\h Mark +\mt Mark +\ip An introduction to Mark \ No newline at end of file diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/Settings.xml b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/Settings.xml new file mode 100644 index 00000000..ff85c4bc --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/Settings.xml @@ -0,0 +1,33 @@ + + usfm.sty + 4 + en::: + English + 8.0.100.76 + Test4 + 65001 + T + + NFC + Te4 + a7e0b3ce0200736062f9f810a444dbfbe64aca35 + Charis SIL + 12 + + + + 41MAT + + Ten.SFM + F + F + F + Public + Standard:: + + 3 + 
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000 + + + \ No newline at end of file diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/TermRenderings.xml b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/TermRenderings.xml new file mode 100644 index 00000000..5743ee41 --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/TermRenderings.xml @@ -0,0 +1,9 @@ + + + Abra'am + + + + + + diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/custom.vrs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/custom.vrs new file mode 100644 index 00000000..9c1cd387 --- /dev/null +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/data/pt-target2/custom.vrs @@ -0,0 +1,31 @@ +# custom.vrs + +LEV 14:56 +ROM 14:26 +REV 12:17 +TOB 5:22 +TOB 10:12 +SIR 23:28 +ESG 1:22 +ESG 3:15 +ESG 5:14 +ESG 8:17 +ESG 10:14 +SIR 33:33 +SIR 41:24 +BAR 1:22 +4MA 7:25 +4MA 12:20 + +# deliberately missing verses +-ROM 16:26 +-ROM 16:27 +-3JN 1:15 +-S3Y 1:49 +-ESG 4:6 +-ESG 9:5 +-ESG 9:30 + +LEV 14:55 = LEV 14:55 +LEV 14:55 = LEV 14:56 +LEV 14:56 = LEV 14:57 diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index 45867f0e..e55775e7 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -2451,94 +2451,57 @@ private string ConvertToString(object? 
value, System.Globalization.CultureInfo c } [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial interface IDataFilesClient + public partial interface ICorporaClient { /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all files + /// Get all corpora /// - /// A list of all files owned by the client + /// A list of all corpora owned by the client /// A server side error occurred. - System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Upload a new file + /// Create a new corpus /// - /// - /// Sample request: - ///
- ///
POST /files - ///
{ - ///
"format": "text", - ///
"name": "myTeam:myProject:myFile.txt" - ///
} - ///
- /// The file to upload. Max size: 100MB - /// File format options: - ///
* **Text**: One translation unit (a.k.a., verse) per line - ///
* If a line contains a tab, characters before the tab are used as a unique identifier for the line, characters after the tab are understood as the content of the verse, and if there is another tab following the verse content, characters after this second tab are assumed to be column codes like "ss" etc. for sectioning and other formatting. See this example of a tab-delimited text file: - ///
> verse_001_005 (tab) Ὑπομνῆσαι δὲ ὑμᾶς βούλομαι , εἰδότας ὑμᾶς ἅπαξ τοῦτο - ///
> verse_001_006 (tab) Ἀγγέλους τε τοὺς μὴ τηρήσαντας τὴν ἑαυτῶν ἀρχήν , ἀλλὰ (tab) ss - ///
> verse_001_007 (tab) Ὡς Σόδομα καὶ Γόμορρα , καὶ αἱ περὶ αὐτὰς πόλεις (tab) ss - ///
* Otherwise, *no tabs* should be used in the file and a unique identifier will generated for each translation unit based on the line number. - ///
* **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue. - ///
* USFM files in paratext projects have unique identifiers assigned per segment for scripture and non-scripture content according to [this guide](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) - /// A name to help identify and distinguish the file. - ///
Recommendation: Create a multi-part name to distinguish between projects, uses, languages, etc. - ///
The name does not have to be unique. - ///
Example: myTranslationTeam:myProject:myLanguage:myFile.txt - /// The file was created successfully + /// The corpus was created successfully /// A server side error occurred. - System.Threading.Tasks.Task CreateAsync(FileParameter file, FileFormat format, string? name = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task CreateAsync(CorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get a file by unique id + /// Get a corpus by unique id /// - /// The unique identifier for the file - /// The file exists + /// The unique identifier for the corpus + /// The corpus exists /// A server side error occurred. - System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Update an existing file + /// Update an existing corpus /// - /// The existing file's unique id - /// The updated file - /// The file was updated successfully + /// The unique identifier for the corpus + /// Tuples of the ids of the new corpus files and the associated text ids + /// The corpus was updated successfully /// A server side error occurred. 
- System.Threading.Tasks.Task UpdateAsync(string id, FileParameter file, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task UpdateAsync(string id, System.Collections.Generic.IEnumerable files, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Delete an existing file + /// Delete an existing corpus /// - /// - /// If a file is in a corpora and the file is deleted, it will be automatically removed from the corpora. - ///
If a build job has started before the file was deleted, the file will be used for the build job, even - ///
though it will no longer be accessible through the API. - ///
- /// The existing file's unique id - /// The file was deleted successfully + /// The unique identifier for the corpus + /// The corpus was deleted successfully /// A server side error occurred. System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Download a file - /// - /// The unique identifier for the file - /// The file exists - /// A server side error occurred. - System.Threading.Tasks.Task DownloadAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); - } [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class DataFilesClient : IDataFilesClient + public partial class CorporaClient : ICorporaClient { #pragma warning disable 8618 private string _baseUrl; @@ -2549,7 +2512,7 @@ public partial class DataFilesClient : IDataFilesClient private Newtonsoft.Json.JsonSerializerSettings _instanceSettings; #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. - public DataFilesClient(System.Net.Http.HttpClient httpClient) + public CorporaClient(System.Net.Http.HttpClient httpClient) #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. { BaseUrl = "/api/v1"; @@ -2587,11 +2550,11 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all files + /// Get all corpora /// - /// A list of all files owned by the client + /// A list of all corpora owned by the client /// A server side error occurred. 
- public virtual async System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { var client_ = _httpClient; var disposeClient_ = false; @@ -2604,8 +2567,8 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "files" - urlBuilder_.Append("files"); + // Operation Path: "corpora" + urlBuilder_.Append("corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -2632,7 +2595,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -2679,75 +2642,32 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Upload a new file + /// Create a new corpus /// - /// - /// Sample request: - ///
- ///
POST /files - ///
{ - ///
"format": "text", - ///
"name": "myTeam:myProject:myFile.txt" - ///
} - ///
- /// The file to upload. Max size: 100MB - /// File format options: - ///
* **Text**: One translation unit (a.k.a., verse) per line - ///
* If a line contains a tab, characters before the tab are used as a unique identifier for the line, characters after the tab are understood as the content of the verse, and if there is another tab following the verse content, characters after this second tab are assumed to be column codes like "ss" etc. for sectioning and other formatting. See this example of a tab-delimited text file: - ///
> verse_001_005 (tab) Ὑπομνῆσαι δὲ ὑμᾶς βούλομαι , εἰδότας ὑμᾶς ἅπαξ τοῦτο - ///
> verse_001_006 (tab) Ἀγγέλους τε τοὺς μὴ τηρήσαντας τὴν ἑαυτῶν ἀρχήν , ἀλλὰ (tab) ss - ///
> verse_001_007 (tab) Ὡς Σόδομα καὶ Γόμορρα , καὶ αἱ περὶ αὐτὰς πόλεις (tab) ss - ///
* Otherwise, *no tabs* should be used in the file and a unique identifier will generated for each translation unit based on the line number. - ///
* **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue. - ///
* USFM files in paratext projects have unique identifiers assigned per segment for scripture and non-scripture content according to [this guide](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) - /// A name to help identify and distinguish the file. - ///
Recommendation: Create a multi-part name to distinguish between projects, uses, languages, etc. - ///
The name does not have to be unique. - ///
Example: myTranslationTeam:myProject:myLanguage:myFile.txt - /// The file was created successfully + /// The corpus was created successfully /// A server side error occurred. - public virtual async System.Threading.Tasks.Task CreateAsync(FileParameter file, FileFormat format, string? name = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task CreateAsync(CorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); + var client_ = _httpClient; var disposeClient_ = false; try { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var boundary_ = System.Guid.NewGuid().ToString(); - var content_ = new System.Net.Http.MultipartFormDataContent(boundary_); - content_.Headers.Remove("Content-Type"); - content_.Headers.TryAddWithoutValidation("Content-Type", "multipart/form-data; boundary=" + boundary_); - - if (file == null) - throw new System.ArgumentNullException("file"); - else - { - var content_file_ = new System.Net.Http.StreamContent(file.Data); - if (!string.IsNullOrEmpty(file.ContentType)) - content_file_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse(file.ContentType); - content_.Add(content_file_, "file", file.FileName ?? 
"file"); - } - - if (format == null) - throw new System.ArgumentNullException("format"); - else - { - content_.Add(new System.Net.Http.StringContent(ConvertToString(format, System.Globalization.CultureInfo.InvariantCulture)), "format"); - } - - if (name != null) - { - content_.Add(new System.Net.Http.StringContent(ConvertToString(name, System.Globalization.CultureInfo.InvariantCulture)), "name"); - } + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); request_.Content = content_; request_.Method = new System.Net.Http.HttpMethod("POST"); request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "files" - urlBuilder_.Append("files"); + // Operation Path: "corpora" + urlBuilder_.Append("corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -2774,7 +2694,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 201) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -2785,7 +2705,7 @@ public string BaseUrl if (status_ == 400) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request. 
Is the file over 100 MB?", status_, responseText_, headers_, null); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); } else if (status_ == 401) @@ -2827,12 +2747,12 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get a file by unique id + /// Get a corpus by unique id /// - /// The unique identifier for the file - /// The file exists + /// The unique identifier for the corpus + /// The corpus exists /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -2848,8 +2768,8 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "files/{id}" - urlBuilder_.Append("files/"); + // Operation Path: "corpora/{id}" + urlBuilder_.Append("corpora/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -2877,7 +2797,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -2894,13 +2814,13 @@ public string BaseUrl if (status_ 
== 403) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the corpus", status_, responseText_, headers_, null); } else if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The file does not exist", status_, responseText_, headers_, null); + throw new ServalApiException("The corpus does not exist", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -2930,45 +2850,37 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Update an existing file + /// Update an existing corpus /// - /// The existing file's unique id - /// The updated file - /// The file was updated successfully + /// The unique identifier for the corpus + /// Tuples of the ids of the new corpus files and the associated text ids + /// The corpus was updated successfully /// A server side error occurred. 
- public virtual async System.Threading.Tasks.Task UpdateAsync(string id, FileParameter file, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task UpdateAsync(string id, System.Collections.Generic.IEnumerable files, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); + if (files == null) + throw new System.ArgumentNullException("files"); + var client_ = _httpClient; var disposeClient_ = false; try { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var boundary_ = System.Guid.NewGuid().ToString(); - var content_ = new System.Net.Http.MultipartFormDataContent(boundary_); - content_.Headers.Remove("Content-Type"); - content_.Headers.TryAddWithoutValidation("Content-Type", "multipart/form-data; boundary=" + boundary_); - - if (file == null) - throw new System.ArgumentNullException("file"); - else - { - var content_file_ = new System.Net.Http.StreamContent(file.Data); - if (!string.IsNullOrEmpty(file.ContentType)) - content_file_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse(file.ContentType); - content_.Add(content_file_, "file", file.FileName ?? 
"file"); - } + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(files, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); request_.Content = content_; request_.Method = new System.Net.Http.HttpMethod("PATCH"); request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "files/{id}" - urlBuilder_.Append("files/"); + // Operation Path: "corpora/{id}" + urlBuilder_.Append("corpora/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -2996,7 +2908,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -3007,7 +2919,7 @@ public string BaseUrl if (status_ == 400) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request. Is the file over 100 MB?", status_, responseText_, headers_, null); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); } else if (status_ == 401) @@ -3019,13 +2931,13 @@ public string BaseUrl if (status_ == 403) { string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the corpus", status_, responseText_, headers_, null); } else if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The file does not exist and therefore cannot be updated", status_, responseText_, headers_, null); + throw new ServalApiException("The corpus does not exist and therefore cannot be updated", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -3055,15 +2967,10 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Delete an existing file + /// Delete an existing corpus /// - /// - /// If a file is in a corpora and the file is deleted, it will be automatically removed from the corpora. - ///
If a build job has started before the file was deleted, the file will be used for the build job, even - ///
though it will no longer be accessible through the API. - ///
- /// The existing file's unique id - /// The file was deleted successfully + /// The unique identifier for the corpus + /// The corpus was deleted successfully /// A server side error occurred. public virtual async System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { @@ -3080,8 +2987,8 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "files/{id}" - urlBuilder_.Append("files/"); + // Operation Path: "corpora/{id}" + urlBuilder_.Append("corpora/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -3121,13 +3028,13 @@ public string BaseUrl if (status_ == 403) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the corpus", status_, responseText_, headers_, null); } else if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The file does not exist and therefore cannot be deleted", status_, responseText_, headers_, null); + throw new ServalApiException("The corpus does not exist and therefore cannot be deleted", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -3155,158 +3062,49 @@ public string BaseUrl } } - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. 
- /// - /// Download a file - /// - /// The unique identifier for the file - /// The file exists - /// A server side error occurred. - public virtual async System.Threading.Tasks.Task DownloadAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + protected struct ObjectResponseResult { - if (id == null) - throw new System.ArgumentNullException("id"); - - var client_ = _httpClient; - var disposeClient_ = false; - try + public ObjectResponseResult(T responseObject, string responseText) { - using (var request_ = new System.Net.Http.HttpRequestMessage()) - { - request_.Content = new System.Net.Http.StringContent(string.Empty, System.Text.Encoding.UTF8, "application/octet-stream"); - request_.Method = new System.Net.Http.HttpMethod("POST"); - request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/octet-stream")); + this.Object = responseObject; + this.Text = responseText; + } - var urlBuilder_ = new System.Text.StringBuilder(); - if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "files/{id}/contents" - urlBuilder_.Append("files/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/contents"); + public T Object { get; } - PrepareRequest(client_, request_, urlBuilder_); + public string Text { get; } + } - var url_ = urlBuilder_.ToString(); - request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + public bool ReadResponseAsString { get; set; } - PrepareRequest(client_, request_, url_); + protected virtual async System.Threading.Tasks.Task> ReadObjectResponseAsync(System.Net.Http.HttpResponseMessage response, System.Collections.Generic.IReadOnlyDictionary> headers, System.Threading.CancellationToken cancellationToken) + { + if (response == null || response.Content == null) + { + return new 
ObjectResponseResult(default(T)!, string.Empty); + } - var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); - var disposeResponse_ = true; - try - { - var headers_ = new System.Collections.Generic.Dictionary>(); - foreach (var item_ in response_.Headers) - headers_[item_.Key] = item_.Value; - if (response_.Content != null && response_.Content.Headers != null) - { - foreach (var item_ in response_.Content.Headers) - headers_[item_.Key] = item_.Value; - } - - ProcessResponse(client_, response_); - - var status_ = (int)response_.StatusCode; - if (status_ == 200 || status_ == 206) - { - var responseStream_ = response_.Content == null ? System.IO.Stream.Null : await response_.Content.ReadAsStreamAsync().ConfigureAwait(false); - var fileResponse_ = new FileResponse(status_, headers_, responseStream_, null, response_); - disposeClient_ = false; disposeResponse_ = false; // response and client are disposed by FileResponse - return fileResponse_; - } - else - if (status_ == 401) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); - } - else - if (status_ == 403) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); - } - else - if (status_ == 404) - { - string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The file does not exist", status_, responseText_, headers_, null); - } - else - if (status_ == 500) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The data file is corrupted. Please try reuploading or recreating the file.", status_, responseText_, headers_, null); - } - else - if (status_ == 503) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); - } - else - { - var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); - } - } - finally - { - if (disposeResponse_) - response_.Dispose(); - } - } - } - finally - { - if (disposeClient_) - client_.Dispose(); - } - } - - protected struct ObjectResponseResult - { - public ObjectResponseResult(T responseObject, string responseText) - { - this.Object = responseObject; - this.Text = responseText; - } - - public T Object { get; } - - public string Text { get; } - } - - public bool ReadResponseAsString { get; set; } - - protected virtual async System.Threading.Tasks.Task> ReadObjectResponseAsync(System.Net.Http.HttpResponseMessage response, System.Collections.Generic.IReadOnlyDictionary> headers, System.Threading.CancellationToken cancellationToken) - { - if (response == null || response.Content == null) - { - return new ObjectResponseResult(default(T)!, string.Empty); - } - - if (ReadResponseAsString) - { - var responseText = await 
response.Content.ReadAsStringAsync().ConfigureAwait(false); - try - { - var typedBody = Newtonsoft.Json.JsonConvert.DeserializeObject(responseText, JsonSerializerSettings); - return new ObjectResponseResult(typedBody!, responseText); - } - catch (Newtonsoft.Json.JsonException exception) - { - var message = "Could not deserialize the response body string as " + typeof(T).FullName + "."; - throw new ServalApiException(message, (int)response.StatusCode, responseText, headers, exception); - } - } - else - { - try - { - using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false)) - using (var streamReader = new System.IO.StreamReader(responseStream)) - using (var jsonTextReader = new Newtonsoft.Json.JsonTextReader(streamReader)) + if (ReadResponseAsString) + { + var responseText = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + try + { + var typedBody = Newtonsoft.Json.JsonConvert.DeserializeObject(responseText, JsonSerializerSettings); + return new ObjectResponseResult(typedBody!, responseText); + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body string as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, responseText, headers, exception); + } + } + else + { + try + { + using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false)) + using (var streamReader = new System.IO.StreamReader(responseStream)) + using (var jsonTextReader = new Newtonsoft.Json.JsonTextReader(streamReader)) { var serializer = Newtonsoft.Json.JsonSerializer.Create(JsonSerializerSettings); var typedBody = serializer.Deserialize(jsonTextReader); @@ -3377,438 +3175,2016 @@ private string ConvertToString(object? 
value, System.Globalization.CultureInfo c } [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial interface ITranslationEnginesClient + public partial interface IDataFilesClient { /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all translation engines + /// Get all files /// - /// The engines + /// A list of all files owned by the client /// A server side error occurred. - System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Create a new translation engine + /// Upload a new file /// /// - /// ## Parameters - ///
* **name**: (optional) A name to help identify and distinguish the file. - ///
* Recommendation: Create a multi-part name to distinguish between projects, uses, etc. - ///
* The name does not have to be unique, as the engine is uniquely identified by the auto-generated id - ///
* **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) - ///
* **targetLanguage**: The target language code (a valid IETF language tag is recommended) - ///
* **type**: **smt-transfer** or **nmt** or **echo** - ///
* **isModelPersisted**: (optional) - see below - ///
### smt-transfer - ///
The Statistical Machine Translation Transfer Learning engine is primarily used for translation suggestions. Typical endpoints: translate, get-word-graph, train-segment - ///
* **IsModelPersisted**: (default to true) All models are persistent and can be updated with train-segment. False is not supported. - ///
### nmt - ///
The Neural Machine Translation engine is primarily used for pretranslations. It is fine-tuned from Meta's NLLB-200. Valid IETF language tags provided to Serval will be converted to [NLLB-200 codes](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200). See more about language tag resolution [here](https://github.com/sillsdev/serval/wiki/FLORES%E2%80%90200-Language-Code-Resolution-for-NMT-Engine). - ///
* **IsModelPersisted**: (default to false) Whether the model can be downloaded by the client after it has been successfully built. - ///
- ///
If you use a language among NLLB's supported languages, Serval will utilize everything the NLLB-200 model already knows about that language when translating. If the language you are working with is not among NLLB's supported languages, the language code will have no effect. - ///
- ///
Typical endpoints: pretranslate - ///
### echo - ///
The echo engine has full coverage of all nmt and smt-transfer endpoints. Endpoints like create and build return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user in a format that mocks nmt or Smt. For example, translating a segment "test" with the echo engine would yield a translation response with translation "test". This engine is useful for debugging and testing purposes. - ///
## Sample request: + /// Sample request: ///
+ ///
POST /files ///
{ - ///
"name": "myTeam:myProject:myEngine", - ///
"sourceLanguage": "el", - ///
"targetLanguage": "en", - ///
"type": "nmt" - ///
"IsModelPersisted": true + ///
"format": "text", + ///
"name": "myTeam:myProject:myFile.txt" ///
} ///
- /// The translation engine configuration (see above) - /// The new translation engine + /// The file to upload. Max size: 100MB + /// File format options: + ///
* **Text**: One translation unit (a.k.a., verse) per line + ///
* If a line contains a tab, characters before the tab are used as a unique identifier for the line, characters after the tab are understood as the content of the verse, and if there is another tab following the verse content, characters after this second tab are assumed to be column codes like "ss" etc. for sectioning and other formatting. See this example of a tab-delimited text file: + ///
> verse_001_005 (tab) Ὑπομνῆσαι δὲ ὑμᾶς βούλομαι , εἰδότας ὑμᾶς ἅπαξ τοῦτο + ///
> verse_001_006 (tab) Ἀγγέλους τε τοὺς μὴ τηρήσαντας τὴν ἑαυτῶν ἀρχήν , ἀλλὰ (tab) ss + ///
> verse_001_007 (tab) Ὡς Σόδομα καὶ Γόμορρα , καὶ αἱ περὶ αὐτὰς πόλεις (tab) ss + ///
* Otherwise, *no tabs* should be used in the file and a unique identifier will generated for each translation unit based on the line number. + ///
* **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue. + ///
* USFM files in paratext projects have unique identifiers assigned per segment for scripture and non-scripture content according to [this guide](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) + /// A name to help identify and distinguish the file. + ///
Recommendation: Create a multi-part name to distinguish between projects, uses, languages, etc. + ///
The name does not have to be unique. + ///
Example: myTranslationTeam:myProject:myLanguage:myFile.txt + /// The file was created successfully /// A server side error occurred. - System.Threading.Tasks.Task CreateAsync(TranslationEngineConfig engineConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task CreateAsync(FileParameter file, FileFormat format, string? name = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get a translation engine by unique id + /// Get a file by unique id /// - /// The translation engine id - /// The translation engine + /// The unique identifier for the file + /// The file exists /// A server side error occurred. - System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Delete a translation engine + /// Update an existing file /// - /// The translation engine id - /// The engine was successfully deleted. + /// The existing file's unique id + /// The updated file + /// The file was updated successfully /// A server side error occurred. - System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task UpdateAsync(string id, FileParameter file, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. 
/// - /// Translate a segment of text + /// Delete an existing file /// - /// The translation engine id - /// The source segment - /// The translation result + /// + /// If a file is in a corpus and the file is deleted, it will be automatically removed from that corpus. + ///
If a build job has started before the file was deleted, the file will be used for the build job, even + ///
though it will no longer be accessible through the API. + ///
+ /// The existing file's unique id + /// The file was deleted successfully /// A server side error occurred. - System.Threading.Tasks.Task TranslateAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Translates a segment of text into the top N results + /// Download a file /// - /// The translation engine id - /// The number of translations to generate - /// The source segment - /// The translation results + /// The unique identifier for the file + /// The file exists /// A server side error occurred. - System.Threading.Tasks.Task> TranslateNAsync(string id, int n, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task DownloadAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + } + + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class DataFilesClient : IDataFilesClient + { + #pragma warning disable 8618 + private string _baseUrl; + #pragma warning restore 8618 + + private System.Net.Http.HttpClient _httpClient; + private static System.Lazy _settings = new System.Lazy(CreateSerializerSettings, true); + private Newtonsoft.Json.JsonSerializerSettings _instanceSettings; + + #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
+ public DataFilesClient(System.Net.Http.HttpClient httpClient) + #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + { + BaseUrl = "/api/v1"; + _httpClient = httpClient; + Initialize(); + } + + private static Newtonsoft.Json.JsonSerializerSettings CreateSerializerSettings() + { + var settings = new Newtonsoft.Json.JsonSerializerSettings(); + UpdateJsonSerializerSettings(settings); + return settings; + } + + public string BaseUrl + { + get { return _baseUrl; } + set + { + _baseUrl = value; + if (!string.IsNullOrEmpty(_baseUrl) && !_baseUrl.EndsWith("/")) + _baseUrl += '/'; + } + } + + protected Newtonsoft.Json.JsonSerializerSettings JsonSerializerSettings { get { return _instanceSettings ?? _settings.Value; } } + + static partial void UpdateJsonSerializerSettings(Newtonsoft.Json.JsonSerializerSettings settings); + + partial void Initialize(); + + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, string url); + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, System.Text.StringBuilder urlBuilder); + partial void ProcessResponse(System.Net.Http.HttpClient client, System.Net.Http.HttpResponseMessage response); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get the word graph that represents all possible translations of a segment of text + /// Get all files /// - /// The translation engine id - /// The source segment - /// The word graph result + /// A list of all files owned by the client /// A server side error occurred. 
- System.Threading.Tasks.Task GetWordGraphAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Incrementally train a translation engine with a segment pair - /// - /// - /// A segment pair consists of a source and target segment as well as a boolean flag `sentenceStart` - ///
that should be set to true if this segment pair forms the beginning of a sentence. (This information - ///
will be used to reconstruct proper capitalization when training/inferencing). - ///
- /// The translation engine id - /// The segment pair - /// The engine was trained successfully. - /// A server side error occurred. - System.Threading.Tasks.Task TrainSegmentAsync(string id, SegmentPair segmentPair, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "files" + urlBuilder_.Append("files"); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Add a corpus to a translation engine - /// - /// - /// ## Parameters - ///
* **name**: A name to help identify and distinguish the corpus from other corpora - ///
* The name does not have to be unique since the corpus is uniquely identified by an auto-generated id - ///
* **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). - ///
* Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. - ///
* **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). - ///
* **SourceFiles**: The source files associated with the corpus - ///
* **FileId**: The unique id referencing the uploaded file - ///
* **TextId**: The client-defined name to associate source and target files. - ///
* If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. - ///
* If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated. - ///
* If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. - ///
* For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). - ///
* For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). - ///
* **TargetFiles**: The target files associated with the corpus - ///
* Same as SourceFiles, except only a single instance of a TextID or a single paratext project is supported. There is no mixing or combining of multiple targets. - ///
- /// The translation engine id - /// The corpus configuration (see remarks) - /// The added corpus - /// A server side error occurred. - System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + PrepareRequest(client_, request_, urlBuilder_); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Get all corpora for a translation engine - /// - /// The translation engine id - /// The files - /// A server side error occurred. - System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Update a corpus with a new set of files - /// - /// - /// See posting a new corpus for details of use. Will completely replace corpus' file associations. - ///
Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. - ///
- /// The translation engine id - /// The corpus id - /// The corpus configuration - /// The corpus was updated successfully - /// A server side error occurred. - System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + PrepareRequest(client_, request_, url_); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Get the configuration of a corpus for a translation engine - /// - /// The translation engine id - /// The corpus id - /// The corpus configuration - /// A server side error occurred. - System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Remove a corpus from a translation engine - /// - /// - /// Removing a corpus will remove all pretranslations associated with that corpus. - /// - /// The translation engine id - /// The corpus id - /// If true, all files associated with the corpus will be deleted as well (even if they are associated with other corpora). If false, no files will be deleted. - /// The corpus was deleted successfully. - /// A server side error occurred. 
- System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + ProcessResponse(client_, response_); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Get all pretranslations in a corpus of a translation engine - /// - /// - /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: - ///
* **TextId**: The TextId of the SourceFile defined when the corpus was created. - ///
* **Refs** (a list of strings): A list of references including: - ///
* The references defined in the SourceFile per line, if any. - ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. - ///
* **Translation**: the text of the pretranslation - ///
- ///
Pretranslations can be filtered by text id if provided. - ///
Only pretranslations for the most recent successful build of the engine are returned. - ///
- /// The translation engine id - /// The corpus id - /// The text id (optional) - /// The pretranslations - /// A server side error occurred. - System.Threading.Tasks.Task> GetAllPretranslationsAsync(string id, string corpusId, string? textId = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all pretranslations for the specified text in a corpus of a translation engine + /// Upload a new file /// /// - /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: - ///
* **TextId**: The TextId of the SourceFile defined when the corpus was created. - ///
* **Refs** (a list of strings): A list of references including: - ///
* The references defined in the SourceFile per line, if any. - ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. - ///
* **Translation**: the text of the pretranslation + /// Sample request: ///
- ///
Only pretranslations for the most recent successful build of the engine are returned. + ///
POST /files + ///
{ + ///
"format": "text", + ///
"name": "myTeam:myProject:myFile.txt" + ///
} ///
- /// The translation engine id - /// The corpus id - /// The text id - /// The pretranslations + /// The file to upload. Max size: 100MB + /// File format options: + ///
* **Text**: One translation unit (a.k.a., verse) per line + ///
* If a line contains a tab, characters before the tab are used as a unique identifier for the line, characters after the tab are understood as the content of the verse, and if there is another tab following the verse content, characters after this second tab are assumed to be column codes like "ss" etc. for sectioning and other formatting. See this example of a tab-delimited text file: + ///
> verse_001_005 (tab) Ὑπομνῆσαι δὲ ὑμᾶς βούλομαι , εἰδότας ὑμᾶς ἅπαξ τοῦτο + ///
> verse_001_006 (tab) Ἀγγέλους τε τοὺς μὴ τηρήσαντας τὴν ἑαυτῶν ἀρχήν , ἀλλὰ (tab) ss + ///
> verse_001_007 (tab) Ὡς Σόδομα καὶ Γόμορρα , καὶ αἱ περὶ αὐτὰς πόλεις (tab) ss + ///
* Otherwise, *no tabs* should be used in the file and a unique identifier will be generated for each translation unit based on the line number. + ///
* **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue. + ///
* USFM files in Paratext projects have unique identifiers assigned per segment for scripture and non-scripture content according to [this guide](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) + /// A name to help identify and distinguish the file. + ///
Recommendation: Create a multi-part name to distinguish between projects, uses, languages, etc. + ///
The name does not have to be unique. + ///
Example: myTranslationTeam:myProject:myLanguage:myFile.txt + /// The file was created successfully /// A server side error occurred. - System.Threading.Tasks.Task> GetPretranslationsByTextIdAsync(string id, string corpusId, string textId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task CreateAsync(FileParameter file, FileFormat format, string? name = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var boundary_ = System.Guid.NewGuid().ToString(); + var content_ = new System.Net.Http.MultipartFormDataContent(boundary_); + content_.Headers.Remove("Content-Type"); + content_.Headers.TryAddWithoutValidation("Content-Type", "multipart/form-data; boundary=" + boundary_); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Get a pretranslated Scripture book in USFM format. - /// - /// - /// The text that populates the USFM structure can be controlled by the `textOrigin` parameter: - ///
* `PreferExisting`: The existing and pretranslated texts are merged into the USFM, preferring existing text. **This is the default**. - ///
* `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. - ///
* `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed). - ///
* `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed. - ///
- ///
The source or target book can be used as the USFM template for the pretranslated text. The template can be controlled by the `template` parameter: - ///
* `Auto`: The target book is used as the template if it exists; otherwise, the source book is used. **This is the default**. - ///
* `Source`: The source book is used as the template. - ///
* `Target`: The target book is used as the template. - ///
- ///
Only pretranslations for the most recent successful build of the engine are returned. - ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). - ///
- /// The translation engine id - /// The corpus id - /// The text id - /// The source[s] of the data to populate the USFM file with. - /// The book in USFM format + if (file == null) + throw new System.ArgumentNullException("file"); + else + { + var content_file_ = new System.Net.Http.StreamContent(file.Data); + if (!string.IsNullOrEmpty(file.ContentType)) + content_file_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse(file.ContentType); + content_.Add(content_file_, "file", file.FileName ?? "file"); + } + + if (format == null) + throw new System.ArgumentNullException("format"); + else + { + content_.Add(new System.Net.Http.StringContent(ConvertToString(format, System.Globalization.CultureInfo.InvariantCulture)), "format"); + } + + if (name != null) + { + content_.Add(new System.Net.Http.StringContent(ConvertToString(name, System.Globalization.CultureInfo.InvariantCulture)), "name"); + } + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "files" + urlBuilder_.Append("files"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + 
headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 201) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request. Is the file over 100 MB?", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a file by unique id + /// + /// The unique identifier for the file + /// The file exists + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "files/{id}" + urlBuilder_.Append("files/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in 
response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The file does not exist", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update an existing file + /// + /// The existing file's unique id + /// The updated file + /// The file was updated successfully + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task UpdateAsync(string id, FileParameter file, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var boundary_ = System.Guid.NewGuid().ToString(); + var content_ = new System.Net.Http.MultipartFormDataContent(boundary_); + content_.Headers.Remove("Content-Type"); + content_.Headers.TryAddWithoutValidation("Content-Type", "multipart/form-data; boundary=" + boundary_); + + if (file == null) + throw new System.ArgumentNullException("file"); + else + { + var content_file_ = new System.Net.Http.StreamContent(file.Data); + if (!string.IsNullOrEmpty(file.ContentType)) + content_file_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse(file.ContentType); + content_.Add(content_file_, "file", file.FileName ?? 
"file"); + } + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("PATCH"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "files/{id}" + urlBuilder_.Append("files/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request. 
Is the file over 100 MB?", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The file does not exist and therefore cannot be updated", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. 
+ /// + /// Delete an existing file + /// + /// + /// If a file is in a corpus and the file is deleted, it will be automatically removed from that corpus. + ///
If a build job has started before the file was deleted, the file will be used for the build job, even + ///
though it will no longer be accessible through the API. + ///
+ /// The existing file's unique id + /// The file was deleted successfully + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("DELETE"); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "files/{id}" + urlBuilder_.Append("files/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + return; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The file does not exist and therefore cannot be deleted", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Download a file + /// + /// The unique identifier for the file + /// The file exists + /// A server side error occurred. 
+ public virtual async System.Threading.Tasks.Task DownloadAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Content = new System.Net.Http.StringContent(string.Empty, System.Text.Encoding.UTF8, "application/octet-stream"); + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/octet-stream")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "files/{id}/contents" + urlBuilder_.Append("files/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/contents"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200 || status_ == 206) + { + var responseStream_ = response_.Content == null ? 
System.IO.Stream.Null : await response_.Content.ReadAsStreamAsync().ConfigureAwait(false); + var fileResponse_ = new FileResponse(status_, headers_, responseStream_, null, response_); + disposeClient_ = false; disposeResponse_ = false; // response and client are disposed by FileResponse + return fileResponse_; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the file", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The file does not exist", status_, responseText_, headers_, null); + } + else + if (status_ == 500) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The data file is corrupted. Please try reuploading or recreating the file.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + protected struct ObjectResponseResult + { + public ObjectResponseResult(T responseObject, string responseText) + { + this.Object = responseObject; + this.Text = responseText; + } + + public T Object { get; } + + public string Text { get; } + } + + public bool ReadResponseAsString { get; set; } + + protected virtual async System.Threading.Tasks.Task> ReadObjectResponseAsync(System.Net.Http.HttpResponseMessage response, System.Collections.Generic.IReadOnlyDictionary> headers, System.Threading.CancellationToken cancellationToken) + { + if (response == null || response.Content == null) + { + return new ObjectResponseResult(default(T)!, string.Empty); + } + + if (ReadResponseAsString) + { + var responseText = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + try + { + var typedBody = Newtonsoft.Json.JsonConvert.DeserializeObject(responseText, JsonSerializerSettings); + return new ObjectResponseResult(typedBody!, responseText); + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body string as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, responseText, headers, exception); + } + } + else + { + try + { + using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false)) + using (var streamReader = new System.IO.StreamReader(responseStream)) + using (var jsonTextReader = new Newtonsoft.Json.JsonTextReader(streamReader)) + { + var serializer = Newtonsoft.Json.JsonSerializer.Create(JsonSerializerSettings); + var typedBody = 
serializer.Deserialize(jsonTextReader); + return new ObjectResponseResult(typedBody!, string.Empty); + } + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body stream as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, string.Empty, headers, exception); + } + } + } + + private string ConvertToString(object? value, System.Globalization.CultureInfo cultureInfo) + { + if (value == null) + { + return ""; + } + + if (value is System.Enum) + { + var name = System.Enum.GetName(value.GetType(), value); + if (name != null) + { + var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); + if (field != null) + { + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + as System.Runtime.Serialization.EnumMemberAttribute; + if (attribute != null) + { + return attribute.Value != null ? attribute.Value : name; + } + } + + var converted = System.Convert.ToString(System.Convert.ChangeType(value, System.Enum.GetUnderlyingType(value.GetType()), cultureInfo)); + return converted == null ? string.Empty : converted; + } + } + else if (value is bool) + { + return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); + } + else if (value is byte[]) + { + return System.Convert.ToBase64String((byte[]) value); + } + else if (value is string[]) + { + return string.Join(",", (string[])value); + } + else if (value.GetType().IsArray) + { + var valueArray = (System.Array)value; + var valueTextArray = new string[valueArray.Length]; + for (var i = 0; i < valueArray.Length; i++) + { + valueTextArray[i] = ConvertToString(valueArray.GetValue(i), cultureInfo); + } + return string.Join(",", valueTextArray); + } + + var result = System.Convert.ToString(value, cultureInfo); + return result == null ? 
"" : result; + } + } + + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial interface ITranslationEnginesClient + { + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all translation engines + /// + /// The engines + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Create a new translation engine + /// + /// + /// ## Parameters + ///
* **name**: (optional) A name to help identify and distinguish the engine. + ///
* Recommendation: Create a multi-part name to distinguish between projects, uses, etc. + ///
* The name does not have to be unique, as the engine is uniquely identified by the auto-generated id + ///
* **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) + ///
* **targetLanguage**: The target language code (a valid IETF language tag is recommended) + ///
* **type**: **smt-transfer** or **nmt** or **echo** + ///
* **isModelPersisted**: (optional) - see below + ///
### smt-transfer + ///
The Statistical Machine Translation Transfer Learning engine is primarily used for translation suggestions. Typical endpoints: translate, get-word-graph, train-segment + ///
* **IsModelPersisted**: (default to true) All models are persistent and can be updated with train-segment. False is not supported. + ///
### nmt + ///
The Neural Machine Translation engine is primarily used for pretranslations. It is fine-tuned from Meta's NLLB-200. Valid IETF language tags provided to Serval will be converted to [NLLB-200 codes](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200). See more about language tag resolution [here](https://github.com/sillsdev/serval/wiki/FLORES%E2%80%90200-Language-Code-Resolution-for-NMT-Engine). + ///
* **IsModelPersisted**: (default to false) Whether the model can be downloaded by the client after it has been successfully built. + ///
+ ///
If you use a language among NLLB's supported languages, Serval will utilize everything the NLLB-200 model already knows about that language when translating. If the language you are working with is not among NLLB's supported languages, the language code will have no effect. + ///
+ ///
Typical endpoints: pretranslate + ///
### echo + ///
The echo engine has full coverage of all nmt and smt-transfer endpoints. Endpoints like create and build return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user in a format that mocks nmt or smt-transfer. For example, translating a segment "test" with the echo engine would yield a translation response with translation "test". This engine is useful for debugging and testing purposes. + ///
## Sample request: + ///
+ ///
{ + ///
"name": "myTeam:myProject:myEngine", + ///
"sourceLanguage": "el", + ///
"targetLanguage": "en", + ///
"type": "nmt", + ///
"isModelPersisted": true + ///
} + ///
+ /// The translation engine configuration (see above) + /// The new translation engine + /// A server side error occurred. + System.Threading.Tasks.Task CreateAsync(TranslationEngineConfig engineConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a translation engine by unique id + /// + /// The translation engine id + /// The translation engine + /// A server side error occurred. + System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Delete a translation engine + /// + /// The translation engine id + /// The engine was successfully deleted. + /// A server side error occurred. + System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Translate a segment of text + /// + /// The translation engine id + /// The source segment + /// The translation result + /// A server side error occurred. + System.Threading.Tasks.Task TranslateAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Translates a segment of text into the top N results + /// + /// The translation engine id + /// The number of translations to generate + /// The source segment + /// The translation results + /// A server side error occurred. 
+ System.Threading.Tasks.Task> TranslateNAsync(string id, int n, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the word graph that represents all possible translations of a segment of text + /// + /// The translation engine id + /// The source segment + /// The word graph result + /// A server side error occurred. + System.Threading.Tasks.Task GetWordGraphAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Incrementally train a translation engine with a segment pair + /// + /// + /// A segment pair consists of a source and target segment as well as a boolean flag `sentenceStart` + ///
that should be set to true if this segment pair forms the beginning of a sentence. (This information + ///
will be used to reconstruct proper capitalization when training/inferencing). + ///
+ /// The translation engine id + /// The segment pair + /// The engine was trained successfully. + /// A server side error occurred. + System.Threading.Tasks.Task TrainSegmentAsync(string id, SegmentPair segmentPair, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Add a corpus to a translation engine + /// + /// + /// ## Parameters + ///
* **name**: A name to help identify and distinguish the corpus from other corpora + ///
* The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + ///
* **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + ///
* **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* **SourceFiles**: The source files associated with the corpus + ///
* **FileId**: The unique id referencing the uploaded file + ///
* **TextId**: The client-defined name to associate source and target files. + ///
* If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + ///
* If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated. + ///
* If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + ///
* For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + ///
* For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + ///
* **TargetFiles**: The target files associated with the corpus + ///
* Same as SourceFiles, except only a single instance of a TextId or a single Paratext project is supported. There is no mixing or combining of multiple targets. + ///
+ /// The translation engine id + /// The corpus configuration (see remarks) + /// The added corpus + /// A server side error occurred. + System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all corpora for a translation engine + /// + /// The translation engine id + /// The corpora + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update a corpus with a new set of files + /// + /// + /// See posting a new corpus for details of use. Will completely replace corpus' file associations. + ///
Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + ///
+ /// The translation engine id + /// The corpus id + /// The corpus configuration + /// The corpus was updated successfully + /// A server side error occurred. + System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the configuration of a corpus for a translation engine + /// + /// The translation engine id + /// The corpus id + /// The corpus configuration + /// A server side error occurred. + System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Remove a corpus from a translation engine + /// + /// + /// Removing a corpus will remove all pretranslations associated with that corpus. + /// + /// The translation engine id + /// The corpus id + /// If true, all files associated with the corpus will be deleted as well (even if they are associated with other corpora). If false, no files will be deleted. + /// The corpus was deleted successfully. + /// A server side error occurred. + System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Add a corpus to a translation engine + /// + /// + /// ## Parameters + ///
* **name**: A name to help identify and distinguish the corpus from other corpora + ///
* The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + ///
* **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + ///
* **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* **SourceFiles**: The source files associated with the corpus + ///
* **FileId**: The unique id referencing the uploaded file + ///
* **TextId**: The client-defined name to associate source and target files. + ///
* If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + ///
* If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated. + ///
* If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + ///
* For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + ///
* For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + ///
* **TargetFiles**: The target files associated with the corpus + ///
* Same as SourceFiles, except only a single instance of a TextId or a single Paratext project is supported. There is no mixing or combining of multiple targets. + ///
+ /// The translation engine id + /// The corpus configuration (see remarks) + /// The added corpus + /// A server side error occurred. + System.Threading.Tasks.Task AddParallelCorpusAsync(string id, TranslationParallelCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all parallel corpora for a translation engine + /// + /// The translation engine id + /// The parallel corpora + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllParallelCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update a corpus with a new set of files + /// + /// + /// Will completely replace corpus' file associations. Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + /// + /// The translation engine id + /// The corpus id + /// The corpus configuration + /// The corpus was updated successfully + /// A server side error occurred. + System.Threading.Tasks.Task UpdateParallelCorpusAsync(string id, string parallelCorpusId, TranslationParallelCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the configuration of a parallel corpus for a translation engine + /// + /// The translation engine id + /// The parallel corpus id + /// The parallel corpus configuration + /// A server side error occurred. 
+ System.Threading.Tasks.Task GetParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Remove a parallel corpus from a translation engine + /// + /// + /// Removing a parallel corpus will remove all pretranslations associated with that corpus. + /// + /// The translation engine id + /// The parallel corpus id + /// The parallel corpus was deleted successfully. + /// A server side error occurred. + System.Threading.Tasks.Task DeleteParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all pretranslations in a corpus of a translation engine + /// + /// + /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: + ///
* **TextId**: The TextId of the SourceFile defined when the corpus was created. + ///
* **Refs** (a list of strings): A list of references including: + ///
* The references defined in the SourceFile per line, if any. + ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. + ///
* **Translation**: the text of the pretranslation + ///
+ ///
Pretranslations can be filtered by text id if provided. + ///
Only pretranslations for the most recent successful build of the engine are returned. + ///
+ /// The translation engine id + /// The corpus id + /// The text id (optional) + /// The pretranslations + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllPretranslationsAsync(string id, string corpusId, string? textId = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all pretranslations for the specified text in a corpus of a translation engine + /// + /// + /// Pretranslations are arranged in a list of dictionaries with the following fields per pretranslation: + ///
* **TextId**: The TextId of the SourceFile defined when the corpus was created. + ///
* **Refs** (a list of strings): A list of references including: + ///
* The references defined in the SourceFile per line, if any. + ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. + ///
* **Translation**: the text of the pretranslation + ///
+ ///
Only pretranslations for the most recent successful build of the engine are returned. + ///
+ /// The translation engine id + /// The corpus id + /// The text id + /// The pretranslations + /// A server side error occurred. + System.Threading.Tasks.Task> GetPretranslationsByTextIdAsync(string id, string corpusId, string textId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a pretranslated Scripture book in USFM format. + /// + /// + /// The text that populates the USFM structure can be controlled by the `textOrigin` parameter: + ///
* `PreferExisting`: The existing and pretranslated texts are merged into the USFM, preferring existing text. **This is the default**. + ///
* `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. + ///
* `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed). + ///
* `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed. + ///
+ ///
The source or target book can be used as the USFM template for the pretranslated text. The template can be controlled by the `template` parameter: + ///
* `Auto`: The target book is used as the template if it exists; otherwise, the source book is used. **This is the default**. + ///
* `Source`: The source book is used as the template. + ///
* `Target`: The target book is used as the template. + ///
+ ///
Only pretranslations for the most recent successful build of the engine are returned. + ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). + ///
+ /// The translation engine id + /// The corpus id + /// The text id + /// The source[s] of the data to populate the USFM file with. + /// The book in USFM format /// A server side error occurred. System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all build jobs for a translation engine + /// Get all build jobs for a translation engine + /// + /// The translation engine id + /// The build jobs + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllBuildsAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Starts a build job for a translation engine. + /// + /// + /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. + ///
For Paratext projects, you may flag a subset of books for training by including their [abbreviations] + ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) + ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Specify the corpora or textIds to pretranslate. When a corpus or textId is selected for pretranslation, + ///
the following text will be pretranslated: + ///
* Text segments that are in the source and not the target (untranslated) + ///
* Text segments that are in the source and the target, but where that target segment is not trained on. + ///
If the engine does not support pretranslation, these fields have no effect. + ///
Pretranslating has the same filtering as training. + ///
+ ///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. + ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. + ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. + ///
+ /// The translation engine id + /// The build config (see remarks) + /// The new build job + /// A server side error occurred. + System.Threading.Tasks.Task StartBuildAsync(string id, TranslationBuildConfig buildConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a build job + /// + /// + /// If the `minRevision` is not defined, the current build, at whatever state it is, + ///
will be immediately returned. If `minRevision` is defined, Serval will wait for + ///
up to 40 seconds for the engine to build to the `minRevision` specified, else + ///
will time out. + ///
A use case is to actively query the state of the current build, where the subsequent + ///
request sets the `minRevision` to the returned `revision` + 1 and timeouts are handled gracefully. + ///
This method should use request throttling. + ///
Note: Within the returned build, percentCompleted is a value between 0 and 1. + ///
+ /// The translation engine id + /// The build job id + /// The minimum revision + /// The build job + /// A server side error occurred. + System.Threading.Tasks.Task GetBuildAsync(string id, string buildId, long? minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the currently running build job for a translation engine + /// + /// + /// See documentation on endpoint /translation/engines/{id}/builds/{id} - "Get a Build Job" for details on using `minRevision`. + /// + /// The translation engine id + /// The minimum revision + /// The build job + /// A server side error occurred. + System.Threading.Tasks.Task GetCurrentBuildAsync(string id, long? minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Cancel the current build job (whether pending or active) for a translation engine + /// + /// The translation engine id + /// The build job was cancelled successfully. + /// A server side error occurred. + System.Threading.Tasks.Task CancelBuildAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a link to download the NMT translation model of the last build that was successfully saved. + /// + /// + /// If an NMT build was successful and IsModelPersisted is `true` for the engine, + ///
then the model from the most recent successful build can be downloaded. + ///
+ ///
The endpoint will return a URL that can be used to download the model for up to 1 hour + ///
after the request is made. If the URL is not used within that time, a new request will need to be made. + ///
+ ///
The download itself is created by g-zipping together the folder containing the fine-tuned model + ///
with all necessary supporting files. This zipped folder is then named by the pattern: + ///
* <engine_id>_<model_revision>.tar.gz + ///
+ /// The translation engine id + /// The url to download the model. + /// A server side error occurred. + System.Threading.Tasks.Task GetModelDownloadUrlAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + } + + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationEnginesClient : ITranslationEnginesClient + { + #pragma warning disable 8618 + private string _baseUrl; + #pragma warning restore 8618 + + private System.Net.Http.HttpClient _httpClient; + private static System.Lazy _settings = new System.Lazy(CreateSerializerSettings, true); + private Newtonsoft.Json.JsonSerializerSettings _instanceSettings; + + #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public TranslationEnginesClient(System.Net.Http.HttpClient httpClient) + #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + { + BaseUrl = "/api/v1"; + _httpClient = httpClient; + Initialize(); + } + + private static Newtonsoft.Json.JsonSerializerSettings CreateSerializerSettings() + { + var settings = new Newtonsoft.Json.JsonSerializerSettings(); + UpdateJsonSerializerSettings(settings); + return settings; + } + + public string BaseUrl + { + get { return _baseUrl; } + set + { + _baseUrl = value; + if (!string.IsNullOrEmpty(_baseUrl) && !_baseUrl.EndsWith("/")) + _baseUrl += '/'; + } + } + + protected Newtonsoft.Json.JsonSerializerSettings JsonSerializerSettings { get { return _instanceSettings ?? 
_settings.Value; } } + + static partial void UpdateJsonSerializerSettings(Newtonsoft.Json.JsonSerializerSettings settings); + + partial void Initialize(); + + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, string url); + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, System.Text.StringBuilder urlBuilder); + partial void ProcessResponse(System.Net.Http.HttpClient client, System.Net.Http.HttpResponseMessage response); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all translation engines /// - /// The translation engine id - /// The build jobs + /// The engines /// A server side error occurred. - System.Threading.Tasks.Task> GetAllBuildsAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines" + urlBuilder_.Append("translation/engines"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, 
cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Starts a build job for a translation engine. + /// Create a new translation engine /// /// - /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. - ///
Paratext Projects, you may flag a subset of books for training by including their [abbreviations] - ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. - ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// ## Parameters + ///
* **name**: (optional) A name to help identify and distinguish the engine. + ///
* Recommendation: Create a multi-part name to distinguish between projects, uses, etc. + ///
* The name does not have to be unique, as the engine is uniquely identified by the auto-generated id + ///
* **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) + ///
* **targetLanguage**: The target language code (a valid IETF language tag is recommended) + ///
* **type**: **smt-transfer** or **nmt** or **echo** + ///
* **isModelPersisted**: (optional) - see below + ///
### smt-transfer + ///
The Statistical Machine Translation Transfer Learning engine is primarily used for translation suggestions. Typical endpoints: translate, get-word-graph, train-segment + ///
* **IsModelPersisted**: (default to true) All models are persistent and can be updated with train-segment. False is not supported. + ///
### nmt + ///
The Neural Machine Translation engine is primarily used for pretranslations. It is fine-tuned from Meta's NLLB-200. Valid IETF language tags provided to Serval will be converted to [NLLB-200 codes](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200). See more about language tag resolution [here](https://github.com/sillsdev/serval/wiki/FLORES%E2%80%90200-Language-Code-Resolution-for-NMT-Engine). + ///
* **IsModelPersisted**: (default to false) Whether the model can be downloaded by the client after it has been successfully built. ///
- ///
Specify the corpora or textIds to pretranslate. When a corpus or textId is selected for pretranslation, - ///
the following text will be pretranslated: - ///
* Text segments that are in the source and not the target (untranslated) - ///
* Text segments that are in the source and the target, but where that target segment is not trained on. - ///
If the engine does not support pretranslation, these fields have no effect. - ///
Pretranslating has the same filtering as training. + ///
If you use a language among NLLB's supported languages, Serval will utilize everything the NLLB-200 model already knows about that language when translating. If the language you are working with is not among NLLB's supported languages, the language code will have no effect. ///
- ///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. - ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. - ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. + ///
Typical endpoints: pretranslate + ///
### echo + ///
The echo engine has full coverage of all nmt and smt-transfer endpoints. Endpoints like create and build return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user in a format that mocks nmt or smt-transfer. For example, translating a segment "test" with the echo engine would yield a translation response with translation "test". This engine is useful for debugging and testing purposes. + ///
## Sample request: + ///
+ ///
{ + ///
"name": "myTeam:myProject:myEngine", + ///
"sourceLanguage": "el", + ///
"targetLanguage": "en", + ///
"type": "nmt", + ///
"IsModelPersisted": true + ///
} ///
- /// The translation engine id - /// The build config (see remarks) - /// The new build job + /// The translation engine configuration (see above) + /// The new translation engine /// A server side error occurred. - System.Threading.Tasks.Task StartBuildAsync(string id, TranslationBuildConfig buildConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task CreateAsync(TranslationEngineConfig engineConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (engineConfig == null) + throw new System.ArgumentNullException("engineConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(engineConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines" + urlBuilder_.Append("translation/engines"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + 
foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 201) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request. Is the engine type correct?", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get a build job + /// Get a translation engine by unique id /// - /// - /// If the `minRevision` is not defined, the current build, at whatever state it is, - ///
will be immediately returned. If `minRevision` is defined, Serval will wait for - ///
up to 40 seconds for the engine to build to the `minRevision` specified, else - ///
will timeout. - ///
A use case is to actively query the state of the current build, where the subsequent - ///
request sets the `minRevision` to the returned `revision` + 1 and timeouts are handled gracefully. - ///
This method should use request throttling. - ///
Note: Within the returned build, percentCompleted is a value between 0 and 1. - ///
/// The translation engine id - /// The build job id - /// The minimum revision - /// The build job + /// The translation engine /// A server side error occurred. - System.Threading.Tasks.Task GetBuildAsync(string id, string buildId, long? minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. - /// - /// Get the currently running build job for a translation engine - /// - /// - /// See documentation on endpoint /translation/engines/{id}/builds/{id} - "Get a Build Job" for details on using `minRevision`. - /// - /// The translation engine id - /// The minimum revision - /// The build job - /// A server side error occurred. - System.Threading.Tasks.Task GetCurrentBuildAsync(string id, long? 
minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( 
response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Cancel the current build job (whether pending or active) for a translation engine + /// Delete a translation engine /// /// The translation engine id - /// The build job was cancelled successfully. + /// The engine was successfully deleted. /// A server side error occurred. 
- System.Threading.Tasks.Task CancelBuildAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("DELETE"); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + return; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist and therefore cannot be deleted.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Let a link to download the NMT translation model of the last build that was successfully saved. + /// Translate a segment of text /// - /// - /// If a Nmt build was successful and IsModelPersisted is `true` for the engine, - ///
then the model from the most recent successful build can be downloaded. - ///
- ///
The endpoint will return a URL that can be used to download the model for up to 1 hour - ///
after the request is made. If the URL is not used within that time, a new request will need to be made. - ///
- ///
The download itself is created by g-zipping together the folder containing the fine tuned model - ///
with all necessary supporting files. This zipped folder is then named by the pattern: - ///
* <engine_id>_<model_revision>.tar.gz - ///
/// The translation engine id - /// The url to download the model. + /// The source segment + /// The translation result /// A server side error occurred. - System.Threading.Tasks.Task GetModelDownloadUrlAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + public virtual async System.Threading.Tasks.Task TranslateAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (segment == null) + throw new System.ArgumentNullException("segment"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segment, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); - } + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}/translate" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/translate"); - [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationEnginesClient : ITranslationEnginesClient - { - #pragma warning disable 8618 - private string _baseUrl; - #pragma warning restore 8618 + PrepareRequest(client_, request_, urlBuilder_); - private 
System.Net.Http.HttpClient _httpClient; - private static System.Lazy _settings = new System.Lazy(CreateSerializerSettings, true); - private Newtonsoft.Json.JsonSerializerSettings _instanceSettings; + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); - #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. - public TranslationEnginesClient(System.Net.Http.HttpClient httpClient) - #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. - { - BaseUrl = "/api/v1"; - _httpClient = httpClient; - Initialize(); - } + PrepareRequest(client_, request_, url_); - private static Newtonsoft.Json.JsonSerializerSettings CreateSerializerSettings() - { - var settings = new Newtonsoft.Json.JsonSerializerSettings(); - UpdateJsonSerializerSettings(settings); - return settings; - } + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } - public string BaseUrl - { - get { return _baseUrl; } - set + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return 
objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built before it can translate segments.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally { - _baseUrl = value; - if (!string.IsNullOrEmpty(_baseUrl) && !_baseUrl.EndsWith("/")) - _baseUrl += '/'; + if (disposeClient_) + client_.Dispose(); } } - protected Newtonsoft.Json.JsonSerializerSettings JsonSerializerSettings { get { return _instanceSettings ?? _settings.Value; } } - - static partial void UpdateJsonSerializerSettings(Newtonsoft.Json.JsonSerializerSettings settings); - - partial void Initialize(); - - partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, string url); - partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, System.Text.StringBuilder urlBuilder); - partial void ProcessResponse(System.Net.Http.HttpClient client, System.Net.Http.HttpResponseMessage response); - /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all translation engines + /// Translates a segment of text into the top N results /// - /// The engines + /// The translation engine id + /// The number of translations to generate + /// The source segment + /// The translation results /// A server side error occurred. 
- public virtual async System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task> TranslateNAsync(string id, int n, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (n == null) + throw new System.ArgumentNullException("n"); + + if (segment == null) + throw new System.ArgumentNullException("segment"); + var client_ = _httpClient; var disposeClient_ = false; try { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - request_.Method = new System.Net.Http.HttpMethod("GET"); + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segment, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines" - urlBuilder_.Append("translation/engines"); + // Operation Path: "translation/engines/{id}/translate/{n}" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/translate/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(n, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -3835,7 +5211,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = 
await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -3843,6 +5219,12 @@ public string BaseUrl return objectResponse_.Object; } else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else if (status_ == 401) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); @@ -3852,7 +5234,25 @@ public string BaseUrl if (status_ == 403) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The authenticated client cannot perform the operation.", status_, responseText_, headers_, null); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built before it can translate segments.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -3882,46 +5282,19 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Create a new translation engine + /// Get the word graph that represents all possible translations of a segment of text /// - /// - /// ## Parameters - ///
* **name**: (optional) A name to help identify and distinguish the file. - ///
* Recommendation: Create a multi-part name to distinguish between projects, uses, etc. - ///
* The name does not have to be unique, as the engine is uniquely identified by the auto-generated id - ///
* **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) - ///
* **targetLanguage**: The target language code (a valid IETF language tag is recommended) - ///
* **type**: **smt-transfer** or **nmt** or **echo** - ///
* **isModelPersisted**: (optional) - see below - ///
### smt-transfer - ///
The Statistical Machine Translation Transfer Learning engine is primarily used for translation suggestions. Typical endpoints: translate, get-word-graph, train-segment - ///
* **IsModelPersisted**: (default to true) All models are persistent and can be updated with train-segment. False is not supported. - ///
### nmt - ///
The Neural Machine Translation engine is primarily used for pretranslations. It is fine-tuned from Meta's NLLB-200. Valid IETF language tags provided to Serval will be converted to [NLLB-200 codes](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200). See more about language tag resolution [here](https://github.com/sillsdev/serval/wiki/FLORES%E2%80%90200-Language-Code-Resolution-for-NMT-Engine). - ///
* **IsModelPersisted**: (default to false) Whether the model can be downloaded by the client after it has been successfully built. - ///
- ///
If you use a language among NLLB's supported languages, Serval will utilize everything the NLLB-200 model already knows about that language when translating. If the language you are working with is not among NLLB's supported languages, the language code will have no effect. - ///
- ///
Typical endpoints: pretranslate - ///
### echo - ///
The echo engine has full coverage of all nmt and smt-transfer endpoints. Endpoints like create and build return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user in a format that mocks nmt or Smt. For example, translating a segment "test" with the echo engine would yield a translation response with translation "test". This engine is useful for debugging and testing purposes. - ///
## Sample request: - ///
- ///
{ - ///
"name": "myTeam:myProject:myEngine", - ///
"sourceLanguage": "el", - ///
"targetLanguage": "en", - ///
"type": "nmt" - ///
"IsModelPersisted": true - ///
} - ///
- /// The translation engine configuration (see above) - /// The new translation engine + /// The translation engine id + /// The source segment + /// The word graph result /// A server side error occurred. - public virtual async System.Threading.Tasks.Task CreateAsync(TranslationEngineConfig engineConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetWordGraphAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { - if (engineConfig == null) - throw new System.ArgumentNullException("engineConfig"); + if (id == null) + throw new System.ArgumentNullException("id"); + + if (segment == null) + throw new System.ArgumentNullException("segment"); var client_ = _httpClient; var disposeClient_ = false; @@ -3929,7 +5302,7 @@ public string BaseUrl { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(engineConfig, JsonSerializerSettings); + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segment, JsonSerializerSettings); var content_ = new System.Net.Http.StringContent(json_); content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); request_.Content = content_; @@ -3938,8 +5311,10 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines" - urlBuilder_.Append("translation/engines"); + // Operation Path: "translation/engines/{id}/get-word-graph" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/get-word-graph"); PrepareRequest(client_, request_, urlBuilder_); @@ -3964,9 +5339,9 @@ public string BaseUrl 
ProcessResponse(client_, response_); var status_ = (int)response_.StatusCode; - if (status_ == 201) + if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -3977,7 +5352,7 @@ public string BaseUrl if (status_ == 400) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request. Is the engine type correct?", status_, responseText_, headers_, null); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); } else if (status_ == 401) @@ -3992,6 +5367,24 @@ public string BaseUrl throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); } else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + } + else if (status_ == 503) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); @@ -4019,30 +5412,43 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get a translation engine by unique id + /// Incrementally train a translation engine with a segment pair /// + /// + /// A segment pair consists of a source and target segment as well as a boolean flag `sentenceStart` + ///
that should be set to true if this segment pair forms the beginning of a sentence. (This information + ///
will be used to reconstruct proper capitalization when training/inferencing). + ///
/// The translation engine id - /// The translation engine + /// The segment pair + /// The engine was trained successfully. /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task TrainSegmentAsync(string id, SegmentPair segmentPair, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); + if (segmentPair == null) + throw new System.ArgumentNullException("segmentPair"); + var client_ = _httpClient; var disposeClient_ = false; try { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - request_.Method = new System.Net.Http.HttpMethod("GET"); - request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segmentPair, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}" + // Operation Path: "translation/engines/{id}/train-segment" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/train-segment"); PrepareRequest(client_, request_, urlBuilder_); @@ -4069,12 +5475,13 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, 
headers_, cancellationToken).ConfigureAwait(false); - if (objectResponse_.Object == null) - { - throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); - } - return objectResponse_.Object; + return; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); } else if (status_ == 401) @@ -4095,6 +5502,18 @@ public string BaseUrl throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); } else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + } + else if (status_ == 503) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); @@ -4122,29 +5541,57 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Delete a translation engine + /// Add a corpus to a translation engine /// + /// + /// ## Parameters + ///
* **name**: A name to help identify and distinguish the corpus from other corpora + ///
* The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + ///
* **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + ///
* **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* **SourceFiles**: The source files associated with the corpus + ///
* **FileId**: The unique id referencing the uploaded file + ///
* **TextId**: The client-defined name to associate source and target files. + ///
* If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + ///
* If selected for pretranslation when building, any SourceFile that has no TargetFile, and any line of text in a SourceFile whose corresponding TargetFile line is missing or blank, will be pretranslated. + ///
* If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + ///
* For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + ///
* For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + ///
* **TargetFiles**: The target files associated with the corpus + ///
* Same as SourceFiles, except only a single instance of a TextId or a single Paratext project is supported. There is no mixing or combining of multiple targets. + ///
/// The translation engine id - /// The engine was successfully deleted. + /// The corpus configuration (see remarks) + /// The added corpus /// A server side error occurred. - public virtual async System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); + var client_ = _httpClient; var disposeClient_ = false; try { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - request_.Method = new System.Net.Http.HttpMethod("DELETE"); + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}" + // Operation Path: "translation/engines/{id}/corpora" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -4169,9 +5616,20 @@ public string BaseUrl ProcessResponse(client_, response_); var status_ = (int)response_.StatusCode; - if (status_ == 200) + if (status_ 
== 201) { - return; + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); } else if (status_ == 401) @@ -4189,7 +5647,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist and therefore cannot be deleted.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -4219,39 +5677,31 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Translate a segment of text + /// Get all corpora for a translation engine /// /// The translation engine id - /// The source segment - /// The translation result + /// The corpora /// A server side error occurred. 
- public virtual async System.Threading.Tasks.Task TranslateAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (segment == null) - throw new System.ArgumentNullException("segment"); - var client_ = _httpClient; var disposeClient_ = false; try { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segment, JsonSerializerSettings); - var content_ = new System.Net.Http.StringContent(json_); - content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); - request_.Content = content_; - request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Method = new System.Net.Http.HttpMethod("GET"); request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/translate" + // Operation Path: "translation/engines/{id}/corpora" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/translate"); + urlBuilder_.Append("/corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -4278,7 +5728,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, 
cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -4286,46 +5736,28 @@ public string BaseUrl return objectResponse_.Object; } else - if (status_ == 400) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request", status_, responseText_, headers_, null); - } - else if (status_ == 401) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); } else if (status_ == 403) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine", status_, responseText_, headers_, null); } else if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); - } - else - if (status_ == 405) - { - string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); - } - else - if (status_ == 409) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine needs to be built before it can translate segments.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine does not exist", status_, responseText_, headers_, null); } else if (status_ == 503) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); } else { @@ -4349,23 +5781,27 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Translates a segment of text into the top N results + /// Update a corpus with a new set of files /// + /// + /// See posting a new corpus for details of use. Will completely replace corpus' file associations. + ///
Will not affect jobs already queued or running. Will not affect existing pretranslations until a new build is complete. + ///
/// The translation engine id - /// The number of translations to generate - /// The source segment - /// The translation results + /// The corpus id + /// The corpus configuration + /// The corpus was updated successfully /// A server side error occurred. - public virtual async System.Threading.Tasks.Task> TranslateNAsync(string id, int n, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (n == null) - throw new System.ArgumentNullException("n"); + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); - if (segment == null) - throw new System.ArgumentNullException("segment"); + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); var client_ = _httpClient; var disposeClient_ = false; @@ -4373,20 +5809,20 @@ public string BaseUrl { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segment, JsonSerializerSettings); + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); var content_ = new System.Net.Http.StringContent(json_); content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); request_.Content = content_; - request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Method = new System.Net.Http.HttpMethod("PATCH"); request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: 
"translation/engines/{id}/translate/{n}" + // Operation Path: "translation/engines/{id}/corpora/{corpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/translate/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(n, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -4413,7 +5849,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -4442,19 +5878,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); - } - else - if (status_ == 405) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); - } - else - if (status_ == 409) - { - string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine needs to be built before it can translate segments.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -4484,19 +5908,19 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get the word graph that represents all possible translations of a segment of text + /// Get the configuration of a corpus for a translation engine /// /// The translation engine id - /// The source segment - /// The word graph result + /// The corpus id + /// The corpus configuration /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetWordGraphAsync(string id, string segment, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (segment == null) - throw new System.ArgumentNullException("segment"); + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -4504,19 +5928,16 @@ public string BaseUrl { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segment, JsonSerializerSettings); - var content_ = new System.Net.Http.StringContent(json_); - content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); - request_.Content = content_; - request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Method = new 
System.Net.Http.HttpMethod("GET"); request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/get-word-graph" + // Operation Path: "translation/engines/{id}/corpora/{corpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/get-word-graph"); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -4543,18 +5964,12 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); - } - return objectResponse_.Object; - } - else - if (status_ == 400) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + return objectResponse_.Object; } else if (status_ == 401) @@ -4572,19 +5987,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); - } - else - if (status_ == 405) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); - } - else - if (status_ == 409) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -4614,24 +6017,23 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Incrementally train a translation engine with a segment pair + /// Remove a corpus from a translation engine /// /// - /// A segment pair consists of a source and target segment as well as a boolean flag `sentenceStart` - ///
that should be set to true if this segment pair forms the beginning of a sentence. (This information - ///
will be used to reconstruct proper capitalization when training/inferencing). + /// Removing a corpus will remove all pretranslations associated with that corpus. ///
/// The translation engine id - /// The segment pair - /// The engine was trained successfully. + /// The corpus id + /// If true, all files associated with the corpus will be deleted as well (even if they are associated with other corpora). If false, no files will be deleted. + /// The corpus was deleted successfully. /// A server side error occurred. - public virtual async System.Threading.Tasks.Task TrainSegmentAsync(string id, SegmentPair segmentPair, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (segmentPair == null) - throw new System.ArgumentNullException("segmentPair"); + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -4639,18 +6041,21 @@ public string BaseUrl { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segmentPair, JsonSerializerSettings); - var content_ = new System.Net.Http.StringContent(json_); - content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); - request_.Content = content_; - request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Method = new System.Net.Http.HttpMethod("DELETE"); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/train-segment" + // Operation Path: "translation/engines/{id}/corpora/{corpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, 
System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/train-segment"); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append('?'); + if (deleteFiles != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("delete-files")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(deleteFiles, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; PrepareRequest(client_, request_, urlBuilder_); @@ -4680,12 +6085,6 @@ public string BaseUrl return; } else - if (status_ == 400) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request", status_, responseText_, headers_, null); - } - else if (status_ == 401) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); @@ -4701,19 +6100,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); - } - else - if (status_ == 405) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); - } - else - if (status_ == 409) - { - string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -4767,7 +6154,7 @@ public string BaseUrl /// The corpus configuration (see remarks) /// The added corpus /// A server side error occurred. - public virtual async System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task AddParallelCorpusAsync(string id, TranslationParallelCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -4790,10 +6177,10 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora" + // Operation Path: "translation/engines/{id}/parallel-corpora" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora"); + urlBuilder_.Append("/parallel-corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -4820,7 +6207,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 201) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not 
expected.", status_, objectResponse_.Text, headers_, null); @@ -4879,12 +6266,12 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all corpora for a translation engine + /// Get all parallel corpora for a translation engine /// /// The translation engine id - /// The files + /// The parallel corpora /// A server side error occurred. - public virtual async System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task> GetAllParallelCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -4900,10 +6287,10 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora" + // Operation Path: "translation/engines/{id}/parallel-corpora" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora"); + urlBuilder_.Append("/parallel-corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -4930,7 +6317,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -4986,21 +6373,20 @@ public string BaseUrl /// Update 
a corpus with a new set of files /// /// - /// See posting a new corpus for details of use. Will completely replace corpus' file associations. - ///
Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + /// Will completely replace corpus' file associations. Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. ///
/// The translation engine id - /// The corpus id + /// The corpus id /// The corpus configuration /// The corpus was updated successfully /// A server side error occurred. - public virtual async System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task UpdateParallelCorpusAsync(string id, string parallelCorpusId, TranslationParallelCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (corpusId == null) - throw new System.ArgumentNullException("corpusId"); + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); if (corpusConfig == null) throw new System.ArgumentNullException("corpusConfig"); @@ -5020,11 +6406,11 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + // Operation Path: "translation/engines/{id}/parallel-corpora/{parallelCorpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -5051,7 +6437,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ 
= await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -5110,19 +6496,19 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get the configuration of a corpus for a translation engine + /// Get the configuration of a parallel corpus for a translation engine /// /// The translation engine id - /// The corpus id - /// The corpus configuration + /// The parallel corpus id + /// The parallel corpus configuration /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (corpusId == null) - throw new System.ArgumentNullException("corpusId"); + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -5135,11 +6521,11 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + // Operation Path: "translation/engines/{id}/parallel-corpora/{parallelCorpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, 
System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -5166,7 +6552,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -5189,7 +6575,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or parallel corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -5219,23 +6605,22 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Remove a corpus from a translation engine + /// Remove a parallel corpus from a translation engine /// /// - /// Removing a corpus will remove all pretranslations associated with that corpus. + /// Removing a parallel corpus will remove all pretranslations associated with that corpus. 
/// /// The translation engine id - /// The corpus id - /// If true, all files associated with the corpus will be deleted as well (even if they are associated with other corpora). If false, no files will be deleted. - /// The corpus was deleted successfully. + /// The parallel corpus id + /// The parallel corpus was deleted successfully. /// A server side error occurred. - public virtual async System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task DeleteParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (corpusId == null) - throw new System.ArgumentNullException("corpusId"); + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -5247,17 +6632,11 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + // Operation Path: "translation/engines/{id}/parallel-corpora/{parallelCorpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append('?'); - if (deleteFiles != null) - { - urlBuilder_.Append(System.Uri.EscapeDataString("delete-files")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(deleteFiles, 
System.Globalization.CultureInfo.InvariantCulture))).Append('&'); - } - urlBuilder_.Length--; + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -5302,7 +6681,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or parallel corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -7872,6 +9251,45 @@ public partial class AssessmentResult } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class Corpus + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] + public int Revision { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusFile + { + [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public DataFile File { get; set; } = new DataFile(); + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? TextId { get; set; } = default!; + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class DataFile { @@ -7908,6 +9326,34 @@ public enum FileFormat } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusConfig + { + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusFileConfig + { + [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string FileId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
TextId { get; set; } = default!; + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TranslationEngine { @@ -8232,6 +9678,61 @@ public partial class TranslationCorpusUpdateConfig } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpus + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("engine", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public ResourceLink Engine { get; set; } = new ResourceLink(); + + [Newtonsoft.Json.JsonProperty("sourceCorpora", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceCorpora { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetCorpora", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetCorpora { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpusConfig + { + /// + /// The corpus name. + /// + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceCorpusIds", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceCorpusIds { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetCorpusIds", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetCorpusIds { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpusUpdateConfig + { + [Newtonsoft.Json.JsonProperty("sourceCorpusIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceCorpusIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetCorpusIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetCorpusIds { get; set; } = default!; + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class Pretranslation { @@ -8339,6 +9840,29 @@ public partial class TranslationBuild [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TrainingCorpus + { + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? 
Corpus { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ScriptureRange { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? ParallelCorpus { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
TargetFilters { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class ParallelCorpusFilter { [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] @@ -8355,9 +9879,8 @@ public partial class TrainingCorpus [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class PretranslateCorpus { - [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public ResourceLink Corpus { get; set; } = new ResourceLink(); + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? Corpus { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public System.Collections.Generic.IList? TextIds { get; set; } = default!; @@ -8365,6 +9888,12 @@ public partial class PretranslateCorpus [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? ScriptureRange { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? ParallelCorpus { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
SourceFilters { get; set; } = default!; + } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] @@ -8386,6 +9915,29 @@ public partial class TranslationBuildConfig [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TrainingCorpusConfig + { + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? CorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ScriptureRange { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ParallelCorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
TargetFilters { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class ParallelCorpusFilterConfig { [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -8402,9 +9954,8 @@ public partial class TrainingCorpusConfig [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class PretranslateCorpusConfig { - [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string CorpusId { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? CorpusId { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public System.Collections.Generic.IList? TextIds { get; set; } = default!; @@ -8412,6 +9963,12 @@ public partial class PretranslateCorpusConfig [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? ScriptureRange { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ParallelCorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
SourceFilters { get; set; } = default!; + } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] diff --git a/src/Serval/src/Serval.DataFiles/Configuration/IMediatorRegistrationConfiguratorExtensions.cs b/src/Serval/src/Serval.DataFiles/Configuration/IMediatorRegistrationConfiguratorExtensions.cs index cbafe62a..34a762b9 100644 --- a/src/Serval/src/Serval.DataFiles/Configuration/IMediatorRegistrationConfiguratorExtensions.cs +++ b/src/Serval/src/Serval.DataFiles/Configuration/IMediatorRegistrationConfiguratorExtensions.cs @@ -6,6 +6,7 @@ public static IMediatorRegistrationConfigurator AddDataFilesConsumers( this IMediatorRegistrationConfigurator configurator ) { + configurator.AddConsumer(); configurator.AddConsumer(); configurator.AddConsumer(); return configurator; diff --git a/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs b/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs index 86d10a52..d9370a0f 100644 --- a/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs +++ b/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs @@ -21,6 +21,13 @@ public static IMongoDataAccessConfigurator AddDataFilesRepositories(this IMongoD new CreateIndexModel(Builders.IndexKeys.Ascending(p => p.DeletedAt)) ) ); + configurator.AddRepository( + "corpora.corpus", + init: c => + c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(p => p.Owner)) + ) + ); return configurator; } } diff --git a/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs index 64a25658..91756a6c 100644 --- a/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs @@ -11,6 +11,9 @@ 
public static IServalBuilder AddDataFiles(this IServalBuilder builder) builder.Services.AddScoped(); builder.Services.AddHostedService(); + + builder.Services.AddScoped(); + return builder; } }
diff --git a/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs b/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs
new file mode 100644
index 00000000..c369d528
--- /dev/null
+++ b/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs
@@ -0,0 +1,55 @@
+namespace Serval.DataFiles.Consumers;
+
+/// <summary>
+/// Answers <see cref="GetCorpus"/> requests with a <see cref="CorpusResult"/>, or with a
+/// <see cref="CorpusNotFound"/> when the corpus service throws <see cref="EntityNotFoundException"/>.
+/// </summary>
+public class GetCorpusConsumer(ICorpusService corpusService) : IConsumer<GetCorpus>
+{
+    private readonly ICorpusService _corpusService = corpusService;
+
+    /// <summary>
+    /// Looks up the corpus named by the message's corpus id and owner and sends the response.
+    /// </summary>
+    /// <param name="context">Provides the request message, the cancellation token and the response channel.</param>
+    public async Task Consume(ConsumeContext<GetCorpus> context)
+    {
+        try
+        {
+            Corpus corpus = await _corpusService.GetAsync(
+                context.Message.CorpusId,
+                context.Message.Owner,
+                context.CancellationToken
+            );
+            await context.RespondAsync(
+                new CorpusResult
+                {
+                    CorpusId = corpus.Id,
+                    Name = corpus.Name,
+                    Language = corpus.Language,
+                    Files = corpus
+                        .Files.Select(f => new CorpusFileResult
+                        {
+                            // TextId is nullable on CorpusFile; fall back to the data file's name rather
+                            // than suppressing the null with the null-forgiving operator.
+                            TextId = f.TextId ?? f.File.Name,
+                            File = new DataFileResult
+                            {
+                                DataFileId = f.File.Id,
+                                Filename = f.File.Filename,
+                                Format = f.File.Format,
+                                Name = f.File.Name
+                            }
+                        })
+                        .ToList()
+                }
+            );
+        }
+        catch (EntityNotFoundException)
+        {
+            await context.RespondAsync(
+                new CorpusNotFound { CorpusId = context.Message.CorpusId, Owner = context.Message.Owner }
+            );
+        }
+    }
+}
diff --git a/src/Serval/src/Serval.DataFiles/Contracts/CorpusConfigDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusConfigDto.cs
new file mode 100644
index 00000000..cb744e41
--- /dev/null
+++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusConfigDto.cs
@@ -0,0 +1,10 @@
+namespace Serval.DataFiles.Contracts;
+
+public record CorpusConfigDto
+{
+    public string? 
Name { get; init; }
+
+    public required string Language { get; init; }
+
+    public required IReadOnlyList<CorpusFileConfigDto> Files { get; init; }
+}
diff --git a/src/Serval/src/Serval.DataFiles/Contracts/CorpusDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusDto.cs
new file mode 100644
index 00000000..b7446fa3
--- /dev/null
+++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusDto.cs
@@ -0,0 +1,14 @@
+namespace Serval.DataFiles.Contracts;
+
+/// <summary>
+/// A corpus as returned by the API: identity, revision, language, optional name, URL and files.
+/// </summary>
+public record CorpusDto
+{
+    public required string Id { get; init; }
+    public required int Revision { get; init; }
+    public required string Language { get; init; }
+    public string? Name { get; init; }
+    public required string Url { get; init; }
+    public required IReadOnlyList<CorpusFileDto> Files { get; set; }
+}
diff --git a/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileConfigDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileConfigDto.cs
new file mode 100644
index 00000000..746a9686
--- /dev/null
+++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileConfigDto.cs
@@ -0,0 +1,10 @@
+namespace Serval.DataFiles.Contracts;
+
+/// <summary>
+/// Describes one file of a corpus in a request: the id of a data file and an optional text id.
+/// </summary>
+public record CorpusFileConfigDto
+{
+    public required string FileId { get; init; }
+    public string? TextId { get; init; }
+}
diff --git a/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs
new file mode 100644
index 00000000..d2d175be
--- /dev/null
+++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs
@@ -0,0 +1,7 @@
+namespace Serval.DataFiles.Contracts;
+
+public record CorpusFileDto
+{
+    public required DataFileDto File { get; init; }
+    public string? 
TextId { get; init; }
+}
diff --git a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs
new file mode 100644
index 00000000..29bf041e
--- /dev/null
+++ b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs
@@ -0,0 +1,239 @@
+namespace Serval.DataFiles.Controllers;
+
+/// <summary>
+/// REST endpoints for listing, reading, creating, updating and deleting corpora.
+/// </summary>
+[ApiVersion("1.0")]
+[Route("api/v{version:apiVersion}/corpora")]
+[OpenApiTag("Corpora")]
+public class CorporaController(
+    IAuthorizationService authService,
+    ICorpusService corpusService,
+    IUrlService urlService,
+    IDataFileService dataFileService
+) : ServalControllerBase(authService)
+{
+    private readonly ICorpusService _corpusService = corpusService;
+    private readonly IUrlService _urlService = urlService;
+    private readonly IDataFileService _dataFileService = dataFileService;
+
+    /// <summary>
+    /// Get all corpora
+    /// </summary>
+    /// <response code="200">A list of all corpora owned by the client</response>
+    /// <response code="401">The client is not authenticated</response>
+    /// <response code="403">The authenticated client cannot perform the operation</response>
+    /// <response code="503">A necessary service is currently unavailable. Check `/health` for more details.</response>
+    [Authorize(Scopes.ReadFiles)]
+    [HttpGet]
+    [ProducesResponseType(StatusCodes.Status200OK)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)]
+    public async Task<IEnumerable<CorpusDto>> GetAllAsync(CancellationToken cancellationToken)
+    {
+        return (await _corpusService.GetAllAsync(Owner, cancellationToken)).Select(Map);
+    }
+
+    /// <summary>
+    /// Get a corpus by unique id
+    /// </summary>
+    /// <param name="id">The unique identifier for the corpus</param>
+    /// <param name="cancellationToken"></param>
+    /// <response code="200">The corpus exists</response>
+    /// <response code="401">The client is not authenticated</response>
+    /// <response code="403">The authenticated client cannot perform the operation or does not own the corpus</response>
+    /// <response code="404">The corpus does not exist</response>
+    /// <response code="503">A necessary service is currently unavailable. Check `/health` for more details.</response>
+    [Authorize(Scopes.ReadFiles)]
+    [HttpGet("{id}", Name = Endpoints.GetCorpus)]
+    [ProducesResponseType(StatusCodes.Status200OK)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)]
+    public async Task<ActionResult<CorpusDto>> GetAsync([NotNull] string id, CancellationToken cancellationToken)
+    {
+        Corpus corpus = await _corpusService.GetAsync(id, cancellationToken);
+        await AuthorizeAsync(corpus);
+        return Ok(Map(corpus));
+    }
+
+    /// <summary>
+    /// Create a new corpus
+    /// </summary>
+    /// <param name="corpusConfig">The name, language and files of the corpus to create</param>
+    /// <param name="getDataFileClient">Not used by this action</param>
+    /// <param name="idGenerator">Generates the id assigned to the new corpus</param>
+    /// <param name="cancellationToken"></param>
+    /// <response code="201">The corpus was created successfully</response>
+    /// <response code="400">Bad request</response>
+    /// <response code="401">The client is not authenticated</response>
+    /// <response code="403">The authenticated client cannot perform the operation</response>
+    /// <response code="503">A necessary service is currently unavailable. Check `/health` for more details.</response>
+    [Authorize(Scopes.CreateFiles)]
+    [HttpPost]
+    [ProducesResponseType(StatusCodes.Status201Created)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)]
+    public async Task<ActionResult<CorpusDto>> CreateAsync(
+        [FromBody] CorpusConfigDto corpusConfig,
+        [FromServices] IRequestClient<GetDataFile> getDataFileClient,
+        [FromServices] IIdGenerator idGenerator,
+        CancellationToken cancellationToken
+    )
+    {
+        Corpus corpus = await MapAsync(corpusConfig, idGenerator.GenerateId(), cancellationToken);
+        await _corpusService.CreateAsync(corpus, cancellationToken);
+        CorpusDto dto = Map(corpus);
+        return Created(dto.Url, dto);
+    }
+
+    /// <summary>
+    /// Update an existing corpus
+    /// </summary>
+    /// <param name="id">The unique identifier for the corpus</param>
+    /// <param name="files">Tuples of the ids of the new corpus files and the associated text ids</param>
+    /// <param name="cancellationToken"></param>
+    /// <response code="200">The corpus was updated successfully</response>
+    /// <response code="400">Bad request</response>
+    /// <response code="401">The client is not authenticated</response>
+    /// <response code="403">The authenticated client cannot perform the operation or does not own the corpus</response>
+    /// <response code="404">The corpus does not exist and therefore cannot be updated</response>
+    /// <response code="503">A necessary service is currently unavailable. Check `/health` for more details.</response>
+    [Authorize(Scopes.UpdateFiles)]
+    [HttpPatch("{id}")]
+    [ProducesResponseType(StatusCodes.Status200OK)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)]
+    public async Task<ActionResult<CorpusDto>> UpdateAsync(
+        [NotNull] string id,
+        [NotNull] IReadOnlyList<CorpusFileConfigDto> files,
+        CancellationToken cancellationToken
+    )
+    {
+        await AuthorizeAsync(id, cancellationToken);
+
+        Corpus corpus = await _corpusService.UpdateAsync(
+            id,
+            await MapAsync(files, cancellationToken),
+            cancellationToken
+        );
+
+        return Ok(Map(corpus));
+    }
+
+    /// <summary>
+    /// Delete an existing corpus
+    /// </summary>
+    /// <param name="id">The unique identifier for the corpus</param>
+    /// <param name="cancellationToken"></param>
+    /// <response code="200">The corpus was deleted successfully</response>
+    /// <response code="401">The client is not authenticated</response>
+    /// <response code="403">The authenticated client cannot perform the operation or does not own the corpus</response>
+    /// <response code="404">The corpus does not exist and therefore cannot be deleted</response>
+    /// <response code="503">A necessary service is currently unavailable. Check `/health` for more details.</response>
+    [Authorize(Scopes.DeleteFiles)]
+    [HttpDelete("{id}")]
+    [ProducesResponseType(typeof(void), StatusCodes.Status200OK)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)]
+    [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)]
+    public async Task<ActionResult> DeleteAsync([NotNull] string id, CancellationToken cancellationToken)
+    {
+        await AuthorizeAsync(id, cancellationToken);
+        await _corpusService.DeleteAsync(id, cancellationToken);
+        return Ok();
+    }
+
+    /// <summary>
+    /// Loads the corpus with the given id and runs the entity-based authorization check on it.
+    /// </summary>
+    private async Task AuthorizeAsync(string id, CancellationToken cancellationToken)
+    {
+        Corpus corpus = await _corpusService.GetAsync(id, cancellationToken);
+        await AuthorizeAsync(corpus);
+    }
+
+    /// <summary>
+    /// Builds the corpus to store from the request, including its optional name, for the current owner.
+    /// </summary>
+    private async Task<Corpus> MapAsync(CorpusConfigDto corpusConfig, string id, CancellationToken cancellationToken)
+    {
+        return new Corpus
+        {
+            Id = id,
+            Owner = Owner,
+            Name = corpusConfig.Name,
+            Language = corpusConfig.Language,
+            Files = await MapAsync(corpusConfig.Files, cancellationToken)
+        };
+    }
+
+    /// <summary>
+    /// Resolves each file config to its data file and pairs it with the configured text id.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">A referenced data file does not exist.</exception>
+    private async Task<IReadOnlyList<CorpusFile>> MapAsync(
+        IReadOnlyList<CorpusFileConfigDto> files,
+        CancellationToken cancellationToken
+    )
+    {
+        var corpusFiles = new List<CorpusFile>();
+        foreach (CorpusFileConfigDto file in files)
+        {
+            // NOTE(review): the data file is looked up by id only; confirm the caller's ownership of it is
+            // enforced somewhere before it is attached to the caller's corpus.
+            DataFile? dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken);
+            if (dataFile is null)
+                throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist.");
+            corpusFiles.Add(new CorpusFile { File = dataFile, TextId = file.TextId });
+        }
+        return corpusFiles;
+    }
+
+    /// <summary>
+    /// Converts a stored corpus into its API representation, including the URL of the corpus.
+    /// </summary>
+    private CorpusDto Map(Corpus source)
+    {
+        return new CorpusDto
+        {
+            Id = source.Id,
+            Language = source.Language,
+            Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = source.Id }),
+            Name = source.Name,
+            Revision = source.Revision,
+            Files = source.Files.Select(Map).ToList()
+        };
+    }
+
+    /// <summary>
+    /// Converts a stored corpus file into its API representation.
+    /// </summary>
+    private CorpusFileDto Map(CorpusFile source)
+    {
+        return new CorpusFileDto { File = Map(source.File), TextId = source.TextId };
+    }
+
+    /// <summary>
+    /// Converts a stored data file into its API representation, including the URL of the data file.
+    /// </summary>
+    private DataFileDto Map(DataFile source)
+    {
+        return new DataFileDto
+        {
+            Id = source.Id,
+            Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }),
+            Name = source.Name,
+            Format = source.Format,
+            Revision = source.Revision
+        };
+    }
+}
diff --git a/src/Serval/src/Serval.DataFiles/Controllers/DataFilesController.cs b/src/Serval/src/Serval.DataFiles/Controllers/DataFilesController.cs index 32218a68..7d368ac4 100644 --- a/src/Serval/src/Serval.DataFiles/Controllers/DataFilesController.cs +++ b/src/Serval/src/Serval.DataFiles/Controllers/DataFilesController.cs @@ -194,7 +194,7 @@ CancellationToken cancellationToken /// Delete an existing file /// /// - /// If a file is in a corpora and the file is deleted, it will be automatically removed from the corpora. + /// If a file is in a corpus and the file is deleted, it will be automatically removed from that corpus. /// If a build job has started before the file was deleted, the file will be used for the build job, even /// though it will no longer be accessible through the API. 
/// diff --git a/src/Serval/src/Serval.DataFiles/Models/Corpus.cs b/src/Serval/src/Serval.DataFiles/Models/Corpus.cs new file mode 100644 index 00000000..8a4fca70 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Models/Corpus.cs @@ -0,0 +1,11 @@ +namespace Serval.DataFiles.Models; + +public record Corpus : IOwnedEntity +{ + public string Id { get; set; } = ""; + public int Revision { get; set; } = 1; + public required string Language { get; init; } + public string? Name { get; set; } + public required string Owner { get; init; } + public required IReadOnlyList Files { get; set; } +} diff --git a/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs new file mode 100644 index 00000000..a4311e39 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs @@ -0,0 +1,7 @@ +namespace Serval.DataFiles.Models; + +public record CorpusFile +{ + public required DataFile File { get; init; } + public string? TextId { get; init; } +} diff --git a/src/Serval/src/Serval.DataFiles/Models/DeletedFile.cs b/src/Serval/src/Serval.DataFiles/Models/DeletedFile.cs index 5c76fc4c..0e60003f 100644 --- a/src/Serval/src/Serval.DataFiles/Models/DeletedFile.cs +++ b/src/Serval/src/Serval.DataFiles/Models/DeletedFile.cs @@ -1,6 +1,6 @@ namespace Serval.DataFiles.Models; -public class DeletedFile : IEntity +public record DeletedFile : IEntity { public string Id { get; set; } = ""; public int Revision { get; set; } = 1; diff --git a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs new file mode 100644 index 00000000..f5b8e4b6 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs @@ -0,0 +1,28 @@ +namespace Serval.DataFiles.Services; + +public class CorpusService(IRepository corpora) : OwnedEntityServiceBase(corpora), ICorpusService +{ + public async Task GetAsync(string id, string owner, CancellationToken cancellationToken = default) + 
{ + Corpus? corpus = await Entities.GetAsync(c => c.Id == id && c.Owner == owner, cancellationToken); + if (corpus == null) + throw new EntityNotFoundException($"Could not find the Corpus '{id}' with owner '{owner}'."); + return corpus; + } + + public async Task UpdateAsync( + string id, + IReadOnlyList files, + CancellationToken cancellationToken = default + ) + { + Corpus? corpus = await Entities.UpdateAsync( + c => c.Id == id, + u => u.Set(c => c.Files, files), + cancellationToken: cancellationToken + ); + if (corpus is null) + throw new EntityNotFoundException($"Could not find Corpus '{id}'."); + return corpus; + } +} diff --git a/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs new file mode 100644 index 00000000..a4f0e242 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs @@ -0,0 +1,11 @@ +namespace Serval.DataFiles.Services; + +public interface ICorpusService +{ + Task> GetAllAsync(string owner, CancellationToken cancellationToken); + Task GetAsync(string id, CancellationToken cancellationToken = default); + Task GetAsync(string id, string owner, CancellationToken cancellationToken = default); + Task CreateAsync(Corpus corpus, CancellationToken cancellationToken = default); + Task UpdateAsync(string id, IReadOnlyList files, CancellationToken cancellationToken = default); + Task DeleteAsync(string id, CancellationToken cancellationToken = default); +} diff --git a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto index 6ae643c5..98918f0c 100644 --- a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto +++ b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto @@ -69,7 +69,7 @@ message StartBuildRequest { string engine_id = 2; string build_id = 3; optional string options = 4; - repeated Corpus corpora = 5; + repeated ParallelCorpus corpora = 
5; } message CancelBuildRequest { @@ -152,18 +152,20 @@ message WordGraph { repeated WordGraphArc arcs = 4; } -message Corpus { +message ParallelCorpus { string id = 1; - string source_language = 2; - string target_language = 3; - bool train_on_all = 4; - bool pretranslate_all = 5; - map train_on_chapters = 6; - map pretranslate_chapters = 7; - repeated string train_on_text_ids = 8; - repeated string pretranslate_text_ids = 9; - repeated CorpusFile source_files = 10; - repeated CorpusFile target_files = 11; + repeated MonolingualCorpus source_corpora = 2; + repeated MonolingualCorpus target_corpora = 3; +} + +message MonolingualCorpus { + string id = 1; + string language = 2; + map train_on_chapters = 5; + map pretranslate_chapters = 6; + repeated string train_on_text_ids = 7; + repeated string pretranslate_text_ids = 8; + repeated CorpusFile files = 9; } message ScriptureChapters { @@ -185,4 +187,4 @@ enum TranslationSource { TRANSLATION_SOURCE_PRIMARY = 0; TRANSLATION_SOURCE_SECONDARY = 1; TRANSLATION_SOURCE_HUMAN = 2; -} +} \ No newline at end of file diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs new file mode 100644 index 00000000..953d705b --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record CorpusFileResult +{ + public required DataFileResult File { get; init; } + public required string TextId { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs new file mode 100644 index 00000000..81f9246b --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record CorpusNotFound +{ + public required string CorpusId { get; init; } + public required string Owner { get; init; } +} diff --git 
a/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs new file mode 100644 index 00000000..0c0f8380 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs @@ -0,0 +1,9 @@ +namespace Serval.Shared.Contracts; + +public record CorpusResult +{ + public required string CorpusId { get; init; } + public required string Language { get; init; } + public string? Name { get; init; } + public required IReadOnlyList Files { get; set; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs b/src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs new file mode 100644 index 00000000..a29b4f12 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record GetCorpus +{ + public required string CorpusId { get; init; } + public required string Owner { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs b/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs index 2779062b..e8e147a0 100644 --- a/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs +++ b/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs @@ -6,6 +6,7 @@ public static class Endpoints public const string GetTranslationEngine = "GetTranslationEngine"; public const string GetTranslationCorpus = "GetTranslationCorpus"; + public const string GetParallelTranslationCorpus = "GetParallelTranslationCorpus"; public const string GetTranslationBuild = "GetTranslationBuild"; public const string GetAssessmentEngine = "GetAssessmentEngine"; @@ -14,4 +15,6 @@ public static class Endpoints public const string GetAssessmentJob = "GetAssessmentJob"; public const string GetWebhook = "GetWebhook"; + + public const string GetCorpus = "GetCorpus"; } diff --git a/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterConfigDto.cs new file mode 100644 index 
00000000..5e51244e --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterConfigDto.cs @@ -0,0 +1,8 @@ +namespace Serval.Translation.Contracts; + +public record ParallelCorpusFilterConfigDto +{ + public required string CorpusId { get; init; } + public IReadOnlyList? TextIds { get; init; } + public string? ScriptureRange { get; init; } +} diff --git a/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterDto.cs b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterDto.cs new file mode 100644 index 00000000..7aa22907 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterDto.cs @@ -0,0 +1,8 @@ +namespace Serval.Translation.Contracts; + +public record ParallelCorpusFilterDto +{ + public required ResourceLinkDto Corpus { get; init; } + public IReadOnlyList? TextIds { get; init; } + public string? ScriptureRange { get; init; } +} diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs index 6be52b14..a88ebe3b 100644 --- a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs @@ -2,9 +2,12 @@ public record PretranslateCorpusConfigDto { - public required string CorpusId { get; init; } + public string? CorpusId { get; init; } public IReadOnlyList? TextIds { get; init; } public string? ScriptureRange { get; init; } + + public string? ParallelCorpusId { get; init; } + public IReadOnlyList? 
SourceFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs index 6394ec4d..9aa6f939 100644 --- a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs @@ -2,9 +2,12 @@ public record PretranslateCorpusDto { - public required ResourceLinkDto Corpus { get; init; } + public ResourceLinkDto? Corpus { get; init; } public IReadOnlyList? TextIds { get; init; } public string? ScriptureRange { get; init; } + + public ResourceLinkDto? ParallelCorpus { get; init; } + public IReadOnlyList? SourceFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs index db933fc3..c8161a5f 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs @@ -2,7 +2,11 @@ namespace Serval.Translation.Contracts; public record TrainingCorpusConfigDto { - public required string CorpusId { get; init; } + public string? CorpusId { get; init; } public IReadOnlyList? TextIds { get; init; } public string? ScriptureRange { get; init; } + + public string? ParallelCorpusId { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? 
TargetFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs index 563f4fed..f734f43b 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs @@ -2,9 +2,11 @@ namespace Serval.Translation.Contracts; public record TrainingCorpusDto { - public required ResourceLinkDto Corpus { get; init; } - + public ResourceLinkDto? Corpus { get; init; } public IReadOnlyList? TextIds { get; init; } - public string? ScriptureRange { get; init; } + + public ResourceLinkDto? ParallelCorpus { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? TargetFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusConfigDto.cs new file mode 100644 index 00000000..b043c758 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusConfigDto.cs @@ -0,0 +1,12 @@ +namespace Serval.Translation.Contracts; + +public record TranslationParallelCorpusConfigDto +{ + /// + /// The corpus name. + /// + public string? 
Name { get; init; } + + public required IReadOnlyList SourceCorpusIds { get; init; } = new List(); + public required IReadOnlyList TargetCorpusIds { get; init; } = new List(); +} diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs new file mode 100644 index 00000000..063da1d2 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs @@ -0,0 +1,10 @@ +namespace Serval.Translation.Contracts; + +public record TranslationParallelCorpusDto +{ + public required string Id { get; init; } + public required string Url { get; init; } + public required ResourceLinkDto Engine { get; init; } + public required IReadOnlyList SourceCorpora { get; init; } + public required IReadOnlyList TargetCorpora { get; init; } +} diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs new file mode 100644 index 00000000..b47447d0 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs @@ -0,0 +1,23 @@ +using System.ComponentModel.DataAnnotations; + +namespace Serval.Translation.Contracts; + +public record TranslationParallelCorpusUpdateConfigDto : IValidatableObject +{ + public IReadOnlyList? SourceCorpusIds { get; init; } + + public IReadOnlyList? 
TargetCorpusIds { get; init; } + + public IEnumerable Validate( + ValidationContext validationContext + ) + { + if (SourceCorpusIds is null && TargetCorpusIds is null) + { + yield return new System.ComponentModel.DataAnnotations.ValidationResult( + "At least one field must be specified.", + [nameof(SourceCorpusIds), nameof(TargetCorpusIds)] + ); + } + } +} diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs index 31d69846..cd18dc82 100644 --- a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -422,7 +422,7 @@ corpusConfig.TargetFiles is null /// /// The translation engine id /// - /// The files + /// The corpora /// The client is not authenticated /// The authenticated client cannot perform the operation or does not own the translation engine /// The engine does not exist @@ -510,6 +510,206 @@ CancellationToken cancellationToken return Ok(); } + /// + /// Add a corpus to a translation engine + /// + /// + /// ## Parameters + /// * **name**: A name to help identify and distinguish the corpus from other corpora + /// * The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + /// * **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + /// * Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + /// * **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). 
+ /// * **SourceFiles**: The source files associated with the corpus + /// * **FileId**: The unique id referencing the uploaded file + /// * **TextId**: The client-defined name to associate source and target files. + /// * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + /// * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile, will be pretranslated. + /// * If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + /// * For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + /// * For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + /// * **TargetFiles**: The target files associated with the corpus + /// * Same as SourceFiles, except only a single instance of a TextId or a single Paratext project is supported. There is no mixing or combining of multiple targets. + /// + /// The translation engine id + /// The parallel corpus configuration (see remarks) + /// + /// + /// + /// The added parallel corpus + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.UpdateTranslationEngines)] + [HttpPost("{id}/parallel-corpora")] + [ProducesResponseType(StatusCodes.Status201Created)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> AddParallelCorpusAsync( + [NotNull] string id, + [FromBody] TranslationParallelCorpusConfigDto corpusConfig, + [FromServices] IRequestClient getCorpusClient, + [FromServices] IIdGenerator idGenerator, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + ParallelCorpus corpus = await MapAsync( + getCorpusClient, + idGenerator.GenerateId(), + corpusConfig, + cancellationToken + ); + await _engineService.AddParallelCorpusAsync(id, corpus, cancellationToken); + TranslationParallelCorpusDto dto = Map(id, corpus); + return Created(dto.Url, dto); + } + + /// + /// Update a parallel corpus with a new set of source and/or target corpora + /// + /// + /// Will completely replace the parallel corpus' source and/or target corpus associations with the ones specified. Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + /// + /// The translation engine id + /// The parallel corpus id + /// The parallel corpus configuration + /// The corpus request client + /// + /// The parallel corpus was updated successfully + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.UpdateTranslationEngines)] + [HttpPatch("{id}/parallel-corpora/{parallelCorpusId}")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> UpdateParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + [FromBody] TranslationParallelCorpusUpdateConfigDto corpusConfig, + [FromServices] IRequestClient getCorpusClient, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + ParallelCorpus parallelCorpus = await _engineService.UpdateParallelCorpusAsync( + id, + parallelCorpusId, + corpusConfig.SourceCorpusIds is null + ? null + : await MapAsync(getCorpusClient, corpusConfig.SourceCorpusIds, cancellationToken), + corpusConfig.TargetCorpusIds is null + ? null + : await MapAsync(getCorpusClient, corpusConfig.TargetCorpusIds, cancellationToken), + cancellationToken + ); + return Ok(Map(id, parallelCorpus)); + } + + /// + /// Get all parallel corpora for a translation engine + /// + /// The translation engine id + /// + /// The parallel corpora + /// The client is not authenticated + /// The authenticated client cannot perform the operation or does not own the translation engine + /// The engine does not exist + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.ReadTranslationEngines)] + [HttpGet("{id}/parallel-corpora")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task>> GetAllParallelCorporaAsync( + [NotNull] string id, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + return Ok(engine.ParallelCorpora.Select(c => Map(id, c))); + } + + /// + /// Get the configuration of a parallel corpus for a translation engine + /// + /// The translation engine id + /// The parallel corpus id + /// + /// The parallel corpus configuration + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadTranslationEngines)] + [HttpGet("{id}/parallel-corpora/{parallelCorpusId}", Name = Endpoints.GetParallelTranslationCorpus)] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + ParallelCorpus? 
corpus = engine.ParallelCorpora.FirstOrDefault(f => f.Id == parallelCorpusId); + if (corpus == null) + return NotFound(); + return Ok(Map(id, corpus)); + } + + /// + /// Remove a parallel corpus from a translation engine + /// + /// + /// Removing a parallel corpus will remove all pretranslations associated with that corpus. + /// + /// The translation engine id + /// The parallel corpus id + /// + /// The parallel corpus was deleted successfully. + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateTranslationEngines)] + [HttpDelete("{id}/parallel-corpora/{parallelCorpusId}")] + [ProducesResponseType(typeof(void), StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task DeleteParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + await _engineService.DeleteParallelCorpusAsync(id, parallelCorpusId, cancellationToken); + return Ok(); + } + /// /// Get all pretranslations in a corpus of a translation engine /// @@ -1007,6 +1207,21 @@ CancellationToken cancellationToken }; } + private async Task MapAsync( + IRequestClient getDataFileClient, + string corpusId, + TranslationParallelCorpusConfigDto source, + CancellationToken cancellationToken + ) + { + return new ParallelCorpus + { + Id = corpusId, + SourceCorpora = await MapAsync(getDataFileClient, source.SourceCorpusIds, cancellationToken), + TargetCorpora = await 
MapAsync(getDataFileClient, source.TargetCorpusIds, cancellationToken) + }; + } + private async Task> MapAsync( IRequestClient getDataFileClient, IEnumerable fileConfigs, @@ -1040,6 +1255,47 @@ CancellationToken cancellationToken return files; } + private async Task> MapAsync( + IRequestClient getCorpusClient, + IEnumerable corpusIds, + CancellationToken cancellationToken + ) + { + var corpora = new List(); + foreach (string corpusId in corpusIds) + { + Response response = await getCorpusClient.GetResponse< + CorpusResult, + CorpusNotFound + >(new GetCorpus { CorpusId = corpusId, Owner = Owner }, cancellationToken); + if (response.Is(out Response? result)) + { + corpora.Add( + new MonolingualCorpus + { + Id = corpusId, + Name = result.Message.Name ?? "", + Language = result.Message.Language, + Files = result + .Message.Files.Select(f => new CorpusFile + { + Id = f.File.DataFileId, + Filename = f.File.Filename, + Format = f.File.Format, + TextId = f.TextId + }) + .ToList(), + } + ); + } + else if (response.Is(out Response? _)) + { + throw new InvalidOperationException($"The corpus {corpusId} cannot be found."); + } + } + return corpora; + } + private Engine Map(TranslationEngineConfigDto source) { return new Engine @@ -1072,29 +1328,57 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source) return null; var corpusIds = new HashSet(engine.Corpora.Select(c => c.Id)); + var parallelCorpusIds = new HashSet(engine.ParallelCorpora.Select(c => c.Id)); var pretranslateCorpora = new List(); - foreach (PretranslateCorpusConfigDto ptcc in source) + foreach (PretranslateCorpusConfigDto pcc in source) { - if (!corpusIds.Contains(ptcc.CorpusId)) + if (pcc.CorpusId != null) { - throw new InvalidOperationException( - $"The corpus {ptcc.CorpusId} is not valid: This corpus does not exist for engine {engine.Id}." 
+ if (pcc.ParallelCorpusId != null) + { + throw new InvalidOperationException($"Only one of ParallelCorpusId and CorpusId can be set."); + } + if (!corpusIds.Contains(pcc.CorpusId)) + { + throw new InvalidOperationException( + $"The corpus {pcc.CorpusId} is not valid: This corpus does not exist for engine {engine.Id}." + ); + } + if (pcc.TextIds != null && pcc.ScriptureRange != null) + { + throw new InvalidOperationException( + $"The corpus {pcc.CorpusId} is not valid: Set at most one of TextIds and ScriptureRange." + ); + } + pretranslateCorpora.Add( + new PretranslateCorpus + { + CorpusRef = pcc.CorpusId, + TextIds = pcc.TextIds?.ToList(), + ScriptureRange = pcc.ScriptureRange + } ); } - if (ptcc.TextIds != null && ptcc.ScriptureRange != null) + else { - throw new InvalidOperationException( - $"The corpus {ptcc.CorpusId} is not valid: Set at most one of TextIds and ScriptureRange." - ); - } - pretranslateCorpora.Add( - new PretranslateCorpus + if (pcc.ParallelCorpusId == null) { - CorpusRef = ptcc.CorpusId, - TextIds = ptcc.TextIds?.ToList(), - ScriptureRange = ptcc.ScriptureRange + throw new InvalidOperationException($"One of ParallelCorpusId and CorpusId must be set."); } - ); + if (!parallelCorpusIds.Contains(pcc.ParallelCorpusId)) + { + throw new InvalidOperationException( + $"The parallel corpus {pcc.ParallelCorpusId} is not valid: This parallel corpus does not exist for engine {engine.Id}." 
+ ); + } + pretranslateCorpora.Add( + new PretranslateCorpus + { + ParallelCorpusRef = pcc.ParallelCorpusId, + SourceFilters = pcc.SourceFilters?.Select(Map).ToList() + } + ); + } } return pretranslateCorpora; } @@ -1105,33 +1389,78 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source) return null; var corpusIds = new HashSet(engine.Corpora.Select(c => c.Id)); + var parallelCorpusIds = new HashSet(engine.ParallelCorpora.Select(c => c.Id)); var trainOnCorpora = new List(); foreach (TrainingCorpusConfigDto tcc in source) { - if (!corpusIds.Contains(tcc.CorpusId)) + if (tcc.CorpusId != null) { - throw new InvalidOperationException( - $"The corpus {tcc.CorpusId} is not valid: This corpus does not exist for engine {engine.Id}." + if (tcc.ParallelCorpusId != null) + { + throw new InvalidOperationException($"Only one of ParallelCorpusId and CorpusId can be set."); + } + if (!corpusIds.Contains(tcc.CorpusId)) + { + throw new InvalidOperationException( + $"The corpus {tcc.CorpusId} is not valid: This corpus does not exist for engine {engine.Id}." + ); + } + if (tcc.TextIds != null && tcc.ScriptureRange != null) + { + throw new InvalidOperationException( + $"The corpus {tcc.CorpusId} is not valid: Set at most one of TextIds and ScriptureRange." + ); + } + trainOnCorpora.Add( + new TrainingCorpus + { + CorpusRef = tcc.CorpusId, + TextIds = tcc.TextIds?.ToList(), + ScriptureRange = tcc.ScriptureRange + } ); } - if (tcc.TextIds != null && tcc.ScriptureRange != null) + else { - throw new InvalidOperationException( - $"The corpus {tcc.CorpusId} is not valid: Set at most one of TextIds and ScriptureRange." 
- ); - } - trainOnCorpora.Add( - new TrainingCorpus + if (tcc.ParallelCorpusId == null) { - CorpusRef = tcc.CorpusId, - TextIds = tcc.TextIds?.ToList(), - ScriptureRange = tcc.ScriptureRange + throw new InvalidOperationException($"One of ParallelCorpusId and CorpusId must be set."); } - ); + if (!parallelCorpusIds.Contains(tcc.ParallelCorpusId)) + { + throw new InvalidOperationException( + $"The parallel corpus {tcc.ParallelCorpusId} is not valid: This parallel corpus does not exist for engine {engine.Id}." + ); + } + trainOnCorpora.Add( + new TrainingCorpus + { + ParallelCorpusRef = tcc.ParallelCorpusId, + SourceFilters = tcc.SourceFilters?.Select(Map).ToList(), + TargetFilters = tcc.TargetFilters?.Select(Map).ToList() + } + ); + } } return trainOnCorpora; } + private static ParallelCorpusFilter Map(ParallelCorpusFilterConfigDto source) + { + if (source.TextIds != null && source.ScriptureRange != null) + { + throw new InvalidOperationException( + $"The parallel corpus filter for corpus {source.CorpusId} is not valid: At most, one of TextIds and ScriptureRange can be set." + ); + } + return new ParallelCorpusFilter + { + CorpusRef = source.CorpusId, + TextIds = source.TextIds, + ScriptureRange = source.ScriptureRange + }; + } + private static Dictionary? Map(object? source) { try @@ -1194,30 +1523,75 @@ private PretranslateCorpusDto Map(string engineId, PretranslateCorpus source) { return new PretranslateCorpusDto { - Corpus = new ResourceLinkDto - { - Id = source.CorpusRef, - Url = _urlService.GetUrl( - Endpoints.GetTranslationCorpus, - new { id = engineId, corpusId = source.CorpusRef } - ) - }, + Corpus = + source.CorpusRef != null + ? 
new ResourceLinkDto + { + Id = source.CorpusRef, + Url = _urlService.GetUrl( + Endpoints.GetTranslationCorpus, + new { id = engineId, corpusId = source.CorpusRef } + ) + } + : null, TextIds = source.TextIds, - ScriptureRange = source.ScriptureRange + ScriptureRange = source.ScriptureRange, + ParallelCorpus = + source.ParallelCorpusRef != null + ? new ResourceLinkDto + { + Id = source.ParallelCorpusRef, + Url = _urlService.GetUrl( + Endpoints.GetParallelTranslationCorpus, + new { id = engineId, parallelCorpusId = source.ParallelCorpusRef } + ) + } + : null, + SourceFilters = source.SourceFilters?.Select(Map).ToList() }; } private TrainingCorpusDto Map(string engineId, TrainingCorpus source) { return new TrainingCorpusDto + { + Corpus = + source.CorpusRef != null + ? new ResourceLinkDto + { + Id = source.CorpusRef, + Url = _urlService.GetUrl( + Endpoints.GetTranslationCorpus, + new { id = engineId, corpusId = source.CorpusRef } + ) + } + : null, + TextIds = source.TextIds, + ScriptureRange = source.ScriptureRange, + ParallelCorpus = + source.ParallelCorpusRef != null + ? 
new ResourceLinkDto + { + Id = source.ParallelCorpusRef, + Url = _urlService.GetUrl( + Endpoints.GetParallelTranslationCorpus, + new { id = engineId, parallelCorpusId = source.ParallelCorpusRef } + ) + } + : null, + SourceFilters = source.SourceFilters?.Select(Map).ToList(), + TargetFilters = source.TargetFilters?.Select(Map).ToList() + }; + } + + private ParallelCorpusFilterDto Map(ParallelCorpusFilter source) + { + return new ParallelCorpusFilterDto { Corpus = new ResourceLinkDto { Id = source.CorpusRef, - Url = _urlService.GetUrl( - Endpoints.GetTranslationCorpus, - new { id = engineId, corpusId = source.CorpusRef } - ) + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = source.CorpusRef }) }, TextIds = source.TextIds, ScriptureRange = source.ScriptureRange @@ -1309,6 +1683,34 @@ private TranslationCorpusDto Map(string engineId, Corpus source) }; } + private TranslationParallelCorpusDto Map(string engineId, ParallelCorpus source) + { + return new TranslationParallelCorpusDto + { + Id = source.Id, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = engineId, corpusId = source.Id }), + Engine = new ResourceLinkDto + { + Id = engineId, + Url = _urlService.GetUrl(Endpoints.GetTranslationEngine, new { id = engineId }) + }, + SourceCorpora = source + .SourceCorpora.Select(c => new ResourceLinkDto + { + Id = c.Id, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { Id = c.Id }) + }) + .ToList(), + TargetCorpora = source + .TargetCorpora.Select(c => new ResourceLinkDto + { + Id = c.Id, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { Id = c.Id }) + }) + .ToList() + }; + } + private TranslationCorpusFileDto Map(CorpusFile source) { return new TranslationCorpusFileDto diff --git a/src/Serval/src/Serval.Translation/Models/Engine.cs b/src/Serval/src/Serval.Translation/Models/Engine.cs index 9548ad17..b4d0f55b 100644 --- a/src/Serval/src/Serval.Translation/Models/Engine.cs +++ b/src/Serval/src/Serval.Translation/Models/Engine.cs @@ -9,7 +9,8 @@ public 
record Engine : IOwnedEntity public required string TargetLanguage { get; init; } public required string Type { get; init; } public required string Owner { get; init; } - public required IReadOnlyList Corpora { get; init; } + public IReadOnlyList Corpora { get; init; } = new List(); + public IReadOnlyList ParallelCorpora { get; init; } = new List(); public bool? IsModelPersisted { get; init; } public bool IsBuilding { get; init; } public int ModelRevision { get; init; } diff --git a/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs b/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs new file mode 100644 index 00000000..0762e878 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs @@ -0,0 +1,9 @@ +namespace Serval.Translation.Models; + +public record MonolingualCorpus +{ + public required string Id { get; set; } + public string? Name { get; set; } + public required string Language { get; set; } + public required IReadOnlyList Files { get; set; } +} diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs b/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs new file mode 100644 index 00000000..0fd059c7 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs @@ -0,0 +1,8 @@ +namespace Serval.Translation.Models; + +public record ParallelCorpus +{ + public required string Id { get; set; } + public IReadOnlyList SourceCorpora { get; set; } = new List(); + public IReadOnlyList TargetCorpora { get; set; } = new List(); +} diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs b/src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs new file mode 100644 index 00000000..1cb311e8 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs @@ -0,0 +1,8 @@ +namespace Serval.Translation.Contracts; + +public record ParallelCorpusFilter +{ + public required string CorpusRef { get; set; } + public IReadOnlyList? 
TextIds { get; set; } + public string? ScriptureRange { get; set; } +} diff --git a/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs b/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs index 4adb7640..4be036b6 100644 --- a/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs +++ b/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs @@ -2,7 +2,10 @@ public record PretranslateCorpus { - public required string CorpusRef { get; set; } + public string? CorpusRef { get; set; } public IReadOnlyList? TextIds { get; set; } public string? ScriptureRange { get; set; } + + public string? ParallelCorpusRef { get; set; } + public IReadOnlyList? SourceFilters { get; set; } } diff --git a/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs b/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs index e91fb1ea..fc927406 100644 --- a/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs +++ b/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs @@ -2,7 +2,11 @@ namespace Serval.Translation.Models; public record TrainingCorpus { - public required string CorpusRef { get; set; } = default!; + public string? CorpusRef { get; set; } = default!; public IReadOnlyList? TextIds { get; set; } public string? ScriptureRange { get; set; } + + public string? ParallelCorpusRef { get; set; } + public IReadOnlyList? SourceFilters { get; set; } + public IReadOnlyList? 
TargetFilters { get; set; } } diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs b/src/Serval/src/Serval.Translation/Services/EngineService.cs index bfee7000..47c4ab9b 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -199,126 +199,68 @@ await _dataAccessContext.WithTransactionAsync( ); } + private Dictionary> GetChapters(string fileLocation, string scriptureRange) + { + try + { + return ScriptureRangeParser.GetChapters( + scriptureRange, + _scriptureDataFileService.GetParatextProjectSettings(fileLocation).Versification + ); + } + catch (ArgumentException ae) + { + throw new InvalidOperationException($"The scripture range {scriptureRange} is not valid: {ae.Message}"); + } + } + public async Task StartBuildAsync(Build build, CancellationToken cancellationToken = default) { Engine engine = await GetAsync(build.EngineRef, cancellationToken); await _builds.InsertAsync(build, cancellationToken); + TranslationEngineApi.TranslationEngineApiClient client = + _grpcClientFactory.CreateClient(engine.Type); + try { - var pretranslate = build.Pretranslate?.ToDictionary(c => c.CorpusRef); - var trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef); - TranslationEngineApi.TranslationEngineApiClient client = - _grpcClientFactory.CreateClient(engine.Type); - Dictionary> GetChapters(V1.Corpus corpus, string scriptureRange) + StartBuildRequest request; + if (engine.ParallelCorpora.Any()) { - try - { - return ScriptureRangeParser.GetChapters( - scriptureRange, - _scriptureDataFileService - .GetParatextProjectSettings(corpus.TargetFiles.First().Location) - .Versification - ); - } - catch (ArgumentException ae) + var trainOn = build.TrainOn?.ToDictionary(c => c.ParallelCorpusRef!); + var pretranslate = build.Pretranslate?.ToDictionary(c => c.ParallelCorpusRef!); + request = new StartBuildRequest { - throw new InvalidOperationException( - $"The scripture range 
{scriptureRange} is not valid: {ae.Message}" - ); - } + EngineType = engine.Type, + EngineId = engine.Id, + BuildId = build.Id, + Corpora = + { + engine.ParallelCorpora.Select(c => + Map(c, trainOn?.GetValueOrDefault(c.Id), pretranslate?.GetValueOrDefault(c.Id)) + ) + } + }; } - var request = new StartBuildRequest + else { - EngineType = engine.Type, - EngineId = engine.Id, - BuildId = build.Id, - Corpora = + var pretranslate = build.Pretranslate?.ToDictionary(c => c.CorpusRef!); + var trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef!); + + request = new StartBuildRequest { - engine.Corpora.Select(c => + EngineType = engine.Type, + EngineId = engine.Id, + BuildId = build.Id, + Corpora = { - V1.Corpus corpus = Map(c); - if (pretranslate?.TryGetValue(c.Id, out PretranslateCorpus? pretranslateCorpus) ?? false) - { - corpus.PretranslateAll = - pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null; - if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null) - { - throw new InvalidOperationException( - $"The corpus {c.Id} cannot specify both 'textIds' and 'scriptureRange' for 'pretranslate'." - ); - } - if (pretranslateCorpus.TextIds is not null) - corpus.PretranslateTextIds.Add(pretranslateCorpus.TextIds); - if (!string.IsNullOrEmpty(pretranslateCorpus.ScriptureRange)) - { - if ( - c.TargetFiles.Count > 1 - || c.TargetFiles[0].Format != Shared.Contracts.FileFormat.Paratext - ) - { - throw new InvalidOperationException( - $"The corpus {c.Id} is not compatible with using a scripture range" - ); - } - corpus.PretranslateChapters.Add( - GetChapters(corpus, pretranslateCorpus.ScriptureRange) - .Select( - (kvp) => - { - var scriptureChapters = new ScriptureChapters(); - scriptureChapters.Chapters.Add(kvp.Value); - return (kvp.Key, scriptureChapters); - } - ) - .ToDictionary() - ); - } - } - if (trainOn?.TryGetValue(c.Id, out TrainingCorpus? trainingCorpus) ?? 
false) - { - corpus.TrainOnAll = trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null; - if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null) - { - throw new InvalidOperationException( - $"The corpus {c.Id} cannot specify both 'textIds' and 'scriptureRange' for trainOn" - ); - } - if (trainingCorpus.TextIds is not null) - corpus.TrainOnTextIds.Add(trainingCorpus.TextIds); - if (!string.IsNullOrEmpty(trainingCorpus.ScriptureRange)) - { - if ( - c.TargetFiles.Count > 1 - || c.TargetFiles[0].Format != Shared.Contracts.FileFormat.Paratext - ) - { - throw new InvalidOperationException( - $"The corpus {c.Id} is not compatible with using a scripture range" - ); - } - corpus.TrainOnChapters.Add( - GetChapters(corpus, trainingCorpus.ScriptureRange) - .Select( - (kvp) => - { - var scriptureChapters = new ScriptureChapters(); - scriptureChapters.Chapters.Add(kvp.Value); - return (kvp.Key, scriptureChapters); - } - ) - .ToDictionary() - ); - } - } - else if (trainOn is null) - { - corpus.TrainOnAll = true; - } - return corpus; - }) - } - }; + engine.Corpora.Select(c => + Map(c, trainOn?.GetValueOrDefault(c.Id), pretranslate?.GetValueOrDefault(c.Id)) + ) + } + }; + } + if (build.Options is not null) request.Options = JsonSerializer.Serialize(build.Options); @@ -349,7 +291,6 @@ Dictionary> GetChapters(V1.Corpus corpus, string scriptureRang _logger.LogInformation("Error parsing build request summary."); _logger.LogInformation("{request}", JsonSerializer.Serialize(request)); } - await client.StartBuildAsync(request, cancellationToken: cancellationToken); } catch @@ -475,6 +416,76 @@ string id in originalEngine.Corpora.SelectMany(c => } } + public Task AddParallelCorpusAsync( + string engineId, + Models.ParallelCorpus corpus, + CancellationToken cancellationToken = default + ) + { + return Entities.UpdateAsync( + engineId, + u => u.Add(e => e.ParallelCorpora, corpus), + cancellationToken: cancellationToken + ); + } + + public async 
Task UpdateParallelCorpusAsync( + string engineId, + string parallelCorpusId, + IReadOnlyList? sourceCorpora, + IReadOnlyList? targetCorpora, + CancellationToken cancellationToken = default + ) + { + Engine? engine = await Entities.UpdateAsync( + e => e.Id == engineId && e.ParallelCorpora.Any(c => c.Id == parallelCorpusId), + u => + { + if (sourceCorpora is not null) + u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].SourceCorpora, sourceCorpora); + if (targetCorpora is not null) + u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].TargetCorpora, targetCorpora); + }, + cancellationToken: cancellationToken + ); + if (engine is null) + { + throw new EntityNotFoundException( + $"Could not find the Corpus '{parallelCorpusId}' in Engine '{engineId}'." + ); + } + + return engine.ParallelCorpora.First(c => c.Id == parallelCorpusId); + } + + public async Task DeleteParallelCorpusAsync( + string engineId, + string parallelCorpusId, + CancellationToken cancellationToken = default + ) + { + Engine? originalEngine = null; + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + originalEngine = await Entities.UpdateAsync( + engineId, + u => u.RemoveAll(e => e.ParallelCorpora, c => c.Id == parallelCorpusId), + returnOriginal: true, + cancellationToken: ct + ); + if (originalEngine is null || !originalEngine.ParallelCorpora.Any(c => c.Id == parallelCorpusId)) + { + throw new EntityNotFoundException( + $"Could not find the Corpus '{parallelCorpusId}' in Engine '{engineId}'." + ); + } + await _pretranslations.DeleteAllAsync(pt => pt.CorpusRef == parallelCorpusId, ct); + }, + cancellationToken: cancellationToken + ); + } + public Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cancellationToken = default) { return Entities.UpdateAllAsync( @@ -581,16 +592,191 @@ private Models.WordGraphArc Map(V1.WordGraphArc source) }; } - private V1.Corpus Map(Models.Corpus source) + private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? 
trainingCorpus, PretranslateCorpus? pretranslateCorpus) { - return new V1.Corpus + IEnumerable sourceFiles = source.SourceFiles.Select(Map); + IEnumerable targetFiles = source.TargetFiles.Select(Map); + V1.MonolingualCorpus sourceCorpus = + new() { Language = source.SourceLanguage, Files = { source.SourceFiles.Select(Map) } }; + V1.MonolingualCorpus targetCorpus = + new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } }; + + if (trainingCorpus != null) + { + if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null) + { + throw new InvalidOperationException( + $"The corpus {source.Id} cannot specify both 'textIds' and 'scriptureRange' for trainOn" + ); + } + if (trainingCorpus.TextIds is not null) + { + sourceCorpus.TrainOnTextIds.Add(trainingCorpus.TextIds); + targetCorpus.TrainOnTextIds.Add(trainingCorpus.TextIds); + } + if (!string.IsNullOrEmpty(trainingCorpus.ScriptureRange)) + { + if (targetCorpus.Files.Count > 1 || targetCorpus.Files[0].Format != V1.FileFormat.Paratext) + { + throw new InvalidOperationException( + $"The corpus {source.Id} is not compatible with using a scripture range" + ); + } + var chapters = GetChapters(targetCorpus.Files[0].Location, trainingCorpus.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary(); + sourceCorpus.TrainOnChapters.Add(chapters); + targetCorpus.TrainOnChapters.Add(chapters); + } + } + if (pretranslateCorpus != null) + { + if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null) + { + throw new InvalidOperationException( + $"The corpus {source.Id} cannot specify both 'textIds' and 'scriptureRange' for 'pretranslate'." 
+ ); + } + if (pretranslateCorpus.TextIds is not null) + sourceCorpus.PretranslateTextIds.Add(pretranslateCorpus.TextIds); + if (!string.IsNullOrEmpty(pretranslateCorpus.ScriptureRange)) + { + if (targetCorpus.Files.Count > 1 || targetCorpus.Files[0].Format != V1.FileFormat.Paratext) + { + throw new InvalidOperationException( + $"The corpus {source.Id} is not compatible with using a scripture range" + ); + } + sourceCorpus.PretranslateChapters.Add( + GetChapters(targetCorpus.Files[0].Location, pretranslateCorpus.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary() + ); + } + } + return new V1.ParallelCorpus + { + Id = source.Id, + SourceCorpora = { sourceCorpus }, + TargetCorpora = { targetCorpus } + }; + } + + private V1.ParallelCorpus Map( + Models.ParallelCorpus source, + TrainingCorpus? trainingCorpus, + PretranslateCorpus? pretranslateCorpus + ) + { + string? referenceFileLocation = + source.TargetCorpora.Count > 0 && source.TargetCorpora[0].Files.Count > 0 + ? Map(source.TargetCorpora[0].Files[0]).Location + : null; + + return new V1.ParallelCorpus + { + Id = source.Id, + SourceCorpora = + { + source.SourceCorpora.Select(sc => + Map( + sc, + trainingCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(), + pretranslateCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(), + referenceFileLocation + ) + ) + }, + TargetCorpora = + { + source.TargetCorpora.Select(tc => + Map( + tc, + trainingCorpus?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(), + null, + referenceFileLocation + ) + ) + } + }; + } + + private V1.MonolingualCorpus Map( + Models.MonolingualCorpus source, + ParallelCorpusFilter? trainingFilter, + ParallelCorpusFilter? pretranslateFilter, + string? referenceFileLocation + ) + { + Dictionary? 
trainOnChapters = null; + if ( + trainingFilter is not null + && trainingFilter.ScriptureRange is not null + && referenceFileLocation is not null + ) + { + trainOnChapters = GetChapters(referenceFileLocation, trainingFilter.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary(); + } + + Dictionary? pretranslateChapters = null; + if ( + pretranslateFilter is not null + && pretranslateFilter.ScriptureRange is not null + && referenceFileLocation is not null + ) + { + GetChapters(referenceFileLocation, pretranslateFilter.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary(); + } + + var corpus = new V1.MonolingualCorpus { Id = source.Id, - SourceLanguage = source.SourceLanguage, - TargetLanguage = source.TargetLanguage, - SourceFiles = { source.SourceFiles.Select(Map) }, - TargetFiles = { source.TargetFiles.Select(Map) } + Language = source.Language, + Files = { source.Files.Select(Map) } }; + + if (trainOnChapters is not null) + corpus.TrainOnChapters.Add(trainOnChapters); + if (trainingFilter?.TextIds is not null) + corpus.TrainOnTextIds.Add(trainingFilter.TextIds); + if (pretranslateChapters is not null) + corpus.PretranslateChapters.Add(pretranslateChapters); + if (pretranslateFilter?.TextIds is not null) + corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds); + + return corpus; } private V1.CorpusFile Map(Models.CorpusFile source) diff --git a/src/Serval/src/Serval.Translation/Services/IEngineService.cs b/src/Serval/src/Serval.Translation/Services/IEngineService.cs index ce8b1765..6497ac1a 100644 --- a/src/Serval/src/Serval.Translation/Services/IEngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/IEngineService.cs @@ -52,6 +52,20 @@ Task 
DeleteCorpusAsync( CancellationToken cancellationToken = default ); + Task AddParallelCorpusAsync(string engineId, ParallelCorpus corpus, CancellationToken cancellationToken = default); + Task UpdateParallelCorpusAsync( + string engineId, + string parallelCorpusId, + IReadOnlyList? sourceCorpora, + IReadOnlyList? targetCorpora, + CancellationToken cancellationToken = default + ); + Task DeleteParallelCorpusAsync( + string engineId, + string parallelCorpusId, + CancellationToken cancellationToken = default + ); + Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cancellationToken = default); Task GetQueueAsync(string engineType, CancellationToken cancellationToken = default); diff --git a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs index 2be669b2..6d0b2df2 100644 --- a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs +++ b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs @@ -24,6 +24,13 @@ public class TranslationEngineTests new TranslationCorpusFileConfig { FileId = FILE2_ID, TextId = "all" } } }; + private static readonly TranslationParallelCorpusConfig TestParallelCorpusConfig = + new() + { + Name = "TestCorpus", + SourceCorpusIds = [SOURCE_CORPUS_ID], + TargetCorpusIds = [TARGET_CORPUS_ID], + }; private static readonly TranslationCorpusConfig TestCorpusConfigNonEcho = new() { @@ -63,7 +70,8 @@ public class TranslationEngineTests private const string FILE3_FILENAME = "file_c"; private const string FILE4_ID = "f00000000000000000000004"; private const string FILE4_FILENAME = "file_d"; - + private const string SOURCE_CORPUS_ID = "cc0000000000000000000001"; + private const string TARGET_CORPUS_ID = "cc0000000000000000000002"; private const string DOES_NOT_EXIST_ENGINE_ID = "e00000000000000000000004"; private const string DOES_NOT_EXIST_CORPUS_ID = "c00000000000000000000001"; @@ -159,6 
+167,22 @@ public async Task SetUp() Format = Shared.Contracts.FileFormat.Paratext }; await _env.DataFiles.InsertAllAsync([srcFile, trgFile, srcParatextFile, trgParatextFile]); + + var srcCorpus = new DataFiles.Models.Corpus + { + Id = SOURCE_CORPUS_ID, + Language = "en", + Owner = "client1", + Files = [new() { File = srcFile, TextId = "all" }] + }; + var trgCorpus = new DataFiles.Models.Corpus + { + Id = TARGET_CORPUS_ID, + Language = "en", + Owner = "client1", + Files = [new() { File = trgFile, TextId = "all" }] + }; + await _env.Corpora.InsertAllAsync([srcCorpus, trgCorpus]); } [Test] @@ -777,6 +801,307 @@ string engineId } } + [Test] + public async Task AddParallelCorpusToEngineByIdAsync() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient( + new[] { Scopes.UpdateTranslationEngines } + ); + TranslationParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + Assert.Multiple(() => + { + Assert.That(result.SourceCorpora.First().Id, Is.EqualTo(SOURCE_CORPUS_ID)); + Assert.That(result.TargetCorpora.First().Id, Is.EqualTo(TARGET_CORPUS_ID)); + }); + Engine? engine = await _env.Engines.GetAsync(ECHO_ENGINE1_ID); + Assert.That(engine, Is.Not.Null); + Assert.Multiple(() => + { + Assert.That(engine.ParallelCorpora[0].SourceCorpora[0].Files[0].Filename, Is.EqualTo(FILE1_FILENAME)); + Assert.That(engine.ParallelCorpora[0].TargetCorpora[0].Files[0].Filename, Is.EqualTo(FILE2_FILENAME)); + }); + } + + public void AddParallelCorpusToEngineById_NoSuchEngine() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient( + new[] { Scopes.UpdateTranslationEngines } + ); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.AddParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, TestParallelCorpusConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void AddParallelCorpusToEngineById_NotAuthorized() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.AddParallelCorpusAsync(ECHO_ENGINE1_ID, TestParallelCorpusConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task UpdateParallelCorpusByIdForEngineByIdAsync() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + TranslationParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + var updateConfig = new TranslationParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id, updateConfig); + Engine? engine = await _env.Engines.GetAsync(ECHO_ENGINE1_ID); + Assert.That(engine, Is.Not.Null); + Assert.Multiple(() => + { + Assert.That(engine.ParallelCorpora[0].SourceCorpora[0].Files[0].Filename, Is.EqualTo(FILE1_FILENAME)); + Assert.That(engine.ParallelCorpora[0].TargetCorpora[0].Files[0].Filename, Is.EqualTo(FILE2_FILENAME)); + }); + } + + [Test] + public void UpdateParallelCorpusByIdForEngineById_NoSuchCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + var updateConfig = new TranslationParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID, updateConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void UpdateParallelCorpusByIdForEngineById_NoSuchEngine() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + var updateConfig = new TranslationParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID, updateConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void UpdateParallelCorpusByIdForEngineById_NotAuthorized() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + var updateConfig = new TranslationParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID, updateConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task GetAllParallelCorporaForEngineByIdAsync() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + TranslationParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + TranslationParallelCorpus resultAfterAdd = (await client.GetAllParallelCorporaAsync(ECHO_ENGINE1_ID)).First(); + Assert.Multiple(() => + { + Assert.That(resultAfterAdd.Id, Is.EqualTo(result.Id)); + Assert.That(resultAfterAdd.SourceCorpora.First().Id, Is.EqualTo(result.SourceCorpora.First().Id)); + }); + } + + [Test] + public void GetAllParallelCorporaForEngineById_NoSuchEngine() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + TranslationParallelCorpus result = ( + await client.GetAllParallelCorporaAsync(DOES_NOT_EXIST_ENGINE_ID) + ).First(); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetAllParallelCorporaForEngineById_NotAuthorized() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + TranslationParallelCorpus result = (await client.GetAllParallelCorporaAsync(ECHO_ENGINE1_ID)).First(); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task GetParallelCorpusByIdForEngineByIdAsync() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + Assert.That(result, Is.Not.Null); + TranslationParallelCorpus resultAfterAdd = await client.GetParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id); + Assert.Multiple(() => + { + Assert.That(resultAfterAdd.Id, Is.EqualTo(result.Id)); + Assert.That(resultAfterAdd.SourceCorpora[0].Id, Is.EqualTo(result.SourceCorpora[0].Id)); + }); + } + + [Test] + public void GetParallelCorpusByIdForEngineById_NoCorpora() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + TranslationParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + ECHO_ENGINE1_ID, + DOES_NOT_EXIST_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetParallelCorpusByIdForEngineById_NoSuchEngine() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + TranslationParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + DOES_NOT_EXIST_ENGINE_ID, + SOURCE_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public async Task GetParallelCorpusByIdForEngineById_NoSuchCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + TranslationParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + ECHO_ENGINE1_ID, + DOES_NOT_EXIST_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetParallelCorpusByIdForEngineById_NotAuthorized() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + TranslationParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + ECHO_ENGINE1_ID, + DOES_NOT_EXIST_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task DeleteParallelCorpusByIdForEngineByIdAsync() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + TranslationParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id); + ICollection resultsAfterDelete = await client.GetAllParallelCorporaAsync( + ECHO_ENGINE1_ID + ); + Assert.That(resultsAfterDelete, Has.Count.EqualTo(0)); + } + + [Test] + public void DeleteParallelCorpusByIdForEngineById_NoSuchCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void DeleteParallelCorpusByIdForEngineById_NoSuchEngine() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.DeleteParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void DeleteParallelCorpusByIdForEngineById_NotAuthorized() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, SOURCE_CORPUS_ID); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + [Test] public async Task DeleteCorpusAndFilesAsync() { @@ -1241,6 +1566,174 @@ public async Task CancelCurrentBuildForEngineByIdAsync( } } + [Test] + public async Task StartBuild_ParallelCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }] + }; + TrainingCorpusConfig tcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }], + TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID, TextIds = ["all"] }] + }; + ; + TranslationBuildConfig tbc = new TranslationBuildConfig + { + Pretranslate = [ptcc], + TrainOn = [tcc], + Options = """ + {"max_steps":10, + "use_key_terms":false, + "some_double":10.5, + "some_nested": {"more_nested": {"other_double":10.5}}, + "some_string":"string"} + """ + }; + TranslationBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID); + }); + + TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + Assert.That(build, Is.Not.Null); + + build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID); + 
Assert.That(build, Is.Not.Null); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }] + }; + TrainingCorpusConfig tcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID, TextIds = ["all"] }], + TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID, TextIds = ["all"] }] + }; + ; + TranslationBuildConfig tbc = new TranslationBuildConfig + { + Pretranslate = [ptcc], + TrainOn = [tcc], + Options = """ + {"max_steps":10, + "use_key_terms":false, + "some_double":10.5, + "some_nested": {"more_nested": {"other_double":10.5}}, + "some_string":"string"} + """ + }; + TranslationBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID); + }); + + TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + Assert.That(build, Is.Not.Null); + + build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID); + Assert.That(build, Is.Not.Null); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_PretranslateParallelAndNormalCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationCorpus addedCorpus = await client.AddCorpusAsync(NMT_ENGINE1_ID, TestCorpusConfig); + TranslationParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = new() { CorpusId = addedCorpus.Id, ParallelCorpusId = addedParallelCorpus.Id }; + TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + 
TranslationBuildConfig tbc = new TranslationBuildConfig { Pretranslate = [ptcc], TrainOn = [tcc] }; + TranslationBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + }); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_TrainOnParallelAndNormalCorpus() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationCorpus addedCorpus = await client.AddCorpusAsync(NMT_ENGINE1_ID, TestCorpusConfig); + TranslationParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + TrainingCorpusConfig tcc = new() { CorpusId = addedCorpus.Id, ParallelCorpusId = addedParallelCorpus.Id }; + TranslationBuildConfig tbc = new TranslationBuildConfig { Pretranslate = [ptcc], TrainOn = [tcc] }; + TranslationBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + }); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_PretranslateNoCorpusSpecified() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = new() { }; + TrainingCorpusConfig tcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + TranslationBuildConfig tbc = new TranslationBuildConfig { Pretranslate = [ptcc], TrainOn = [tcc] }; + TranslationBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + }); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_TrainOnNoCorpusSpecified() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + 
TranslationParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + NMT_ENGINE1_ID, + TestParallelCorpusConfig + ); + PretranslateCorpusConfig ptcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + TrainingCorpusConfig tcc = new() { }; + TranslationBuildConfig tbc = new TranslationBuildConfig { Pretranslate = [ptcc], TrainOn = [tcc] }; + TranslationBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc); + }); + } + [Test] public async Task TryToQueueMultipleBuildsPerSingleUser() { @@ -1294,6 +1787,44 @@ public async Task GetPretranslatedUsfmAsync_BookExists() ); } + [Test] + public async Task GetPretranslationsByTextId() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + TranslationCorpus addedCorpus = await client.AddCorpusAsync(ECHO_ENGINE1_ID, TestCorpusConfigScripture); + + await _env.Engines.UpdateAsync(ECHO_ENGINE1_ID, u => u.Set(e => e.ModelRevision, 1)); + var pret = new Translation.Models.Pretranslation + { + CorpusRef = addedCorpus.Id, + TextId = "MAT", + EngineRef = ECHO_ENGINE1_ID, + Refs = ["MAT 1:1"], + Translation = "translation", + ModelRevision = 1 + }; + await _env.Pretranslations.InsertAsync(pret); + + IList pretranslations = await client.GetPretranslationsByTextIdAsync( + ECHO_ENGINE1_ID, + addedCorpus.Id, + "MAT" + ); + Assert.That(pretranslations, Has.Count.EqualTo(1)); + Assert.That(pretranslations[0].Translation, Is.EqualTo("translation")); + } + + [Test] + public void GetPretranslationsByTextId_EngineDoesNotExist() + { + TranslationEnginesClient client = _env.CreateTranslationEnginesClient(); + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.GetPretranslationsByTextIdAsync(DOES_NOT_EXIST_ENGINE_ID, DOES_NOT_EXIST_CORPUS_ID, "MAT"); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + [Test] public async Task GetPretranslatedUsfmAsync_BookDoesNotExist() { @@ -1375,6 +1906,7 @@ public TestEnvironment() _scope = Factory.Services.CreateScope(); Engines = _scope.ServiceProvider.GetRequiredService>(); DataFiles = _scope.ServiceProvider.GetRequiredService>(); + Corpora = _scope.ServiceProvider.GetRequiredService>(); Pretranslations = _scope.ServiceProvider.GetRequiredService< IRepository >(); @@ -1549,6 +2081,7 @@ public TestEnvironment() public ServalWebApplicationFactory Factory { get; } public IRepository Engines { get; } public IRepository DataFiles { get; } + public IRepository Corpora { get; } public IRepository Pretranslations { get; } public IRepository Builds { get; } public TranslationEngineApi.TranslationEngineApiClient EchoClient { get; } diff --git a/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs b/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs new file mode 100644 index 00000000..22cdd14e --- /dev/null +++ b/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs @@ -0,0 +1,57 @@ +namespace Serval.DataFiles.Services; + +[TestFixture] +public class CorpusServiceTests +{ + private const string CorpusId = "c00000000000000000000001"; + + private static readonly DataFile DefaultDataFile = + new() + { + Id = "df0000000000000000000001", + Owner = "owner1", + Name = "file1", + Filename = "file1.txt", + Format = FileFormat.Text + }; + private static readonly Corpus DefaultCorpus = + new() + { + Id = CorpusId, + Owner = "owner1", + Name = "corpus1", + Language = "en", + Files = new List() { new() { File = DefaultDataFile } } + }; + + [Test] + public async Task CreateAsync() + { + var env = new TestEnvironment(); + Corpus corpus = await env.Service.CreateAsync(DefaultCorpus); + 
Assert.That(corpus.Name, Is.EqualTo((await env.Service.GetAsync(CorpusId)).Name)); + } + + [Test] + public async Task UpdateAsync() + { + var env = new TestEnvironment(); + await env.Service.CreateAsync(DefaultCorpus); + await env.Service.UpdateAsync(CorpusId, new List()); + Corpus corpus = await env.Service.GetAsync(CorpusId); + Assert.That(corpus.Files, Has.Count.EqualTo(0)); + } + + private class TestEnvironment + { + public TestEnvironment() + { + Corpora = new MemoryRepository(); + Service = new CorpusService(Corpora); + } + + public MemoryRepository Corpora { get; } + + public CorpusService Service { get; } + } +} diff --git a/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs index 49e114cb..59d24d0c 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs @@ -118,28 +118,45 @@ public async Task StartBuildAsync_TrainOnNotSpecified() EngineType = "Smt", Corpora = { - new V1.Corpus + new V1.ParallelCorpus { Id = "corpus1", - SourceLanguage = "es", - TargetLanguage = "en", - TrainOnAll = true, - SourceFiles = + SourceCorpora = { - new V1.CorpusFile + new List { - Location = "file1.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } }, - TargetFiles = + TargetCorpora = { - new V1.CorpusFile + new List { - Location = "file2.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } } } @@ -170,29 +187,47 @@ await env.Service.StartBuildAsync( EngineType = "Smt", Corpora = { - new V1.Corpus + new V1.ParallelCorpus { Id = "corpus1", - SourceLanguage = "es", - 
TargetLanguage = "en", - TrainOnAll = false, - TrainOnTextIds = { }, - SourceFiles = + SourceCorpora = { - new V1.CorpusFile + new List { - Location = "file1.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "es", + TrainOnTextIds = { }, + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } }, - TargetFiles = + TargetCorpora = { - new V1.CorpusFile + new List { - Location = "file2.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "en", + TrainOnTextIds = { }, + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } } } @@ -223,29 +258,47 @@ await env.Service.StartBuildAsync( EngineType = "Smt", Corpora = { - new V1.Corpus + new V1.ParallelCorpus { Id = "corpus1", - SourceLanguage = "es", - TargetLanguage = "en", - TrainOnAll = false, - TrainOnTextIds = { "text1" }, - SourceFiles = + SourceCorpora = { - new V1.CorpusFile + new List { - Location = "file1.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "es", + TrainOnTextIds = { "text1" }, + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } }, - TargetFiles = + TargetCorpora = { - new V1.CorpusFile + new List { - Location = "file2.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "en", + TrainOnTextIds = { "text1" }, + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } } } @@ -276,28 +329,45 @@ await env.Service.StartBuildAsync( EngineType = "Smt", Corpora = { - new V1.Corpus + new V1.ParallelCorpus { Id = "corpus1", - SourceLanguage = "es", - TargetLanguage = "en", - TrainOnAll = true, - SourceFiles = + SourceCorpora = { - new V1.CorpusFile + new List { - Location = "file1.txt", - Format = FileFormat.Text, - TextId = "text1" + 
new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } }, - TargetFiles = + TargetCorpora = { - new V1.CorpusFile + new List { - Location = "file2.txt", - Format = FileFormat.Text, - TextId = "text1" + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "text1" + } + } + } } } } @@ -346,39 +416,763 @@ await env.Service.StartBuildAsync( EngineType = "Smt", Corpora = { - new V1.Corpus + new V1.ParallelCorpus { Id = "corpus1", - SourceLanguage = "es", - TargetLanguage = "en", - TrainOnAll = false, - TrainOnChapters = + SourceCorpora = { + new List { - "MAT", - new ScriptureChapters { Chapters = { 1 } } + new() + { + Language = "es", + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { } } + } + }, + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + } + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Language = "en", + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { } } + } + }, + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_ScriptureRangeEmptyString() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = [new TrainingCorpus { CorpusRef = "corpus1", ScriptureRange = "" }] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = 
engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + } + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_TextFiles() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-source1", + TextIds = new List { "MAT" } + } + }, + TargetFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-target1", + TextIds = new List { "MAT" } + } + } + } + ] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "parallel-corpus1-source1", + Language = "es", + TrainOnTextIds = { "MAT" }, + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = FileFormat.Text, + TextId = "MAT" + } + } + }, + new() + { + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.txt", + Format = FileFormat.Text, + TextId = "MRK" + } + } + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "parallel-corpus1-target1", + Language = "en", + TrainOnTextIds = { "MAT" }, + Files 
= + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = FileFormat.Text, + TextId = "MAT" + } + } + }, + new() + { + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.txt", + Format = FileFormat.Text, + TextId = "MRK" + } + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TextIds_ParallelCorpus() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-source1", + TextIds = new List() { "MAT", "MRK" } + } + }, + TargetFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-target1", + TextIds = new List() { "MAT", "MRK" } + } + } + } + ] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", + SourceCorpora = + { + new List() + { + new() + { + Id = "parallel-corpus1-source1", + Language = "es", + TrainOnTextIds = { "MAT", "MRK" }, + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + } + }, + new() + { + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = FileFormat.Paratext, + TextId = "file3.zip" + } + } + } + } + }, + TargetCorpora = + { + new List() + { + new() + { + Id = "parallel-corpus1-target1", + Language = "en", + TrainOnTextIds = { "MAT", "MRK" }, + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + } + }, + new() + { + Id 
= "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = FileFormat.Paratext, + TextId = "file4.zip" + } + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_ScriptureRange_ParallelCorpus() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "parallel-corpus1-source1", ScriptureRange = "MAT 1;MRK" } + }, + TargetFilters = new List() + { + new() { CorpusRef = "parallel-corpus1-target1", ScriptureRange = "MAT 1;MRK" } + } + } + ] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", + SourceCorpora = + { + new List() + { + new() + { + Id = "parallel-corpus1-source1", + Language = "es", + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { } } + } + }, + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + } + }, + new() + { + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = FileFormat.Paratext, + TextId = "file3.zip" + } + } + } + } + }, + TargetCorpora = + { + new List() + { + new() + { + Id = "parallel-corpus1-target1", + Language = "en", + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { } + } + }, + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = 
FileFormat.Paratext, + TextId = "file2.zip" + } + } + }, + new() + { + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = FileFormat.Paratext, + TextId = "file4.zip" + } + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_MixedSourceAndTarget_ParallelCorpus() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "parallel-corpus1-source1", ScriptureRange = "MAT 1-2;MRK 1-2" }, + new() { CorpusRef = "parallel-corpus1-source2", ScriptureRange = "MAT 3;MRK 1" } + }, + TargetFilters = new List() + { + new() { CorpusRef = "parallel-corpus1-target1", ScriptureRange = "MAT 2-3;MRK 2" }, + new() { CorpusRef = "parallel-corpus1-target2", ScriptureRange = "MAT 1;MRK 1-2" } + } + } + ] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", + SourceCorpora = + { + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1, 2 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1, 2 } } + } + } }, + new V1.MonolingualCorpus() { - "MRK", - new ScriptureChapters { Chapters = { } } + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = FileFormat.Paratext, + TextId = "file3.zip" + } 
+ }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 3 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1 } } + } + } } }, - SourceFiles = + TargetCorpora = + { + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 2, 3 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 2 } } + } + } + }, + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = FileFormat.Paratext, + TextId = "file4.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1, 2 } } + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_NoFilters_ParallelCorpus() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = [new TrainingCorpus { ParallelCorpusRef = "parallel-corpus1" }] + } + ); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", + SourceCorpora = { - new V1.CorpusFile + new V1.MonolingualCorpus() { - Location = "file1.zip", - Format = FileFormat.Paratext, - TextId = "file1.zip" + Id = "parallel-corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + } + }, + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-source2", + 
Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = FileFormat.Paratext, + TextId = "file3.zip" + } + } } }, - TargetFiles = + TargetCorpora = { - new V1.CorpusFile + new V1.MonolingualCorpus() { - Location = "file2.zip", - Format = FileFormat.Paratext, - TextId = "file2.zip" + Id = "parallel-corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + } + }, + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = FileFormat.Paratext, + TextId = "file4.zip" + } + } } } } @@ -388,16 +1182,112 @@ await env.Service.StartBuildAsync( } [Test] - public async Task StartBuildAsync_ScriptureRangeEmptyString() + public async Task StartBuildAsync_TrainOnNotSpecified_ParallelCorpus() { var env = new TestEnvironment(); - string engineId = (await env.CreateEngineWithParatextProjectAsync()).Id; + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync(new Build { Id = BUILD1_ID, EngineRef = engineId }); + _ = env.TranslationServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Smt", + Corpora = + { + new V1.ParallelCorpus + { + Id = "parallel-corpus1", + SourceCorpora = + { + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + } + }, + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = FileFormat.Paratext, + TextId = "file3.zip" + } + } + } + }, + TargetCorpora = + { + new V1.MonolingualCorpus() + { + Id = 
"parallel-corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + } + }, + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = FileFormat.Paratext, + TextId = "file4.zip" + } + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_NoTargetFilter_ParallelCorpus() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; await env.Service.StartBuildAsync( new Build { Id = BUILD1_ID, EngineRef = engineId, - TrainOn = [new TrainingCorpus { CorpusRef = "corpus1", ScriptureRange = "" }] + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "parallel-corpus1-source1", ScriptureRange = "MAT 1;MRK" } + } + } + ] } ); _ = env.TranslationServiceClient.Received() @@ -409,28 +1299,80 @@ await env.Service.StartBuildAsync( EngineType = "Smt", Corpora = { - new V1.Corpus + new V1.ParallelCorpus { - Id = "corpus1", - SourceLanguage = "es", - TargetLanguage = "en", - TrainOnAll = false, - SourceFiles = + Id = "parallel-corpus1", + SourceCorpora = { - new V1.CorpusFile + new V1.MonolingualCorpus() { - Location = "file1.zip", - Format = FileFormat.Paratext, - TextId = "file1.zip" + Id = "parallel-corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = FileFormat.Paratext, + TextId = "file1.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { } + } + } + }, + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = FileFormat.Paratext, + TextId = "file3.zip" + 
} + } } }, - TargetFiles = + TargetCorpora = { - new V1.CorpusFile + new V1.MonolingualCorpus() + { + Id = "parallel-corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = FileFormat.Paratext, + TextId = "file2.zip" + } + } + }, + new V1.MonolingualCorpus() { - Location = "file2.zip", - Format = FileFormat.Paratext, - TextId = "file2.zip" + Id = "parallel-corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = FileFormat.Paratext, + TextId = "file4.zip" + } + } } } } @@ -727,6 +1669,188 @@ public async Task CreateEngineWithParatextProjectAsync() return engine; } + public async Task CreateParallelCorpusEngineWithTextFilesAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Smt", + ParallelCorpora = new Models.ParallelCorpus[] + { + new() + { + Id = "parallel-corpus1", + SourceCorpora = new List() + { + new() + { + Id = "parallel-corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + }, + new() + { + Id = "parallel-corpus1-source2", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "parallel-corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + }, + new() + { + Id = "parallel-corpus1-target2", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file4", + Filename = "file4.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + } + } + } + }; + await Engines.InsertAsync(engine); + return 
engine; + } + + public async Task CreateParallelCorpusEngineWithParatextProjectAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Smt", + ParallelCorpora = new Models.ParallelCorpus[] + { + new() + { + Id = "parallel-corpus1", + SourceCorpora = new List() + { + new() + { + Id = "parallel-corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file1.zip" + } + ] + }, + new() + { + Id = "parallel-corpus1-source2", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file3.zip" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "parallel-corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file2.zip" + } + ] + }, + new() + { + Id = "parallel-corpus1-target2", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file4", + Filename = "file4.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file4.zip" + } + ] + } + } + } + } + }; + await Engines.InsertAsync(engine); + return engine; + } + private static TranslationSources[] GetSources(int count, bool isUnknown) { var sources = new TranslationSources[count];