Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] LT-21005: Sort References Properly #225

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions LCM.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<s:String x:Key="/Default/PatternsAndTemplates/Todo/TodoPatterns/=C6562928DAAA5C419C0A4E5109498163/Pattern/@EntryValue">(?&lt;=\W|^)(?&lt;TAG&gt;REVIEW)(\W|$)(.*)</s:String>
<s:String x:Key="/Default/PatternsAndTemplates/Todo/TodoPatterns/=C6562928DAAA5C419C0A4E5109498163/TodoIconStyle/@EntryValue">Normal</s:String>
<s:Boolean x:Key="/Default/UserDictionary/Words/=analyses/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=bldr/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Charis/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Duolos/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=flid/@EntryIndexedValue">True</s:Boolean>
Expand Down
10 changes: 9 additions & 1 deletion src/SIL.LCModel.Core/Text/TsStringUtils.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2004-2020 SIL International
// Copyright (c) 2004-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

Expand Down Expand Up @@ -1856,6 +1856,14 @@ public static bool IsNullOrEmpty(ITsString testMe)
{
return testMe == null || testMe.Length <= 0;
}

/// <summary>
/// Tests whether a TsString carries no real content: it is null, empty, or its text is exactly
/// the given placeholder.
/// </summary>
/// <param name="testMe">The string to test (may be null).</param>
/// <param name="placeholder">The placeholder text to treat as "no content" (e.g. ***).</param>
/// <returns>
/// True if the string is null, empty, or a placeholder (e.g. ***)
/// </returns>
public static bool IsNullOrPlaceholder(ITsString testMe, string placeholder)
{
    if (IsNullOrEmpty(testMe))
    {
        return true;
    }

    // Exact (ordinal) match against the placeholder text.
    return string.Equals(testMe.Text, placeholder);
}
}
#endregion

Expand Down
82 changes: 75 additions & 7 deletions src/SIL.LCModel/DomainImpl/ScrTxtPara.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright (c) 2003-2018 SIL International
// Copyright (c) 2003-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Icu;
using SIL.LCModel.Core.KernelInterfaces;
using SIL.LCModel.Core.Scripture;
Expand Down Expand Up @@ -2117,25 +2120,90 @@ public override ITsString Reference(ISegment seg, int ich)
{
var stText = Owner as IStText;
if (stText == null)
return Cache.MakeUserTss("unknown"); // should never happen, I think?
return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think?
if (stText.OwningFlid == ScrSectionTags.kflidContent)
{
// Body of Scripture. Figure a book/chapter/verse
IScrBook book = (IScrBook) stText.Owner.Owner;
string mainRef = ScriptureServices.FullScrRef(this, ich, book.BestUIAbbrev).Trim();
return Cache.MakeUserTss(mainRef + ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg)));
}
if (stText.OwningFlid == ScrSectionTags.kflidHeading)
//if (stText.OwningFlid == ScrSectionTags.kflidHeading)
//{
// // use the section title without qualifiers.
// return stText.Title.BestVernacularAnalysisAlternative;
//}
if (stText.OwningFlid == ScrBookTags.kflidTitle)
{
// use the section title without qualifiers.
return stText.Title.BestVernacularAnalysisAlternative;
}
if (stText.OwningFlid == ScrBookTags.kflidTitle)
return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think?
}

/// <inheritdoc/>
/// <remarks>
/// The key is built as: <see cref="RefForSortingPrefix"/>, book info, a space, the zero-padded
/// chapter/verse portion (or "0" for a book title), a space, and the zero-padded character offset.
/// </remarks>
public override ITsString ReferenceForSorting(ISegment seg, int ich)
{
    if (!(Owner is IStText stText))
    {
        // No owning text: stText is not assigned in this branch, so it must not be dereferenced here.
        return Scripture.Name.NotFoundTss;
    }

    // Use a prefix to make scripture references sort together when mixed with other references
    // (Scripture is sorted canonically, but a comparer for mixed references would sort alphabetically)
    var bldr = new StringBuilder(RefForSortingPrefix);
    switch (stText.OwningFlid)
    {
        case ScrSectionTags.kflidContent:
            RefForSortAddBookInfo(bldr, (IScrBook) stText.Owner.Owner);

            // Append the numerical portion of the reference, including any letter indicating part of a verse
            var refSansBookBldr = new StringBuilder(ScriptureServices.FullScrRef(this, ich, string.Empty).Trim());
            var numbersInRef = new Regex(@"\d+").Matches(refSansBookBldr.ToString());
            // Pad the last number first: inserting zeroes shifts everything after the insertion point,
            // so working backwards keeps the indices of the matches still to be processed valid.
            foreach (var number in numbersInRef.Cast<Match>().Reverse())
            {
                ZeroPadForStringComparison(refSansBookBldr, number.Index, number.Length);
            }
            bldr.Append(" ").Append(refSansBookBldr).Append(ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg)));
            break;
        case ScrBookTags.kflidTitle:
            RefForSortAddBookInfo(bldr, (IScrBook) stText.Owner);
            // The book title should sort before anything else in the book
            bldr.Append(" 0");
            break;
        default:
            return Cache.MakeUserTss(Strings.ksStars);
    }

    // Append the zero-padded character offset so that references in the same verse/segment keep their order.
    bldr.Append(" ").Append(ZeroPadForStringComparison(ich));
    return Cache.MakeUserTss(bldr.ToString());
}

protected internal const string RefForSortingPrefix = "0 Scr ";

/// <summary>
/// Appends the book portion of a sort key: the book's canonical number, an underscore, and the
/// book's best UI abbreviation.
/// </summary>
/// <param name="bldr">The sort key being built; appended to in place.</param>
/// <param name="book">The book whose canonical number and abbreviation are appended.</param>
protected static void RefForSortAddBookInfo(StringBuilder bldr, IScrBook book)
{
    // The canonical number drives the ordering; the abbreviation makes no difference for sorting,
    // but could make debugging easier.
    // NOTE(review): the canonical number is appended without zero-padding, so under a plain string
    // comparison a one-digit book number (e.g. 9) would sort after a two-digit one (e.g. 40) - confirm
    // whether padding is needed here.
    bldr.Append(book.CanonicalNum).Append("_").Append(book.BestUIAbbrev);
}

/// <summary>Number of characters to which chapter and verse numbers are zero-padded.</summary>
private const int PaddedNumberWidth = 5;

/// <summary>
/// Zero-pads, in place, a run of digits inside <paramref name="bldr"/> so that it is at least
/// five characters wide and therefore compares correctly as a string.
/// </summary>
/// <param name="bldr">The text containing the number; zeroes are inserted into it.</param>
/// <param name="index">Index in <paramref name="bldr"/> of the first digit of the number.</param>
/// <param name="cExistingDigits">Number of digits the number already has.</param>
protected internal static void ZeroPadForStringComparison(StringBuilder bldr, int index, int cExistingDigits)
{
    var cMissingZeroes = PaddedNumberWidth - cExistingDigits;
    if (cMissingZeroes > 0)
    {
        // Insert all missing zeroes in front of the number in a single call.
        bldr.Insert(index, "0", cMissingZeroes);
    }
}

/// <summary>
/// Returns <paramref name="intInRef"/> left-padded with zeroes to a width of five characters.
/// </summary>
/// <param name="intInRef">The digits of a number taken from a reference (may be empty).</param>
/// <returns>
/// The padded text; input that is already five or more characters long is returned unchanged.
/// </returns>
protected internal static string ZeroPadForStringComparison(string intInRef)
{
    return intInRef.PadLeft(PaddedNumberWidth, '0');
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets the footnote sequence.
Expand Down
58 changes: 53 additions & 5 deletions src/SIL.LCModel/DomainImpl/StTxtPara.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public IStTxtPara PreviousParagraph
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
/// (For now, ich is not actually used, but it may become important if we decide not to split segements for
/// (For now, ich is not actually used, but it may become important if we decide not to split segments for
/// verse numbers.)
/// Overridden in ScrTxtPara to handle special cases for Scripture refs.
/// </summary>
Expand Down Expand Up @@ -100,10 +100,6 @@ public virtual ITsString Reference(ISegment seg, int ich)
if (bldr.Length > 0)
bldr.Replace(bldr.Length, bldr.Length, " ", props);

// if Scripture.IsResponsibleFor(stText) we should try to get the verse number of the annotation.
//if (stText.OwningFlid == (int)Text.TextTags.kflidContents)
//{

// Insert paragraph number.
int ipara = stText.ParagraphsOS.IndexOf(this) + 1;
bldr.Replace(bldr.Length, bldr.Length, ipara.ToString(), props);
Expand All @@ -117,6 +113,58 @@ public virtual ITsString Reference(ISegment seg, int ich)
return bldr.GetString();
}

/// <inheritdoc/>
/// <remarks>
/// The key is the text's abbreviation (or title), a space, the zero-padded paragraph and segment
/// numbers separated by a period, another space, and the zero-padded character offset.
/// </remarks>
public virtual ITsString ReferenceForSorting(ISegment seg, int ich)
{
    if (!(Owner is IStText owningText))
    {
        return TsStringUtils.EmptyString(Cache.DefaultUserWs);
    }

    // Prefer the abbreviation of the owning IText; fall back to the title when there is no owning
    // IText or when the abbreviation is null, empty, or the "not found" placeholder.
    ITsString name;
    if (owningText.Owner is IText text)
    {
        name = text.Abbreviation.BestVernacularAnalysisAlternative;
        if (TsStringUtils.IsNullOrPlaceholder(name, owningText.Title.NotFoundTss.Text))
        {
            name = owningText.Title.BestVernacularAnalysisAlternative;
        }
    }
    else
    {
        name = owningText.Title.BestVernacularAnalysisAlternative;
    }

    // Build text properties that specify nothing but the writing system of the name.
    var wsOnlyPropsBldr = TsStringUtils.MakePropsBldr();
    var ws = name.get_Properties(0).GetIntPropValues((int)FwTextPropType.ktptWs, out _);
    wsOnlyPropsBldr.SetIntPropValues((int)FwTextPropType.ktptWs, (int)FwTextPropVar.ktpvDefault, ws);
    var wsOnlyProps = wsOnlyPropsBldr.GetTextProps();

    // A placeholder name contributes nothing to the key.
    var keyBldr = !TsStringUtils.IsNullOrPlaceholder(name, owningText.Title.NotFoundTss.Text)
        ? name.GetBldr()
        : new TsStrBldr();

    // Start with a space even if we don't have a title, so untitled texts sort to the top.
    keyBldr.Append(" ", wsOnlyProps);

    // Paragraph and segment numbers are one-based.
    var paraNumber = owningText.ParagraphsOS.IndexOf(this) + 1;
    var segNumber = SegmentsOS.IndexOf(seg) + 1;
    keyBldr.Append(ZeroPadForStringComparison(paraNumber), wsOnlyProps)
        .Append(".", wsOnlyProps)
        .Append(ZeroPadForStringComparison(segNumber), wsOnlyProps);

    // The offset goes last so that two references in the same segment are sorted properly (LT-8457).
    keyBldr.Append(" ", wsOnlyProps)
        .Append(ZeroPadForStringComparison(ich), wsOnlyProps);

    return keyBldr.GetString();
}

/// <summary>
/// Formats the given int with leading zeroes to at least ten digits (the number of digits in
/// <see cref="int.MaxValue"/>) so that non-negative values compare correctly as strings.
/// </summary>
/// <param name="i">The number to format.</param>
/// <returns>
/// The zero-padded decimal text, e.g. "0000000003" for 3. A negative value keeps its sign in
/// front of the ten digits.
/// </returns>
protected internal static string ZeroPadForStringComparison(int i)
{
    // because int.MaxValue.ToString().Length is 10.
    // The invariant culture keeps the sort key identical regardless of the current culture.
    return i.ToString("D10", System.Globalization.CultureInfo.InvariantCulture);
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Finds the ORC of the specified picture and deletes it from the paragraph and any
Expand Down
22 changes: 16 additions & 6 deletions src/SIL.LCModel/InterfaceAdditions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3136,15 +3136,25 @@ IStTxtPara PreviousParagraph
List<IConstChartWordGroup> GetChartCellRefs();

/// ------------------------------------------------------------------------------------
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
/// (For now, ich is not actually used, but it may become important if we decide not to split segements for
/// verse numbers.)
/// </summary>
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
/// (For now, ich is not actually used, but it may become important if we decide not to split segments for
/// verse numbers.)
/// </summary>
/// ------------------------------------------------------------------------------------
ITsString Reference(ISegment seg, int ich);

/// ------------------------------------------------------------------------------------
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment, in a form meant to be
/// compared as a string rather than displayed.
/// To allow greater accuracy and precision in sorting, numbers are zero-padded to a fixed width (paragraph, segment, and
/// character offset to the length of <see cref="int.MaxValue"/>; Scripture chapter and verse numbers to a shorter width) and ich
/// is included at the end.
/// </summary>
/// <param name="seg">The segment that contains the character position.</param>
/// <param name="ich">The character position in the whole paragraph; appended, zero-padded, at the end of the key.</param>
/// <returns>The sort key for the reference.</returns>
/// ------------------------------------------------------------------------------------
ITsString ReferenceForSorting(ISegment seg, int ich);

/// ------------------------------------------------------------------------------------
/// <summary>
/// Splits the paragraph at the specified character index.
Expand Down
24 changes: 23 additions & 1 deletion tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2004-2021 SIL International
// Copyright (c) 2004-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

Expand Down Expand Up @@ -2110,6 +2110,28 @@ public void RemoveIllegalXmlChars()
Assert.That(TsStringUtils.RemoveIllegalXmlChars(outOfOrderSurrogates).Text, Is.EqualTo("\xd800\xdc00z"));
}

/// <summary>
/// Verifies that both IsNullOrEmpty and IsNullOrPlaceholder report true for a null string.
/// </summary>
[Test]
public void IsNull_OrMissing_Null()
{
Assert.That(TsStringUtils.IsNullOrEmpty(null), Is.True, "null is null or empty");
Assert.That(TsStringUtils.IsNullOrPlaceholder(null, "***"), Is.True, "null is null or placeholder");
}

/// <summary>
/// Verifies IsNullOrEmpty for non-null strings: only the empty string counts as empty; the
/// placeholder text "***" does not.
/// </summary>
[TestCase("", ExpectedResult = true)]
[TestCase("***", ExpectedResult = false)]
[TestCase("t", ExpectedResult = false)]
public bool IsNullOrEmpty(string actual)
{
    // Wrap the raw text in a TsString (writing system: m_wsf.UserWs) before testing it.
    var tss = TsStringUtils.MakeString(actual, m_wsf.UserWs);
    return TsStringUtils.IsNullOrEmpty(tss);
}

/// <summary>
/// Verifies IsNullOrPlaceholder for non-null strings, using "***" as the placeholder: both the
/// empty string and the placeholder itself count; any other text does not.
/// </summary>
[TestCase("", ExpectedResult = true)]
[TestCase("***", ExpectedResult = true)]
[TestCase("t", ExpectedResult = false)]
public bool IsNullOrPlaceholder(string actual)
{
    // Wrap the raw text in a TsString (writing system: m_wsf.UserWs) before testing it.
    var tss = TsStringUtils.MakeString(actual, m_wsf.UserWs);
    return TsStringUtils.IsNullOrPlaceholder(tss, "***");
}
#endregion
}
}
67 changes: 57 additions & 10 deletions tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2003-2018 SIL International
// Copyright (c) 2003-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

Expand Down Expand Up @@ -549,23 +549,70 @@ public void Reference()
AddDataToMatthew();
var para1 = (IStTxtPara) m_book.SectionsOS[1].ContentOA.ParagraphsOS[0]; // Actually ScrTxtPara
var seg = para1.SegmentsOS[1]; // first content ref, after the chapter and verse number stuff.
Assert.That(para1.Reference(seg, seg.BeginOffset + 1).Text, Is.EqualTo("MAT 1:1"));
var v1Seg1Idx = seg.BeginOffset + 1;
Assert.That(para1.Reference(seg, v1Seg1Idx).Text, Is.EqualTo("MAT 1:1"));
Assert.That(para1.ReferenceForSorting(seg, v1Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00001 {v1Seg1Idx:D10}"));
AddRunToMockedPara(para1, "Verse two second sentence.", null);
var v2seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse
Assert.That(para1.Reference(v2seg1, v2seg1.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2a"));
var v2seg2 = para1.SegmentsOS[4]; // first segment of two-sentence verse
Assert.That(para1.Reference(v2seg2, v2seg2.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2b"));
var v2Seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse
var v2Seg1Idx = v2Seg1.BeginOffset + 1;
Assert.That(para1.Reference(v2Seg1, v2Seg1Idx).Text, Is.EqualTo("MAT 1:2a"));
Assert.That(para1.ReferenceForSorting(v2Seg1, v2Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002a {v2Seg1Idx:D10}"));
var v2Seg2 = para1.SegmentsOS[4]; // first segment of two-sentence verse
var v2Seg2Idx = v2Seg2.BeginOffset + 1;
Assert.That(para1.Reference(v2Seg2, v2Seg2Idx).Text, Is.EqualTo("MAT 1:2b"));
Assert.That(para1.ReferenceForSorting(v2Seg2, v2Seg2Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002b {v2Seg2Idx:D10}"));
IStTxtPara para2 = AddParaToMockedSectionContent((IScrSection)para1.Owner.Owner, ScrStyleNames.NormalParagraph);
AddRunToMockedPara(para2, "Verse 2 seg 3", null);
var v2seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs.
Assert.That(para2.Reference(v2seg3, v2seg3.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2c"));
var v2Seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs.
var v2Seg3Idx = v2Seg3.BeginOffset + 1;
Assert.That(para2.Reference(v2Seg3, v2Seg3Idx).Text, Is.EqualTo("MAT 1:2c"));
Assert.That(para2.ReferenceForSorting(v2Seg3, v2Seg3Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002c {v2Seg3Idx:D10}"));
var newSection = AddSectionToMockedBook(m_book);
IStTxtPara para3 = AddParaToMockedSectionContent(newSection, ScrStyleNames.NormalParagraph);
AddRunToMockedPara(para3, "Verse 2 seg 4", null);
var v2seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!).
var v2Seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!).
var v2Seg4Idx = v2Seg4.BeginOffset + 1;
// JohnT: arguably this should give MAT 1:2d. The current implementation does not detect the
// segments in the previous section.
Assert.That(para3.Reference(v2seg4, v2seg4.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2"));
Assert.That(para3.Reference(v2Seg4, v2Seg4Idx).Text, Is.EqualTo("MAT 1:2"));
Assert.That(para3.ReferenceForSorting(v2Seg4, v2Seg4Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002 {v2Seg4Idx:D10}"));

var scrBook1Samuel = CreateBookData(9, "1 Samuel");
//var scrBookSusanna = CreateBookData(75/*?*/, "Susanna");
// TODO (Hasso) 2022.03: Enoch or some other >100 book
}

/// <summary>
/// Checks the display reference and the sort key produced for a paragraph created with the
/// intro paragraph style.
/// </summary>
[Test]
public void Reference_IntroPara()
{
// add section and empty paragraph
var section = AddSectionToMockedBook(m_book, true);
var para = AddParaToMockedSectionContent(section, ScrStyleNames.IntroParagraph);
// NOTE(review): no text is added to the paragraph in this test - confirm that SegmentsOS[0] exists.
var seg = para.SegmentsOS[0];
const int ich = 3;
// NOTE(review): "(TODO)" in the expected display reference looks like a placeholder - confirm the intended value.
Assert.That(para.Reference(seg, ich).Text, Is.EqualTo("Matthew (TODO)"));
// Expected key: prefix, book info, zero-padded chapter:verse, then ich zero-padded to ten digits.
Assert.That(para.ReferenceForSorting(seg, ich).Text, Is.EqualTo("0 Scr 40_MAT 00001:00000 0000000003"));
}

/// <summary>
/// Checks the display reference and the sort key produced for a paragraph in a book title.
/// </summary>
[Test]
public void Reference_BookTitle()
{
// add title
var title = AddTitleToMockedBook(m_book, "This is the title");
var para = (IStTxtPara)title.ParagraphsOS[0];
var seg = para.SegmentsOS[0];
const int ich = 5;
Assert.That(para.Reference(seg, ich).Text, Is.EqualTo("Matthew (Title)"));
// Expected key: prefix, book info, "0" in place of chapter:verse, then ich zero-padded to ten digits.
Assert.That(para.ReferenceForSorting(seg, ich).Text, Is.EqualTo("0 Scr 40_MAT 0 0000000005"));
}

/// <summary>
/// Verifies that ScrTxtPara.ZeroPadForStringComparison pads a digit string with leading zeroes
/// to five characters and leaves a five-digit string unchanged.
/// </summary>
[TestCase("", ExpectedResult = "00000")]
[TestCase("9", ExpectedResult = "00009")]
[TestCase("176", ExpectedResult = "00176")]
[TestCase("31103", ExpectedResult = "31103")]
public string ZeroPadForStringComparison(string intInRef)
{
    var padded = ScrTxtPara.ZeroPadForStringComparison(intInRef);
    return padded;
}
#endregion

Expand Down
Loading