diff --git a/docs/customization.md b/docs/customization.md index 48910b67..26067cbf 100644 --- a/docs/customization.md +++ b/docs/customization.md @@ -116,6 +116,29 @@ where taxon_id = 10090; Every time new model systems are added to the application, they will have to be activated in this manner. +## GO term recommendation + +Users can receive recommended terms based on the TIER1 and TIER2 genes they have added to their profiles. + +The recommendation algorithm works as follows: + +1. Retrieve GO terms associated with all TIER1 and TIER2 genes +2. Retrieve all the descendants of these terms +3. For each term, compute how many TIER1 or TIER2 genes it is associated with, either directly or indirectly via its + descendants +4. Keep terms that are not already on the user profile and that are associated with at least 2 TIER1 or TIER2 genes +5. Exclude terms with more than 50 associated genes +6. Retain terms that have at least one novel gene that is not on the user's profile +7. Retain only the most specific terms when both a term and one of its descendants are recommended + +You can adjust the minimum number of overlapping TIER1 or TIER2 genes and the maximum size of a GO term by setting the +following: + +```ini +rdp.settings.go-term-min-overlap=2 # new in 1.5.8 +rdp.settings.go-term-size-limit=50 +``` + ### Customizing taxon appearance (new in 1.5.5) By default, taxon are rendered using the common name in title case. 
The only exception is for *Homo sapiens* which diff --git a/src/main/java/ubc/pavlab/rdp/controllers/UserController.java b/src/main/java/ubc/pavlab/rdp/controllers/UserController.java index 8a5625da..6eb60538 100644 --- a/src/main/java/ubc/pavlab/rdp/controllers/UserController.java +++ b/src/main/java/ubc/pavlab/rdp/controllers/UserController.java @@ -3,14 +3,17 @@ import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.Value; import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.MessageSource; +import org.springframework.context.MessageSourceResolvable; import org.springframework.http.HttpStatus; import org.springframework.http.InvalidMediaTypeException; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; +import org.springframework.lang.Nullable; import org.springframework.security.access.annotation.Secured; import org.springframework.security.authentication.BadCredentialsException; import org.springframework.stereotype.Controller; @@ -498,23 +501,45 @@ public Object getTermsForTaxon( @PathVariable Integer taxonId, return goIds.stream().collect( toNullableMap( identity(), goId -> goService.getTerm( goId ) == null ? null : userService.convertTerm( user, taxon, goService.getTerm( goId ) ) ) ); } + @Value + static class RecommendedTermsModel { + /** + * List of recommended GO terms. + */ + public Collection recommendedTerms; + /** + * Feedback to be displayed or null if no feedback is available. 
+ */ + @Nullable + public String feedback; + } + @ResponseBody @GetMapping(value = "/user/taxon/{taxonId}/term/recommend", produces = MediaType.APPLICATION_JSON_VALUE) public Object getRecommendedTermsForTaxon( @PathVariable Integer taxonId, - @RequestParam(required = false) List geneIds ) { + @RequestParam(required = false) List geneIds, + Locale locale ) { Taxon taxon = taxonService.findById( taxonId ); if ( taxon == null ) { return ResponseEntity.notFound().build(); } - Set genes; + User user = userService.findCurrentUser(); + + Collection recommendedTerms; + List feedback = new ArrayList<>(); if ( geneIds != null ) { - genes = new HashSet<>( geneService.load( geneIds ) ); + Set genes = new HashSet<>( geneService.load( geneIds ) ); + recommendedTerms = userService.recommendTerms( user, genes, taxon, feedback ); } else { - genes = Collections.emptySet(); + recommendedTerms = userService.recommendTerms( user, taxon, feedback ); } - return userService.recommendTerms( userService.findCurrentUser(), genes, taxon ); + String formattedFeedback = feedback.isEmpty() ? 
null : feedback.stream() + .map( f -> messageSource.getMessage( f, locale ) ) + .collect( Collectors.joining( "\n" ) ); + + return new RecommendedTermsModel( recommendedTerms, formattedFeedback ); } private Set getManualTiers() { diff --git a/src/main/java/ubc/pavlab/rdp/repositories/GeneOntologyTermInfoRepository.java b/src/main/java/ubc/pavlab/rdp/repositories/GeneOntologyTermInfoRepository.java index 06a1f122..d5e14d5c 100644 --- a/src/main/java/ubc/pavlab/rdp/repositories/GeneOntologyTermInfoRepository.java +++ b/src/main/java/ubc/pavlab/rdp/repositories/GeneOntologyTermInfoRepository.java @@ -6,6 +6,7 @@ import org.springframework.util.LinkedMultiValueMap; import org.springframework.util.MultiValueMap; import ubc.pavlab.rdp.model.GeneOntologyTermInfo; +import ubc.pavlab.rdp.model.Taxon; import java.util.*; import java.util.concurrent.locks.Lock; @@ -198,6 +199,40 @@ public long count() { } } + /** + * Count the number of terms for the given taxon; + */ + public long countByTaxon( Taxon taxon ) { + Lock lock = rwLock.readLock(); + try { + lock.lock(); + return termsByIdOrAlias.values().stream() + .distinct() + .map( GeneOntologyTermInfo::getDirectGeneIdsByTaxonId ) + .filter( m -> m.containsKey( taxon.getId() ) ) + .count(); + } finally { + lock.unlock(); + } + } + + /** + * Count the number of term-gene associations for the given taxon. 
+ */ + public long countGeneAssociationsByTaxon( Taxon taxon ) { + Lock lock = rwLock.readLock(); + try { + lock.lock(); + return termsByIdOrAlias.values().stream() + .distinct() + .map( GeneOntologyTermInfo::getDirectGeneIdsByTaxonId ) + .mapToLong( m -> m.getOrDefault( taxon.getId(), Collections.emptyList() ).size() ) + .sum(); + } finally { + lock.unlock(); + } + } + @Override public void deleteById( String id ) { // FIXME: we should acquire a read lock here and promote it to a write lock if the element exists, but I don't diff --git a/src/main/java/ubc/pavlab/rdp/services/GOService.java b/src/main/java/ubc/pavlab/rdp/services/GOService.java index 1720e9ba..3e60fce5 100644 --- a/src/main/java/ubc/pavlab/rdp/services/GOService.java +++ b/src/main/java/ubc/pavlab/rdp/services/GOService.java @@ -24,6 +24,10 @@ public interface GOService { long count(); + long countByTaxon( Taxon taxon ); + + long countGeneAssociationsByTaxon( Taxon taxon ); + Collection getDescendants( GeneOntologyTermInfo entry ); Collection getAncestors( GeneOntologyTermInfo entry ); diff --git a/src/main/java/ubc/pavlab/rdp/services/GOServiceImpl.java b/src/main/java/ubc/pavlab/rdp/services/GOServiceImpl.java index a1c27dd1..34803cd7 100644 --- a/src/main/java/ubc/pavlab/rdp/services/GOServiceImpl.java +++ b/src/main/java/ubc/pavlab/rdp/services/GOServiceImpl.java @@ -279,12 +279,12 @@ public List> search( String queryString, Taxo @Override public long getSizeInTaxon( GeneOntologyTermInfo t, Taxon taxon ) { - Collection descendants = getDescendants( t ); + Collection descendants = new HashSet<>( getDescendants( t ) ); descendants.add( t ); return descendants.stream() + .flatMap( term -> term.getDirectGeneIdsByTaxonId().getOrDefault( taxon.getId(), Collections.emptyList() ).stream() ) .distinct() - .mapToLong( term -> term.getDirectGeneIdsByTaxonId().getOrDefault( taxon.getId(), Collections.emptyList() ).size() ) - .sum(); + .count(); } @Override @@ -360,13 +360,23 @@ public long count() { return 
goRepository.count(); } + @Override + public long countByTaxon( Taxon taxon ) { + return goRepository.countByTaxon( taxon ); + } + + @Override + public long countGeneAssociationsByTaxon( Taxon taxon ) { + return goRepository.countGeneAssociationsByTaxon( taxon ); + } + @Override public Collection getDescendants( GeneOntologyTermInfo entry ) { StopWatch timer = StopWatch.createStarted(); Lock lock = rwLock.readLock(); try { lock.lock(); - return getDescendantsInternal( entry ); + return Collections.unmodifiableCollection( getDescendantsInternal( entry ) ); } finally { lock.unlock(); if ( timer.getTime( TimeUnit.MILLISECONDS ) > 1000 ) { @@ -386,6 +396,9 @@ private Set getDescendantsInternal( GeneOntologyTermInfo e results.add( child ); results.addAll( getDescendantsInternal( child ) ); } + if ( results.remove( entry ) ) { + log.warn( String.format( "%s is its own descendant, removing it to prevent cycles.", entry ) ); + } descendantsCache.put( entry, results ); return results; } @@ -479,7 +492,7 @@ public Collection getAncestors( GeneOntologyTermInfo term Lock lock = rwLock.readLock(); try { lock.lock(); - return getAncestorsInternal( term ); + return Collections.unmodifiableCollection( getAncestorsInternal( term ) ); } finally { lock.unlock(); } @@ -495,6 +508,9 @@ private Collection getAncestorsInternal( GeneOntologyTermI results.add( parent ); results.addAll( getAncestorsInternal( parent ) ); } + if ( results.remove( term ) ) { + log.warn( String.format( "%s is its own ancestor, removing it to prevent cycle.", term ) ); + } ancestorsCache.put( term, results ); return results; } diff --git a/src/main/java/ubc/pavlab/rdp/services/UserService.java b/src/main/java/ubc/pavlab/rdp/services/UserService.java index 60d6fadf..6926de53 100644 --- a/src/main/java/ubc/pavlab/rdp/services/UserService.java +++ b/src/main/java/ubc/pavlab/rdp/services/UserService.java @@ -1,7 +1,9 @@ package ubc.pavlab.rdp.services; +import org.springframework.context.MessageSourceResolvable; 
import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; +import org.springframework.lang.Nullable; import org.springframework.security.authentication.BadCredentialsException; import ubc.pavlab.rdp.exception.TokenException; import ubc.pavlab.rdp.model.*; @@ -161,12 +163,12 @@ public interface UserService { *

* The recommendation are based on the user's {@link TierType#MANUAL} gene set. */ - Collection recommendTerms( User user, Taxon taxon ); + Collection recommendTerms( User user, Taxon taxon, @Nullable List feedback ); /** * Recommend terms for a user using a supplied gene set which might differ from the user's. */ - Collection recommendTerms( User user, Set genes, Taxon taxon ); + Collection recommendTerms( User user, Set genes, Taxon taxon, @Nullable List feedback ); User updateTermsAndGenesInTaxon( User user, Taxon taxon, diff --git a/src/main/java/ubc/pavlab/rdp/services/UserServiceImpl.java b/src/main/java/ubc/pavlab/rdp/services/UserServiceImpl.java index 34e3884d..fd93dbda 100644 --- a/src/main/java/ubc/pavlab/rdp/services/UserServiceImpl.java +++ b/src/main/java/ubc/pavlab/rdp/services/UserServiceImpl.java @@ -10,8 +10,11 @@ import org.springframework.cache.annotation.Cacheable; import org.springframework.context.ApplicationEventPublisher; import org.springframework.context.MessageSource; +import org.springframework.context.MessageSourceResolvable; +import org.springframework.context.support.DefaultMessageSourceResolvable; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; +import org.springframework.lang.Nullable; import org.springframework.security.access.annotation.Secured; import org.springframework.security.access.prepost.PostAuthorize; import org.springframework.security.access.prepost.PostFilter; @@ -556,49 +559,99 @@ public Collection convertTerms( User user, Taxon taxon, Collection recommendTerms( @NonNull User user, @NonNull Taxon taxon ) { - return recommendTerms( user, user.getGenesByTaxonAndTier( taxon, getManualTiers() ), taxon, 10, applicationSettings.getGoTermSizeLimit(), 2 ); + public Collection recommendTerms( @NonNull User user, Taxon taxon, List feedback ) { + return recommendTerms( user, user.getGenesByTaxonAndTier( taxon, getManualTiers() ), taxon, applicationSettings.getGoTermSizeLimit(), 
applicationSettings.getGoTermMinOverlap(), feedback ); } @Override @PostFilter("hasPermission(filterObject, 'read')") - public Collection recommendTerms( User user, Set genes, Taxon taxon ) { - return recommendTerms( user, genes, taxon, 10, applicationSettings.getGoTermSizeLimit(), 2 ); + public Collection recommendTerms( User user, Set genes, Taxon taxon, List feedback ) { + return recommendTerms( user, genes, taxon, applicationSettings.getGoTermSizeLimit(), applicationSettings.getGoTermMinOverlap(), feedback ); } - /** * This is only meant for testing purposes; refrain from using in actual code. */ @PostFilter("hasPermission(filterObject, 'read')") - Collection recommendTerms( @NonNull User user, @NonNull Taxon taxon, long minSize, long maxSize, long minFrequency ) { - return recommendTerms( user, user.getGenesByTaxonAndTier( taxon, getManualTiers() ), taxon, minSize, maxSize, minFrequency ); + Collection recommendTerms( @NonNull User user, @NonNull Taxon taxon, long maxSize, long minFrequency ) { + return recommendTerms( user, user.getGenesByTaxonAndTier( taxon, getManualTiers() ), taxon, maxSize, minFrequency, null ); } - private Collection recommendTerms( @NonNull User user, Set genes, @NonNull Taxon taxon, long minSize, long maxSize, long minFrequency ) { + /** + * Recommend terms to a given user. 
+ * + * @param user user who receives recommendations + * @param genes genes to use for recommendation + * @param taxon taxon to restrict recommendations + * @param maxSize maximum number of genes a recommended term can be associated with + * @param minFrequency minimum number of the supplied genes a term must be associated with to be recommended + * @param feedback feedback is appended in the form of {@link MessageSourceResolvable} if non-null + * @return the recommended terms for the given parameters + */ + private Collection recommendTerms( @NonNull User user, Set genes, Taxon taxon, long maxSize, long minFrequency, @Nullable List feedback ) { // terms already associated to user within the taxon Set userTermGoIds = user.getUserTerms().stream() .filter( ut -> ut.getTaxon().equals( taxon ) ) .map( UserTerm::getGoId ) .collect( Collectors.toSet() ); + if ( genes.size() < minFrequency ) { + addFeedback( "UserService.recommendTerms.tooFewGenes", new String[]{ String.valueOf( minFrequency ) }, feedback ); + return Collections.emptySet(); + } + + // include TIER3 genes when recommending terms with novel genes + HashSet allGenes = new HashSet<>( genes ); + allGenes.addAll( user.getGenesByTaxonAndTier( taxon, EnumSet.of( TierType.TIER3 ) ) ); + Map sizeOfAllGenes = goService.termFrequencyMap( allGenes ); + // Then keep only those terms not already added and with the highest frequency Set topResults = goService.termFrequencyMap( genes ).entrySet().stream() - .filter( e -> minFrequency < 0 || e.getValue() >= minFrequency ) - .filter( e -> minSize < 0 || goService.getSizeInTaxon( e.getKey(), taxon ) >= minSize ) - .filter( e -> maxSize < 0 || goService.getSizeInTaxon( e.getKey(), taxon ) <= maxSize ) + .filter( e -> e.getValue() >= minFrequency ) .filter( e -> !userTermGoIds.contains( e.getKey().getGoId() ) ) + .filter( e -> { + long numberOfGenesInTaxon = goService.getSizeInTaxon( e.getKey(), taxon ); + // never recommend terms that have more than the GO term size limit + if ( maxSize >= 0 && 
numberOfGenesInTaxon > maxSize ) { + return false; + } + long numberOfUserGenesInTaxon = sizeOfAllGenes.getOrDefault( e.getKey(), 0L ); + // the difference between the size and frequency from the gene set is the number of new genes that + // the term would add to the user profile + long numberOfNewGenesInTaxon = numberOfGenesInTaxon - numberOfUserGenesInTaxon; + // ensure that at least 1 novel gene is being added + return numberOfNewGenesInTaxon > 0; + } ) .map( Map.Entry::getKey ) .collect( Collectors.toSet() ); + if ( topResults.isEmpty() ) { + // check for some common causes + if ( goService.countByTaxon( taxon ) == 0 ) { + addFeedback( "UserService.recommendTerms.noTermsInTaxon", new String[]{ taxon.getCommonName() }, feedback ); + return Collections.emptySet(); + } else if ( goService.countGeneAssociationsByTaxon( taxon ) == 0 ) { + addFeedback( "UserService.recommendTerms.noGeneAssociationsInTaxon", new String[]{ taxon.getCommonName() }, feedback ); + return Collections.emptySet(); + } else { + addFeedback( "UserService.recommendTerms.noResults", null, feedback ); + return Collections.emptySet(); + } + } + // Keep only leafiest of remaining terms (keep if it has no descendants in results) return topResults.stream() .filter( term -> Collections.disjoint( topResults, goService.getDescendants( term ) ) ) - .filter( term -> goService.getSizeInTaxon( term, taxon ) <= applicationSettings.getGoTermSizeLimit() ) .map( term -> convertTerm( user, taxon, term ) ) .collect( Collectors.toSet() ); } + private void addFeedback( String code, @Nullable Object[] args, @Nullable List feedback ) { + if ( feedback != null ) { + feedback.add( new DefaultMessageSourceResolvable( new String[]{ code }, args, null ) ); + } + } + @Transactional @Override @PreAuthorize("hasPermission(#user, 'update')") @@ -629,8 +682,12 @@ public User updateTermsAndGenesInTaxon( User user, .collect( Collectors.toMap( Gene::getGeneId, identity() ) ); // add calculated genes from terms + long maxSize = 
applicationSettings.getGoTermSizeLimit(); Map userGenesFromTerms = goTerms.stream() - .flatMap( term -> goService.getGenesInTaxon( term, taxon ).stream() ) + .map( term -> goService.getGenesInTaxon( term, taxon ) ) + // never add genes from terms that exceed the GO limit (those are never recommended) + .filter( c -> maxSize < 0 || c.size() <= maxSize ) + .flatMap( Collection::stream ) .distinct() // terms might refer to the same gene .map( geneInfoService::load ) .filter( Objects::nonNull ) @@ -659,7 +716,7 @@ public User updateTermsAndGenesInTaxon( User user, // update frequency and size as those have likely changed with new genes for ( UserTerm userTerm : user.getUserTerms() ) { GeneOntologyTermInfo cachedTerm = goService.getTerm( userTerm.getGoId() ); - userTerm.setFrequency( computeTermFrequencyInTaxon( user, cachedTerm, taxon ) ); + userTerm.setFrequency( computeTermFrequencyInTaxon( user, userTerm, taxon ) ); userTerm.setSize( goService.getSizeInTaxon( cachedTerm, taxon ) ); } @@ -684,7 +741,12 @@ public long computeTermOverlaps( UserTerm userTerm, Collection genes ) */ @Override public long computeTermFrequencyInTaxon( User user, GeneOntologyTerm term, Taxon taxon ) { - Set geneIds = new HashSet<>( goService.getGenes( goService.getTerm( term.getGoId() ) ) ); + GeneOntologyTermInfo termInfo = goService.getTerm( term.getGoId() ); + if ( termInfo == null ) { + log.warn( String.format( "Could not find a term info for %s, returning zero for the frequency.", term.getGoId() ) ); + return 0L; + } + Set geneIds = new HashSet<>( goService.getGenesInTaxon( termInfo, taxon ) ); return user.getGenesByTaxonAndTier( taxon, getManualTiers() ).stream() .map( UserGene::getGeneId ) .filter( geneIds::contains ) diff --git a/src/main/java/ubc/pavlab/rdp/settings/ApplicationSettings.java b/src/main/java/ubc/pavlab/rdp/settings/ApplicationSettings.java index 4b2293cb..b2254bcf 100644 --- a/src/main/java/ubc/pavlab/rdp/settings/ApplicationSettings.java +++ 
b/src/main/java/ubc/pavlab/rdp/settings/ApplicationSettings.java @@ -263,6 +263,11 @@ public static class OntologySettings { private Resource faqFile; private boolean sendEmailOnRegistration; + /** + * Minimum overlap with TIER1 or TIER2 genes for recommending a term. + */ + @Min(1) + private long goTermMinOverlap; /** * Maximum number of GO terms. */ diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index c87467b7..839a246b 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -94,8 +94,10 @@ rdp.settings.cache.organ-file=http://purl.obolibrary.org/obo/uberon.obo # Send email to admin-email whenever there is a new registration rdp.settings.send-email-on-registration=false -# Maximum number of genes a term can have associated with it -# and still be available to add to a profile. +# Minimum overlap with TIER1 or TIER2 genes for recommending a term. +rdp.settings.go-term-min-overlap=2 +# Maximum number of genes a term can have associated with it and be recommended or have their genes added as TIER3 to a +# profile rdp.settings.go-term-size-limit=50 # Tiers diff --git a/src/main/resources/messages.properties b/src/main/resources/messages.properties index fea1492e..80d5e769 100644 --- a/src/main/resources/messages.properties +++ b/src/main/resources/messages.properties @@ -116,6 +116,14 @@ AdminController.SimpleOntologyForm.ontologyTerms.emptyGroupNotAllowed=Grouping t AdminController.DeleteOntologyForm.ontologyNameConfirmation.doesNotMatchOntologyName=The confirmation does not match the ontology name. 
+# All the following messages are displayed after "Could not recommend new terms: " and are followed by a period, so they must not start with a capitalized sentence or end with punctuation +UserService.recommendTerms.tooFewGenes=too few genes were supplied; you need at least {0} genes to get recommendations +# {0} contains the taxon common name +UserService.recommendTerms.noTermsInTaxon=GO terms are not available for {0} +# {0} contains the taxon common name +UserService.recommendTerms.noGeneAssociationsInTaxon=GO term to gene associations are not available for {0} +UserService.recommendTerms.noResults=no terms meet the requirements; try adding more genes first + # {0} contains the site shortname ApiConfig.title={0} RESTful API # {0} contains the site shortname diff --git a/src/main/resources/static/js/model.js b/src/main/resources/static/js/model.js index 93d501aa..59e1c210 100644 --- a/src/main/resources/static/js/model.js +++ b/src/main/resources/static/js/model.js @@ -419,18 +419,18 @@ var spinner = $(this).find('.spinner'); spinner.toggleClass("d-none", false); recommendMessage.classList.toggle('d-none', true); - var geneIds = geneTable.DataTable().column(0).data().toArray(); + var geneIds = geneTable.DataTable().column(1).data().toArray(); $.getJSON(window.contextPath + "/user/taxon/" + encodeURIComponent(window.currentTaxonId) + "/term/recommend", { geneIds: geneIds }).done(function (data) { - var addedTerms = addTermRow(data); + var addedTerms = addTermRow(data.recommendedTerms); if (addedTerms > 0) { recommendMessage.textContent = 'Recommended ' + addedTerms + ' terms.'; recommendMessage.classList.toggle('alert-success', true); recommendMessage.classList.toggle('alert-danger', false); recommendMessage.removeAttribute('role'); } else { - recommendMessage.textContent = 'Could not recommend new terms. Try adding more genes first.'; + recommendMessage.textContent = 'Could not recommend new terms' + (data.feedback ? 
': ' + data.feedback : '') + '.'; recommendMessage.classList.toggle('alert-success', false); recommendMessage.classList.toggle('alert-danger', true); recommendMessage.setAttribute('role', 'alert'); diff --git a/src/test/java/ubc/pavlab/rdp/controllers/UserControllerTest.java b/src/test/java/ubc/pavlab/rdp/controllers/UserControllerTest.java index 5b1642ac..b05124af 100644 --- a/src/test/java/ubc/pavlab/rdp/controllers/UserControllerTest.java +++ b/src/test/java/ubc/pavlab/rdp/controllers/UserControllerTest.java @@ -549,24 +549,26 @@ public void givenLoggedIn_whenRecommendTerms_thenReturnJson() when( userService.findCurrentUser() ).thenReturn( user ); - when( userService.recommendTerms( any(), any(), eq( taxon ) ) ).thenReturn( Sets.newSet( t1, t2 ) ); - when( userService.recommendTerms( any(), any(), eq( taxon2 ) ) ).thenReturn( Sets.newSet( t3, t4 ) ); + when( userService.recommendTerms( any(), eq( taxon ), any() ) ).thenReturn( Sets.newSet( t1, t2 ) ); + when( userService.recommendTerms( any(), eq( taxon2 ), any() ) ).thenReturn( Sets.newSet( t3, t4 ) ); mvc.perform( get( "/user/taxon/1/term/recommend" ) .contentType( MediaType.APPLICATION_JSON ) ) .andExpect( status().isOk() ) - .andExpect( jsonPath( "$", hasSize( 2 ) ) ) - .andExpect( jsonPath( "$[*].goId" ).value( containsInAnyOrder( t1.getGoId(), t2.getGoId() ) ) ) - .andExpect( jsonPath( "$[*].taxon.id" ).value( contains( taxon.getId(), taxon.getId() ) ) ); - verify( userService ).recommendTerms( eq( user ), any(), eq( taxon ) ); + .andExpect( jsonPath( "$.recommendedTerms", hasSize( 2 ) ) ) + .andExpect( jsonPath( "$.recommendedTerms[*].goId" ).value( containsInAnyOrder( t1.getGoId(), t2.getGoId() ) ) ) + .andExpect( jsonPath( "$.recommendedTerms[*].taxon.id" ).value( contains( taxon.getId(), taxon.getId() ) ) ) + .andExpect( jsonPath( "$.feedback" ).value( nullValue() ) ); + verify( userService ).recommendTerms( eq( user ), eq( taxon ), any() ); mvc.perform( get( "/user/taxon/2/term/recommend" ) 
.contentType( MediaType.APPLICATION_JSON ) ) .andExpect( status().isOk() ) - .andExpect( jsonPath( "$" ).value( hasSize( 2 ) ) ) - .andExpect( jsonPath( "$[*].goId" ).value( containsInAnyOrder( t3.getGoId(), t4.getGoId() ) ) ) - .andExpect( jsonPath( "$[*].taxon.id" ).value( contains( taxon2.getId(), taxon2.getId() ) ) ); - verify( userService ).recommendTerms( eq( user ), any(), eq( taxon2 ) ); + .andExpect( jsonPath( "$.recommendedTerms" ).value( hasSize( 2 ) ) ) + .andExpect( jsonPath( "$.recommendedTerms[*].goId" ).value( containsInAnyOrder( t3.getGoId(), t4.getGoId() ) ) ) + .andExpect( jsonPath( "$.recommendedTerms[*].taxon.id" ).value( contains( taxon2.getId(), taxon2.getId() ) ) ) + .andExpect( jsonPath( "$.feedback" ).value( nullValue() ) ); + verify( userService ).recommendTerms( eq( user ), eq( taxon2 ), any() ); } // POST diff --git a/src/test/java/ubc/pavlab/rdp/services/UserServiceImplTest.java b/src/test/java/ubc/pavlab/rdp/services/UserServiceImplTest.java index 4be56c3e..2cc4717e 100644 --- a/src/test/java/ubc/pavlab/rdp/services/UserServiceImplTest.java +++ b/src/test/java/ubc/pavlab/rdp/services/UserServiceImplTest.java @@ -1165,14 +1165,16 @@ private void assertThatUserGenesAreEqualTo( User user, Taxon taxon, Map found = userService.recommendTerms( user, taxon ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 1 ), toGOId( 7 ), toGOId( 8 ) ); + Collection found = userService.recommendTerms( user, taxon, null ); + assertThat( found ) + .extracting( GeneOntologyTerm::getGoId ) + .containsExactlyInAnyOrder( "GO:0000007", "GO:0000000", "GO:0000002", "GO:0000004", "GO:0000006", + "GO:0000008", "GO:0000001", "GO:0000003", "GO:0000005", "GO:0000099" ); } @Test @@ -1181,37 +1183,48 @@ public void recommendTerms_whenMinSizeLimited_thenReturnBestLimitedResultsOnly() User user = createUser( 1 ); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( 
user, taxon, 12, -1, -1 ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 1 ), toGOId( 7 ), toGOId( 8 ) ); + Collection found = userService.recommendTerms( user, taxon, -1, 0 ); + assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ) + .containsExactlyInAnyOrder( "GO:0000008", "GO:0000004", "GO:0000002", "GO:0000099", "GO:0000000", "GO:0000007", "GO:0000005", "GO:0000003", "GO:0000006", "GO:0000001" ); - found = userService.recommendTerms( user, taxon, 20, -1, -1 ); + found = userService.recommendTerms( user, taxon, -1, 1 ); assertThat( found ).isEmpty(); } @Test - @Ignore public void recommendTerms_whenMaxSizeLimited_thenReturnBestLimitedResultsOnly() { setUpRecommendTermsMocks(); User user = createUser( 1 ); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( user, taxon, -1, 12, -1 ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 0 ), toGOId( 4 ), toGOId( 6 ) ); - - found = userService.recommendTerms( user, taxon, -1, 1, -1 ); + Collection found = userService.recommendTerms( user, taxon, 12, 0 ); + assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ) + .containsExactlyInAnyOrder( "GO:0000008", + "GO:0000006", + "GO:0000003", + "GO:0000001", + "GO:0000007", + "GO:0000005", + "GO:0000000", + "GO:0000099", + "GO:0000004", + "GO:0000002" ); + + found = userService.recommendTerms( user, taxon, 1, 0 ); assertThat( found ).isEmpty(); } @Test + @Ignore public void recommendTerms_whenFrequencyLimited_thenReturnBestLimitedResultsOnly() { setUpRecommendTermsMocks(); User user = createUser( 1 ); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( user, taxon, -1, -1, 3 ); + Collection found = userService.recommendTerms( user, taxon, -1, 3 ); assertThat( found.stream().map( 
GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 1 ), toGOId( 7 ), toGOId( 8 ) ); - found = userService.recommendTerms( user, taxon, -1, -1, 4 ); + found = userService.recommendTerms( user, taxon, -1, 4 ); assertThat( found ).isEmpty(); } @@ -1222,10 +1235,11 @@ public void recommendTerms_whenFrequencyLimitedAndSizeLimited_thenReturnBestLimi User user = createUser( 1 ); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( user, taxon, 11, 12, 2 ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 0 ), toGOId( 4 ), toGOId( 6 ) ); + Collection found = userService.recommendTerms( user, taxon, 12, 2 ); + assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ) + .containsExactlyInAnyOrder( "GO:0000007", "GO:0000000", "GO:0000004", "GO:0000006", "GO:0000008", "GO:0000001" ); - found = userService.recommendTerms( user, taxon, 1, 11, 2 ); + found = userService.recommendTerms( user, taxon, 11, 2 ); assertThat( found ).isEmpty(); } @@ -1236,15 +1250,15 @@ public void recommendTerms_whenRedundantTerms_thenReturnOnlyMostSpecific() { User user = createUser( 1 ); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( user, taxon, 11, 11, 1 ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 2 ), toGOId( 3 ), toGOId( 5 ), toGOId( 99 ) ); + Collection found = userService.recommendTerms( user, taxon, 11, 1 ); + assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ) + .containsExactlyInAnyOrder( "GO:0000006", "GO:0000002", "GO:0000004", "GO:0000000", "GO:0000099", "GO:0000003", "GO:0000005" ); - found = userService.recommendTerms( user, taxon, 1, 11, 2 ); + found = userService.recommendTerms( user, taxon, 11, 2 ); assertThat( found ).isEmpty(); } @Test 
- @Ignore public void recommendTerms_whenUserHasSomeTopTerms_thenReturnNewBestResultsOnly() { setUpRecommendTermsMocks(); @@ -1253,8 +1267,10 @@ public void recommendTerms_whenUserHasSomeTopTerms_thenReturnNewBestResultsOnly( user.getUserTerms().add( createUserTerm( 1, user, createTerm( toGOId( 1 ) ), taxon ) ); - Collection found = userService.recommendTerms( user, taxon ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 7 ), toGOId( 8 ) ); + Collection found = userService.recommendTerms( user, taxon, null ); + assertThat( found ).extracting( GeneOntologyTerm::getGoId ) + .containsExactlyInAnyOrder( "GO:0000000", "GO:0000099", "GO:0000007", "GO:0000004", + "GO:0000002", "GO:0000008", "GO:0000006", "GO:0000005", "GO:0000003" ); } @Test @@ -1268,8 +1284,9 @@ public void recommendTerms_whenUserHasAllTopTerms_thenReturnNextBestResultsOnly( user.getUserTerms().add( createUserTerm( 2, user, createTerm( toGOId( 7 ) ), taxon ) ); user.getUserTerms().add( createUserTerm( 3, user, createTerm( toGOId( 8 ) ), taxon ) ); - Collection found = userService.recommendTerms( user, taxon ); - assertThat( found.stream().map( GeneOntologyTerm::getGoId ).collect( Collectors.toList() ) ).containsExactlyInAnyOrder( toGOId( 0 ), toGOId( 4 ), toGOId( 6 ) ); + Collection found = userService.recommendTerms( user, taxon, null ); + assertThat( found ).extracting( GeneOntologyTerm::getGoId ) + .containsExactlyInAnyOrder( "GO:0000003", "GO:0000005", "GO:0000000", "GO:0000099", "GO:0000002", "GO:0000004", "GO:0000006" ); } @Test @@ -1279,7 +1296,7 @@ public void recommendTerms_whenUserHasNoGenes_thenReturnEmpty() { User user = createUser( 1 ); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( user, taxon, -1, -1, -1 ); + Collection found = userService.recommendTerms( user, taxon, -1, -1 ); assertThat( found ).isEmpty(); } @@ -1288,7 +1305,7 @@ public void 
recommendTerms_whenUserNull_thenThrowNullPointerException() { setUpRecommendTermsMocks(); Taxon taxon = createTaxon( 1 ); - Collection found = userService.recommendTerms( null, taxon, -1, -1, -1 ); + Collection found = userService.recommendTerms( null, taxon, -1, -1 ); assertThat( found ).isNull(); } @@ -1297,7 +1314,7 @@ public void recommendTerms_whenTaxonNull_thenThrowNullPointerException() { setUpRecommendTermsMocks(); User user = createUser( 1 ); - userService.recommendTerms( user, null, -1, -1, -1 ); + userService.recommendTerms( user, null, -1, -1 ); } diff --git a/src/test/java/ubc/pavlab/rdp/services/UserServiceTermRecommendationTest.java b/src/test/java/ubc/pavlab/rdp/services/UserServiceTermRecommendationTest.java new file mode 100644 index 00000000..091acb72 --- /dev/null +++ b/src/test/java/ubc/pavlab/rdp/services/UserServiceTermRecommendationTest.java @@ -0,0 +1,148 @@ +package ubc.pavlab.rdp.services; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.cache.CacheManager; +import org.springframework.cache.concurrent.ConcurrentMapCacheManager; +import org.springframework.context.ApplicationEventPublisher; +import org.springframework.context.MessageSource; +import org.springframework.context.annotation.Bean; +import org.springframework.core.io.ClassPathResource; +import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit4.SpringRunner; +import ubc.pavlab.rdp.model.GeneOntologyTerm; +import ubc.pavlab.rdp.model.Taxon; +import ubc.pavlab.rdp.model.User; +import ubc.pavlab.rdp.model.UserGene; +import ubc.pavlab.rdp.model.enums.TierType; +import ubc.pavlab.rdp.repositories.*; +import 
ubc.pavlab.rdp.security.SecureTokenChallenge; +import ubc.pavlab.rdp.settings.ApplicationSettings; +import ubc.pavlab.rdp.util.OBOParser; + +import javax.servlet.http.HttpServletRequest; +import java.security.SecureRandom; +import java.util.Collections; +import java.util.EnumSet; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * A tailored test to verify that GO term recommendations work as expected, using real {@link GOServiceImpl} and {@link UserServiceImpl} beans. + * + * @author poirigui + */ +@RunWith(SpringRunner.class) +@TestPropertySource("classpath:application.properties") +public class UserServiceTermRecommendationTest { + + @TestConfiguration + static class TTCC { + + @Bean + public ApplicationSettings applicationSettings() { + ApplicationSettings applicationSettings = mock( ApplicationSettings.class ); + when( applicationSettings.getIsearch() ).thenReturn( new ApplicationSettings.InternationalSearchSettings() ); + when( applicationSettings.getGoTermSizeLimit() ).thenReturn( 50L ); + when( applicationSettings.getGoTermMinOverlap() ).thenReturn( 2L ); + when( applicationSettings.getEnabledTiers() ).thenReturn( EnumSet.allOf( TierType.class ) ); + ApplicationSettings.CacheSettings cacheSettings = new ApplicationSettings.CacheSettings(); + cacheSettings.setTermFile( "classpath:cache/go.obo" ); + cacheSettings.setAnnotationFile( new ClassPathResource( "cache/gene2go.gz" ) ); + when( applicationSettings.getCache() ).thenReturn( cacheSettings ); + return applicationSettings; + } + + @Bean + public GOService goService() { + return new GOServiceImpl(); + } + + @Bean + public GeneOntologyTermInfoRepository geneOntologyTermInfoRepository() { + return new GeneOntologyTermInfoRepository(); + } + + @Bean + public UserServiceImpl userService() { + return new UserServiceImpl(); + } + + @Bean + public OBOParser oboParser() { + return new OBOParser(); + } + + @Bean + public CacheManager cacheManager() { + return new 
ConcurrentMapCacheManager(); + } + } + + @MockBean + private UserRepository userRepository; + @MockBean + private RoleRepository roleRepository; + @MockBean + private PasswordResetTokenRepository passwordResetTokenRepository; + @MockBean + private VerificationTokenRepository tokenRepository; + @MockBean + private BCryptPasswordEncoder bCryptPasswordEncoder; + @MockBean + private OrganInfoService organInfoService; + @MockBean + private ApplicationEventPublisher eventPublisher; + @MockBean + private AccessTokenRepository accessTokenRepository; + @MockBean + private MessageSource messageSource; + @MockBean + private GeneInfoService geneInfoService; + @MockBean + private PrivacyService privacyService; + @MockBean + private SecureRandom secureRandom; + @MockBean + private OntologyService ontologyService; + @MockBean + private SecureTokenChallenge secureTokenChallenge; + @MockBean + private TaxonService taxonService; + + + @Autowired + private UserServiceImpl userService; + @Autowired + private GOService goService; + + private Taxon taxon = new Taxon(); + + @Test + public void test() { + taxon.setId( 9606 ); + when( taxonService.findByActiveTrue() ).thenReturn( Collections.singleton( taxon ) ); + goService.updateGoTerms(); + User user = new User(); + user.getUserGenes().put( 1, createGene( "BRCA1", 672 ) ); + user.getUserGenes().put( 2, createGene( "BRCA2", 675 ) ); + assertThat( userService.recommendTerms( user, taxon, null ) ) + .extracting( GeneOntologyTerm::getGoId ) + .containsExactlyInAnyOrder( "GO:0000800", "GO:0006978" ); + } + + private UserGene createGene( String symbol, int geneId ) { + UserGene gene = new UserGene(); + gene.setSymbol( symbol ); + gene.setGeneId( geneId ); + gene.setTier( TierType.TIER1 ); + gene.setTaxon( taxon ); + return gene; + } +} diff --git a/src/test/resources/application.properties b/src/test/resources/application.properties index 3a28dfb7..4034a3d9 100644 --- a/src/test/resources/application.properties +++ 
b/src/test/resources/application.properties @@ -70,4 +70,6 @@ rdp.settings.organs.enabled=true rdp.settings.enabled-tiers= rdp.settings.privacy.enabled-gene-levels= rdp.settings.search.enabled-search-modes=BY_GENE,BY_RESEARCHER -rdp.settings.isearch.auth-tokens= \ No newline at end of file +rdp.settings.isearch.auth-tokens= +rdp.settings.go-term-min-overlap=2 +rdp.settings.go-term-size-limit=50 \ No newline at end of file