From 498ebb62b7ca3d9d962e39fc29e80ac3f1c1e6ea Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Tue, 22 Oct 2024 16:15:20 -0300 Subject: [PATCH] Update classifier code for mypy (#2122) - Remove all star imports for classifier code from palace.manager.core.classifier import * - Add a __getattr__ method to classifier/__init__.py to let mypy know to expect dynamic imports - Move around some of the classifier code to break import cycle - Remove duplicated GradeLevelClassifier class --- pyproject.toml | 12 - .../api/admin/controller/work_editor.py | 8 +- .../manager/core/classifier/__init__.py | 874 +----------------- src/palace/manager/core/classifier/age.py | 77 ++ src/palace/manager/core/classifier/bic.py | 191 ++-- src/palace/manager/core/classifier/bisac.py | 469 +++++----- src/palace/manager/core/classifier/ddc.py | 91 +- .../manager/core/classifier/gutenberg.py | 97 +- src/palace/manager/core/classifier/keyword.py | 267 +++--- src/palace/manager/core/classifier/lcc.py | 99 +- .../manager/core/classifier/overdrive.py | 145 +-- .../manager/core/classifier/simplified.py | 17 +- src/palace/manager/core/classifier/work.py | 614 ++++++++++++ src/palace/manager/search/external_search.py | 9 +- .../sqlalchemy/model/classification.py | 7 +- src/palace/manager/sqlalchemy/model/work.py | 3 +- .../api/admin/controller/test_work_editor.py | 2 +- tests/manager/core/classifiers/test_age.py | 5 +- tests/manager/core/classifiers/test_bic.py | 2 +- tests/manager/core/classifiers/test_bisac.py | 3 +- .../core/classifiers/test_classifier.py | 14 +- tests/manager/core/classifiers/test_ddc.py | 2 +- .../manager/core/classifiers/test_keyword.py | 2 +- tests/manager/core/classifiers/test_lcc.py | 2 +- .../core/classifiers/test_overdrive.py | 2 +- .../core/classifiers/test_simplified.py | 7 +- tests/manager/core/test_opds_import.py | 2 +- tests/manager/feed/test_library_annotator.py | 6 +- .../feed/test_loan_and_hold_annotator.py | 5 - 29 files changed, 1463 insertions(+), 1571 
deletions(-) create mode 100644 src/palace/manager/core/classifier/work.py diff --git a/pyproject.toml b/pyproject.toml index 9c5ed1356a..ef4322ff20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,9 +34,6 @@ profile = "black" # to silence the noise in the mypy output for now, so its easier to see when # there are errors in the output. disable_error_code = "annotation-unchecked" -exclude = [ - 'core/classifier', -] explicit_package_bases = true files = ["alembic", "src/palace", "tests"] mypy_path = ["alembic", "src"] @@ -160,15 +157,6 @@ strict_equality = true warn_return_any = true warn_unused_ignores = true -[[tool.mypy.overrides]] -# This override silences errors for modules in our own codebase that we import -# from other covered modules. Ideally we will be able to remove this override -# eventually, once we have type hints for all of our own code. -follow_imports = "silent" -module = [ - "palace.manager.core.classifier.*", -] - [[tool.mypy.overrides]] # This override silences errors for modules we import that don't currently # have type hints, or type stubs that cover them. 
We should go through this diff --git a/src/palace/manager/api/admin/controller/work_editor.py b/src/palace/manager/api/admin/controller/work_editor.py index c9c5292546..834d9d1283 100644 --- a/src/palace/manager/api/admin/controller/work_editor.py +++ b/src/palace/manager/api/admin/controller/work_editor.py @@ -27,12 +27,8 @@ LIBRARY_NOT_FOUND, REMOTE_INTEGRATION_FAILED, ) -from palace.manager.core.classifier import ( - NO_NUMBER, - NO_VALUE, - SimplifiedGenreClassifier, - genres, -) +from palace.manager.core.classifier import NO_NUMBER, NO_VALUE, genres +from palace.manager.core.classifier.simplified import SimplifiedGenreClassifier from palace.manager.feed.acquisition import OPDSAcquisitionFeed from palace.manager.feed.annotator.admin import AdminAnnotator from palace.manager.sqlalchemy.model.classification import ( diff --git a/src/palace/manager/core/classifier/__init__.py b/src/palace/manager/core/classifier/__init__.py index 636909220c..05b58fbd05 100644 --- a/src/palace/manager/core/classifier/__init__.py +++ b/src/palace/manager/core/classifier/__init__.py @@ -9,12 +9,7 @@ # SQL to find commonly used classifications not assigned to a genre # select count(identifiers.id) as c, subjects.type, substr(subjects.identifier, 0, 20) as i, substr(subjects.name, 0, 20) as n from workidentifiers join classifications on workidentifiers.id=classifications.work_identifier_id join subjects on classifications.subject_id=subjects.id where subjects.genre_id is null and subjects.fiction is null group by subjects.type, i, n order by c desc; - -import logging -import re -from collections import Counter - -from sqlalchemy.orm.session import Session +from __future__ import annotations from palace.manager.util.resources import resources_dir @@ -101,7 +96,7 @@ class Classifier(ClassifierConstants): if x != ClassifierConstants.AUDIENCE_RESEARCH ] - classifiers = dict() + classifiers: dict[str, type[Classifier]] = {} @classmethod def range_tuple(cls, lower, upper): @@ -316,139 +311,6 
@@ def and_up(cls, young, keyword): return old -class GradeLevelClassifier(Classifier): - # How old a kid is when they start grade N in the US. - american_grade_to_age = { - # Preschool: 3-4 years - "preschool": 3, - "pre-school": 3, - "p": 3, - "pk": 4, - # Early readers - "kindergarten": 5, - "k": 5, - "0": 5, - "first": 6, - "1": 6, - "second": 7, - "2": 7, - # Chapter Books - "third": 8, - "3": 8, - "fourth": 9, - "4": 9, - "fifth": 10, - "5": 10, - "sixth": 11, - "6": 11, - "7": 12, - "8": 13, - # YA - "9": 14, - "10": 15, - "11": 16, - "12": 17, - } - - # Regular expressions that match common ways of expressing grade - # levels. - # TODO: Is this code duplicated in core/classifier/age.py? - grade_res = [ - re.compile(x, re.I) - for x in [ - "grades? ([kp0-9]+) to ([kp0-9]+)?", - "grades? ([kp0-9]+) ?-? ?([kp0-9]+)?", - r"gr\.? ([kp0-9]+) ?-? ?([kp0-9]+)?", - "grades?: ([kp0-9]+) to ([kp0-9]+)", - "grades?: ([kp0-9]+) ?-? ?([kp0-9]+)?", - r"gr\.? ([kp0-9]+)", - "([0-9]+)[tnsr][hdt] grade", - "([a-z]+) grade", - r"\b(kindergarten|preschool)\b", - ] - ] - - generic_grade_res = [ - re.compile(r"([kp0-9]+) ?- ?([0-9]+)", re.I), - re.compile(r"([kp0-9]+) ?to ?([0-9]+)", re.I), - re.compile(r"^([0-9]+)\b", re.I), - re.compile(r"^([kp])\b", re.I), - ] - - @classmethod - def audience(cls, identifier, name, require_explicit_age_marker=False): - target_age = cls.target_age(identifier, name, require_explicit_age_marker) - return cls.default_audience_for_target_age(target_age) - - @classmethod - def target_age(cls, identifier, name, require_explicit_grade_marker=False): - if (identifier and "education" in identifier) or (name and "education" in name): - # This is a book about teaching, e.g. fifth grade. - return cls.range_tuple(None, None) - - if (identifier and "grader" in identifier) or (name and "grader" in name): - # This is a book about, e.g. fifth graders. 
- return cls.range_tuple(None, None) - - if require_explicit_grade_marker: - res = cls.grade_res - else: - res = cls.grade_res + cls.generic_grade_res - - for r in res: - for k in identifier, name: - if not k: - continue - m = r.search(k) - if m: - gr = m.groups() - if len(gr) == 1: - young = gr[0] - old = None - else: - young, old = gr - - # Strip leading zeros - if young and young.lstrip("0"): - young = young.lstrip("0") - if old and old.lstrip("0"): - old = old.lstrip("0") - - young = cls.american_grade_to_age.get(young) - old = cls.american_grade_to_age.get(old) - - if not young and not old: - return cls.range_tuple(None, None) - - if young: - young = int(young) - if old: - old = int(old) - if old is None: - old = cls.and_up(young, k) - if old is None and young is not None: - old = young - if young is None and old is not None: - young = old - if old and young and old < young: - young, old = old, young - return cls.range_tuple(young, old) - return cls.range_tuple(None, None) - - @classmethod - def target_age_match(cls, query): - target_age = None - grade_words = None - target_age = cls.target_age(None, query, require_explicit_grade_marker=True) - if target_age: - for r in cls.grade_res: - match = r.search(query) - if match: - grade_words = match.group() - break - return (target_age, grade_words) - - # This is the large-scale structure of our classification system. 
# # If the name of a genre is a string, it's the name of the genre @@ -791,14 +653,25 @@ def add_genre( ) -Fantasy: GenreData -Romance: GenreData -Science_Fiction: GenreData -Contemporary_Romance: GenreData -Epic_Fantasy: GenreData +genres: dict[str, GenreData] = dict() +genres_by_variable_name: dict[str, GenreData] = dict() +GenreData.populate(genres_by_variable_name, genres, fiction_genres, nonfiction_genres) + -genres = dict() -GenreData.populate(globals(), genres, fiction_genres, nonfiction_genres) +# The structure of this module is to make all the GenreData objects available, +# for import anywhere in the codebase, as attributes of this module. This used +# to be done by adding them all to globals(), but this made type checking with +# mypy difficult. So instead we add them to a dictionary and then use __getattr__ +# to make them available as attributes of the module. This lets mypy know that +# unknown attributes are actually GenreData objects. +# +# See this stackoverflow answer for more details: +# https://stackoverflow.com/questions/60739889/bypass-mypys-module-has-no-attribute-on-dynamic-attribute-setting +# +# TODO: Eventually I'd like to refactor this, so we don't have to use __getattr__ +# here, and can just import the GenreData objects directly. +def __getattr__(name: str) -> GenreData: + return genres_by_variable_name[name] class Lowercased(str): @@ -821,710 +694,3 @@ def __new__(cls, value): def scrub_identifier(cls, identifier): if not identifier: return identifier - - -class AgeOrGradeClassifier(Classifier): - @classmethod - def audience(cls, identifier, name): - audience = AgeClassifier.audience(identifier, name) - if audience == None: - audience = GradeLevelClassifier.audience(identifier, name) - return audience - - @classmethod - def target_age(cls, identifier, name): - """This tag might contain a grade level, an age in years, or nothing. 
- We will try both a grade level and an age in years, but we - will require that the tag indicate what's being measured. A - tag like "9-12" will not match anything because we don't know if it's - age 9-12 or grade 9-12. - """ - age = AgeClassifier.target_age(identifier, name, True) - if age == cls.range_tuple(None, None): - age = GradeLevelClassifier.target_age(identifier, name, True) - return age - - -class FreeformAudienceClassifier(AgeOrGradeClassifier): - # NOTE: In practice, subjects like "books for all ages" tend to be - # more like advertising slogans than reliable indicators of an - # ALL_AGES audience. So the only subject of this type we handle is - # the literal string "all ages", as it would appear, e.g., in the - # output of the metadata wrangler. - - @classmethod - def audience(cls, identifier, name): - if identifier in ("children", "pre-adolescent", "beginning reader"): - return cls.AUDIENCE_CHILDREN - elif identifier in ( - "young adult", - "ya", - "teenagers", - "adolescent", - "early adolescents", - ): - return cls.AUDIENCE_YOUNG_ADULT - elif identifier == "adult": - return cls.AUDIENCE_ADULT - elif identifier == "adults only": - return cls.AUDIENCE_ADULTS_ONLY - elif identifier == "all ages": - return cls.AUDIENCE_ALL_AGES - elif identifier == "research": - return cls.AUDIENCE_RESEARCH - return AgeOrGradeClassifier.audience(identifier, name) - - @classmethod - def target_age(cls, identifier, name): - if identifier == "beginning reader": - return cls.range_tuple(5, 8) - if identifier == "pre-adolescent": - return cls.range_tuple(9, 12) - if identifier == "early adolescents": - return cls.range_tuple(13, 15) - if identifier == "all ages": - return cls.range_tuple(cls.ALL_AGES_AGE_CUTOFF, None) - strict_age = AgeClassifier.target_age(identifier, name, True) - if strict_age[0] or strict_age[1]: - return strict_age - - strict_grade = GradeLevelClassifier.target_age(identifier, name, True) - if strict_grade[0] or strict_grade[1]: - return strict_grade 
- - # Default to assuming it's an unmarked age. - return AgeClassifier.target_age(identifier, name, False) - - -class WorkClassifier: - """Boil down a bunch of Classification objects into a few values.""" - - # TODO: This needs a lot of additions. - genre_publishers = { - "Harlequin": Romance, - "Pocket Books/Star Trek": Media_Tie_in_SF, - "Kensington": Urban_Fiction, - "Fodor's Travel Publications": Travel, - "Marvel Entertainment, LLC": Comics_Graphic_Novels, - } - - genre_imprints = { - "Harlequin Intrigue": Romantic_Suspense, - "Love Inspired Suspense": Romantic_Suspense, - "Harlequin Historical": Historical_Romance, - "Harlequin Historical Undone": Historical_Romance, - "Frommers": Travel, - "LucasBooks": Media_Tie_in_SF, - } - - audience_imprints = { - "Harlequin Teen": Classifier.AUDIENCE_YOUNG_ADULT, - "HarperTeen": Classifier.AUDIENCE_YOUNG_ADULT, - "Open Road Media Teen & Tween": Classifier.AUDIENCE_YOUNG_ADULT, - "Rosen Young Adult": Classifier.AUDIENCE_YOUNG_ADULT, - } - - not_adult_publishers = { - "Scholastic Inc.", - "Random House Children's Books", - "Little, Brown Books for Young Readers", - "Penguin Young Readers Group", - "Hachette Children's Books", - "Nickelodeon Publishing", - } - - not_adult_imprints = { - "Scholastic", - "Scholastic Paperbacks", - "Random House Books for Young Readers", - "HMH Books for Young Readers", - "Knopf Books for Young Readers", - "Delacorte Books for Young Readers", - "Open Road Media Young Readers", - "Macmillan Young Listeners", - "Bloomsbury Childrens", - "NYR Children's Collection", - "Bloomsbury USA Childrens", - "National Geographic Children's Books", - } - - fiction_imprints = {"Del Rey"} - nonfiction_imprints = {"Harlequin Nonfiction"} - - nonfiction_publishers = {"Wiley"} - fiction_publishers = set() - - def __init__(self, work, test_session=None, debug=False): - self._db = Session.object_session(work) - if test_session: - self._db = test_session - self.work = work - self.fiction_weights = Counter() - 
self.audience_weights = Counter() - self.target_age_lower_weights = Counter() - self.target_age_upper_weights = Counter() - self.genre_weights = Counter() - self.direct_from_license_source = set() - self.prepared = False - self.debug = debug - self.classifications = [] - self.seen_classifications = set() - self.log = logging.getLogger("Classifier (workid=%d)" % self.work.id) - self.using_staff_genres = False - self.using_staff_fiction_status = False - self.using_staff_audience = False - self.using_staff_target_age = False - - # Keep track of whether we've seen one of Overdrive's generic - # "Juvenile" classifications, as well as its more specific - # subsets like "Picture Books" and "Beginning Readers" - self.overdrive_juvenile_generic = False - self.overdrive_juvenile_with_target_age = False - - def add(self, classification): - """Prepare a single Classification for consideration.""" - - # We only consider a given classification once from a given - # data source. - key = (classification.subject, classification.data_source) - if key in self.seen_classifications: - return - self.seen_classifications.add(key) - if self.debug: - self.classifications.append(classification) - - # Make sure the Subject is ready to be used in calculations. - if not classification.subject.checked: # or self.debug - classification.subject.assign_to_genre() - - if classification.comes_from_license_source: - self.direct_from_license_source.add(classification) - else: - if classification.subject.describes_format: - # TODO: This is a bit of a hack. - # - # Only accept a classification having to do with - # format (e.g. 'comic books') if that classification - # comes direct from the license source. Otherwise it's - # really easy for a graphic adaptation of a novel to - # get mixed up with the original novel, whereupon the - # original book is classified as a graphic novel. - return - - # Put the weight of the classification behind various - # considerations. 
- weight = classification.scaled_weight - subject = classification.subject - from palace.manager.sqlalchemy.model.datasource import DataSource - - from_staff = classification.data_source.name == DataSource.LIBRARY_STAFF - - # if classification is genre or NONE from staff, ignore all non-staff genres - is_genre = subject.genre != None - from palace.manager.sqlalchemy.model.classification import Subject - - is_none = ( - from_staff - and subject.type == Subject.SIMPLIFIED_GENRE - and subject.identifier == SimplifiedGenreClassifier.NONE - ) - if is_genre or is_none: - if not from_staff and self.using_staff_genres: - return - if from_staff and not self.using_staff_genres: - # first encounter with staff genre, so throw out existing genre weights - self.using_staff_genres = True - self.genre_weights = Counter() - if is_genre: - self.weigh_genre(subject.genre, weight) - - # if staff classification is fiction or nonfiction, ignore all other fictions - if not self.using_staff_fiction_status: - if from_staff and subject.type == Subject.SIMPLIFIED_FICTION_STATUS: - # encountering first staff fiction status, - # so throw out existing fiction weights - self.using_staff_fiction_status = True - self.fiction_weights = Counter() - self.fiction_weights[subject.fiction] += weight - - # if staff classification is about audience, ignore all other audience classifications - if not self.using_staff_audience: - if from_staff and subject.type == Subject.FREEFORM_AUDIENCE: - self.using_staff_audience = True - self.audience_weights = Counter() - self.audience_weights[subject.audience] += weight - else: - if classification.generic_juvenile_audience: - # We have a generic 'juvenile' classification. The - # audience might say 'Children' or it might say 'Young - # Adult' but we don't actually know which it is. - # - # We're going to split the difference, with a slight - # preference for YA, to bias against showing - # age-inappropriate material to children. 
To - # counterbalance the fact that we're splitting up the - # weight this way, we're also going to treat this - # classification as evidence _against_ an 'adult' - # classification. - self.audience_weights[Classifier.AUDIENCE_YOUNG_ADULT] += ( - weight * 0.6 - ) - self.audience_weights[Classifier.AUDIENCE_CHILDREN] += weight * 0.4 - for audience in Classifier.AUDIENCES_ADULT: - if audience != Classifier.AUDIENCE_ALL_AGES: - # 'All Ages' is considered an adult audience, - # but a generic 'juvenile' classification - # is not evidence against it. - self.audience_weights[audience] -= weight * 0.5 - else: - self.audience_weights[subject.audience] += weight - - if not self.using_staff_target_age: - if from_staff and subject.type == Subject.AGE_RANGE: - self.using_staff_target_age = True - self.target_age_lower_weights = Counter() - self.target_age_upper_weights = Counter() - if subject.target_age: - # Figure out how reliable this classification really is as - # an indicator of a target age. - scaled_weight = classification.weight_as_indicator_of_target_age - target_min = subject.target_age.lower - target_max = subject.target_age.upper - if target_min is not None: - if not subject.target_age.lower_inc: - target_min += 1 - self.target_age_lower_weights[target_min] += scaled_weight - if target_max is not None: - if not subject.target_age.upper_inc: - target_max -= 1 - self.target_age_upper_weights[target_max] += scaled_weight - - if not self.using_staff_audience and not self.using_staff_target_age: - if ( - subject.type == "Overdrive" - and subject.audience == Classifier.AUDIENCE_CHILDREN - ): - if subject.target_age and ( - subject.target_age.lower or subject.target_age.upper - ): - # This is a juvenile classification like "Picture - # Books" which implies a target age. - self.overdrive_juvenile_with_target_age = classification - else: - # This is a generic juvenile classification like - # "Juvenile Fiction". 
- self.overdrive_juvenile_generic = classification - - def weigh_metadata(self): - """Modify the weights according to the given Work's metadata. - - Use work metadata to simulate classifications. - - This is basic stuff, like: Harlequin tends to publish - romances. - """ - if self.work.title and ( - "Star Trek:" in self.work.title - or "Star Wars:" in self.work.title - or ("Jedi" in self.work.title and self.work.imprint == "Del Rey") - ): - self.weigh_genre(Media_Tie_in_SF, 100) - - publisher = self.work.publisher - imprint = self.work.imprint - if ( - imprint in self.nonfiction_imprints - or publisher in self.nonfiction_publishers - ): - self.fiction_weights[False] = 100 - elif imprint in self.fiction_imprints or publisher in self.fiction_publishers: - self.fiction_weights[True] = 100 - - if imprint in self.genre_imprints: - self.weigh_genre(self.genre_imprints[imprint], 100) - elif publisher in self.genre_publishers: - self.weigh_genre(self.genre_publishers[publisher], 100) - - if imprint in self.audience_imprints: - self.audience_weights[self.audience_imprints[imprint]] += 100 - elif ( - publisher in self.not_adult_publishers or imprint in self.not_adult_imprints - ): - for audience in [ - Classifier.AUDIENCE_ADULT, - Classifier.AUDIENCE_ADULTS_ONLY, - ]: - self.audience_weights[audience] -= 100 - - def prepare_to_classify(self): - """Called the first time classify() is called. Does miscellaneous - one-time prep work that requires all data to be in place. 
- """ - self.weigh_metadata() - - explicitly_indicated_audiences = ( - Classifier.AUDIENCE_CHILDREN, - Classifier.AUDIENCE_YOUNG_ADULT, - Classifier.AUDIENCE_ADULTS_ONLY, - ) - audiences_from_license_source = { - classification.subject.audience - for classification in self.direct_from_license_source - } - if ( - self.direct_from_license_source - and not self.using_staff_audience - and not any( - audience in explicitly_indicated_audiences - for audience in audiences_from_license_source - ) - ): - # If this was erotica, or a book for children or young - # adults, the distributor would have given some indication - # of that fact. In the absense of any such indication, we - # can assume very strongly that this is a regular old book - # for adults. - # - # 3M is terrible at distinguishing between childrens' - # books and YA books, but books for adults can be - # distinguished by their _lack_ of childrens/YA - # classifications. - self.audience_weights[Classifier.AUDIENCE_ADULT] += 500 - - if ( - self.overdrive_juvenile_generic - and not self.overdrive_juvenile_with_target_age - ): - # This book is classified under 'Juvenile Fiction' but not - # under 'Picture Books' or 'Beginning Readers'. The - # implicit target age here is 9-12 (the portion of - # Overdrive's 'juvenile' age range not covered by 'Picture - # Books' or 'Beginning Readers'. - weight = self.overdrive_juvenile_generic.weight_as_indicator_of_target_age - self.target_age_lower_weights[9] += weight - self.target_age_upper_weights[12] += weight - - self.prepared = True - - def classify(self, default_fiction=None, default_audience=None): - # Do a little prep work. 
- if not self.prepared: - self.prepare_to_classify() - - if self.debug: - for c in self.classifications: - self.log.debug( - "%d %r (via %s)", c.weight, c.subject, c.data_source.name - ) - - # Actually figure out the classifications - fiction = self.fiction(default_fiction=default_fiction) - genres = self.genres(fiction) - audience = self.audience(genres, default_audience=default_audience) - target_age = self.target_age(audience) - if self.debug: - self.log.debug("Fiction weights:") - for k, v in self.fiction_weights.most_common(): - self.log.debug(" %s: %s", v, k) - self.log.debug("Genre weights:") - for k, v in self.genre_weights.most_common(): - self.log.debug(" %s: %s", v, k) - self.log.debug("Audience weights:") - for k, v in self.audience_weights.most_common(): - self.log.debug(" %s: %s", v, k) - return genres, fiction, audience, target_age - - def fiction(self, default_fiction=None): - """Is it more likely this is a fiction or nonfiction book?""" - if not self.fiction_weights: - # We have absolutely no idea one way or the other, and it - # would be irresponsible to guess. - return default_fiction - is_fiction = default_fiction - if self.fiction_weights[True] > self.fiction_weights[False]: - is_fiction = True - elif self.fiction_weights[False] > 0: - is_fiction = False - return is_fiction - - def audience(self, genres=[], default_audience=None): - """What's the most likely audience for this book? - :param default_audience: To avoid embarassing situations we will - classify works as being intended for adults absent convincing - evidence to the contrary. In some situations (like the metadata - wrangler), it's better to state that we have no information, so - default_audience can be set to None. - """ - - # If we determined that Erotica was a significant enough - # component of the classification to count as a genre, the - # audience will always be 'Adults Only', even if the audience - # weights would indicate something else. 
- if Erotica in genres: - return Classifier.AUDIENCE_ADULTS_ONLY - - w = self.audience_weights - if not self.audience_weights: - # We have absolutely no idea, and it would be - # irresponsible to guess. - return default_audience - - children_weight = w.get(Classifier.AUDIENCE_CHILDREN, 0) - ya_weight = w.get(Classifier.AUDIENCE_YOUNG_ADULT, 0) - adult_weight = w.get(Classifier.AUDIENCE_ADULT, 0) - adults_only_weight = w.get(Classifier.AUDIENCE_ADULTS_ONLY, 0) - all_ages_weight = w.get(Classifier.AUDIENCE_ALL_AGES, 0) - research_weight = w.get(Classifier.AUDIENCE_RESEARCH, 0) - - total_adult_weight = adult_weight + adults_only_weight - total_weight = sum(w.values()) - - audience = default_audience - - # A book will be classified as a young adult or childrens' - # book when the weight of that audience is more than twice the - # combined weight of the 'adult' and 'adults only' audiences. - # If that combined weight is zero, then any amount of evidence - # is sufficient. - threshold = total_adult_weight * 2 - - # If both the 'children' weight and the 'YA' weight pass the - # threshold, we go with the one that weighs more. - # If the 'children' weight passes the threshold on its own - # we go with 'children'. - total_juvenile_weight = children_weight + ya_weight - if ( - research_weight > (total_adult_weight + all_ages_weight) - and research_weight > (total_juvenile_weight + all_ages_weight) - and research_weight > threshold - ): - audience = Classifier.AUDIENCE_RESEARCH - elif ( - all_ages_weight > total_adult_weight - and all_ages_weight > total_juvenile_weight - ): - audience = Classifier.AUDIENCE_ALL_AGES - elif children_weight > threshold and children_weight > ya_weight: - audience = Classifier.AUDIENCE_CHILDREN - elif ya_weight > threshold: - audience = Classifier.AUDIENCE_YOUNG_ADULT - elif total_juvenile_weight > threshold: - # Neither weight passes the threshold on its own, but - # combined they do pass the threshold. Go with - # 'Young Adult' to be safe. 
- audience = Classifier.AUDIENCE_YOUNG_ADULT - elif total_adult_weight > 0: - audience = Classifier.AUDIENCE_ADULT - - # If the 'adults only' weight is more than 1/4 of the total adult - # weight, classify as 'adults only' to be safe. - # - # TODO: This has not been calibrated. - if ( - audience == Classifier.AUDIENCE_ADULT - and adults_only_weight > total_adult_weight / 4 - ): - audience = Classifier.AUDIENCE_ADULTS_ONLY - - return audience - - @classmethod - def top_tier_values(self, counter): - """Given a Counter mapping values to their frequency of occurance, - return all values that are as common as the most common value. - """ - top_frequency = None - top_tier = set() - for age, freq in counter.most_common(): - if not top_frequency: - top_frequency = freq - if freq != top_frequency: - # We've run out of candidates - break - else: - # This candidate occurs with the maximum frequency. - top_tier.add(age) - return top_tier - - def target_age(self, audience): - """Derive a target age from the gathered data.""" - if audience not in ( - Classifier.AUDIENCE_CHILDREN, - Classifier.AUDIENCE_YOUNG_ADULT, - ): - # This is not a children's or YA book. Assertions about - # target age are irrelevant and the default value rules. - return Classifier.default_target_age_for_audience(audience) - - # Only consider the most reliable classifications. - - # Try to reach consensus on the lower and upper bounds of the - # age range. - if self.debug: - if self.target_age_lower_weights: - self.log.debug("Possible target age minima:") - for k, v in self.target_age_lower_weights.most_common(): - self.log.debug(" %s: %s", v, k) - if self.target_age_upper_weights: - self.log.debug("Possible target age maxima:") - for k, v in self.target_age_upper_weights.most_common(): - self.log.debug(" %s: %s", v, k) - - target_age_min = None - target_age_max = None - if self.target_age_lower_weights: - # Find the youngest age in the top tier of values. 
- candidates = self.top_tier_values(self.target_age_lower_weights) - target_age_min = min(candidates) - - if self.target_age_upper_weights: - # Find the oldest age in the top tier of values. - candidates = self.top_tier_values(self.target_age_upper_weights) - target_age_max = max(candidates) - - if not target_age_min and not target_age_max: - # We found no opinions about target age. Use the default. - return Classifier.default_target_age_for_audience(audience) - - if target_age_min is None: - target_age_min = target_age_max - - if target_age_max is None: - target_age_max = target_age_min - - # Err on the side of setting the minimum age too high. - if target_age_min > target_age_max: - target_age_max = target_age_min - return Classifier.range_tuple(target_age_min, target_age_max) - - def genres(self, fiction, cutoff=0.15): - """Consolidate genres and apply a low-pass filter.""" - # Remove any genres whose fiction status is inconsistent with the - # (independently determined) fiction status of the book. - # - # It doesn't matter if a book is classified as 'science - # fiction' 100 times; if we know it's nonfiction, it can't be - # science fiction. (It's probably a history of science fiction - # or something.) - genres = dict(self.genre_weights) - if not genres: - # We have absolutely no idea, and it would be - # irresponsible to guess. - return {} - - for genre in list(genres.keys()): - # If we have a fiction determination, that lets us eliminate - # possible genres that conflict with that determination. - # - # TODO: If we don't have a fiction determination, the - # genres we end up with may help us make one. - if fiction is not None and (genre.default_fiction != fiction): - del genres[genre] - - # Consolidate parent genres into their heaviest subgenre. - genres = self.consolidate_genre_weights(genres) - total_weight = float(sum(genres.values())) - - # Strip out the stragglers. 
- for g, score in list(genres.items()): - affinity = score / total_weight - if affinity < cutoff: - total_weight -= score - del genres[g] - return genres - - def weigh_genre(self, genre_data, weight): - """A helper method that ensure we always use database Genre - objects, not GenreData objects, when weighting genres. - """ - from palace.manager.sqlalchemy.model.classification import Genre - - genre, ignore = Genre.lookup(self._db, genre_data.name) - self.genre_weights[genre] += weight - - @classmethod - def consolidate_genre_weights(cls, weights, subgenre_swallows_parent_at=0.03): - """If a genre and its subgenres both show up, examine the subgenre - with the highest weight. If its weight exceeds a certain - proportion of the weight of the parent genre, assign the - parent's weight to the subgenre and remove the parent. - """ - # print("Before consolidation:") - # for genre, weight in weights.items(): - # print("", genre, weight) - - # Convert Genre objects to GenreData. - consolidated = Counter() - for genre, weight in list(weights.items()): - if not isinstance(genre, GenreData): - genre = genres[genre.name] - consolidated[genre] += weight - - heaviest_child = dict() - for genre, weight in list(consolidated.items()): - for parent in genre.parents: - if parent in consolidated: - if (not parent in heaviest_child) or weight > heaviest_child[ - parent - ][1]: - heaviest_child[parent] = (genre, weight) - # print("Heaviest child:") - # for parent, (genre, weight) in heaviest_child.items(): - # print("", parent, genre, weight) - made_it = False - while not made_it: - for parent, (child, weight) in sorted( - heaviest_child.items(), key=lambda genre: genre[1][1], reverse=True - ): - parent_weight = consolidated.get(parent, 0) - if weight > (subgenre_swallows_parent_at * parent_weight): - consolidated[child] += parent_weight - del consolidated[parent] - changed = False - for parent in parent.parents: - if parent in heaviest_child: - heaviest_child[parent] = (child, 
consolidated[child]) - changed = True - if changed: - # We changed the dict, so we need to restart - # the iteration. - break - # We made it all the way through the dict without changing it. - made_it = True - # print("Final heaviest child:") - # for parent, (genre, weight) in heaviest_child.items(): - # print("", parent, genre, weight) - # print("After consolidation:") - # for genre, weight in consolidated.items(): - # print("", genre, weight) - return consolidated - - -# Make a dictionary of classification schemes to classifiers. - -Classifier.classifiers[Classifier.FREEFORM_AUDIENCE] = FreeformAudienceClassifier -Classifier.classifiers[Classifier.AXIS_360_AUDIENCE] = AgeOrGradeClassifier - -# Finally, import classifiers described in submodules. -from palace.manager.core.classifier.age import ( - AgeClassifier, - GradeLevelClassifier, - InterestLevelClassifier, -) -from palace.manager.core.classifier.bic import BICClassifier -from palace.manager.core.classifier.bisac import BISACClassifier -from palace.manager.core.classifier.ddc import DeweyDecimalClassifier -from palace.manager.core.classifier.gutenberg import GutenbergBookshelfClassifier -from palace.manager.core.classifier.keyword import ( - Eg, - FASTClassifier, - KeywordBasedClassifier, - LCSHClassifier, - TAGClassifier, -) -from palace.manager.core.classifier.lcc import LCCClassifier -from palace.manager.core.classifier.overdrive import OverdriveClassifier -from palace.manager.core.classifier.simplified import ( - SimplifiedFictionClassifier, - SimplifiedGenreClassifier, -) diff --git a/src/palace/manager/core/classifier/age.py b/src/palace/manager/core/classifier/age.py index 396b0f3cd3..234c7d4473 100644 --- a/src/palace/manager/core/classifier/age.py +++ b/src/palace/manager/core/classifier/age.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import re from palace.manager.core.classifier import Classifier @@ -244,6 +246,81 @@ def target_age_match(cls, query): return (target_age, age_words) 
+class AgeOrGradeClassifier(Classifier): + @classmethod + def audience(cls, identifier, name): + audience = AgeClassifier.audience(identifier, name) + if audience == None: + audience = GradeLevelClassifier.audience(identifier, name) + return audience + + @classmethod + def target_age(cls, identifier, name): + """This tag might contain a grade level, an age in years, or nothing. + We will try both a grade level and an age in years, but we + will require that the tag indicate what's being measured. A + tag like "9-12" will not match anything because we don't know if it's + age 9-12 or grade 9-12. + """ + age = AgeClassifier.target_age(identifier, name, True) + if age == cls.range_tuple(None, None): + age = GradeLevelClassifier.target_age(identifier, name, True) + return age + + +class FreeformAudienceClassifier(AgeOrGradeClassifier): + # NOTE: In practice, subjects like "books for all ages" tend to be + # more like advertising slogans than reliable indicators of an + # ALL_AGES audience. So the only subject of this type we handle is + # the literal string "all ages", as it would appear, e.g., in the + # output of the metadata wrangler. 
+ + @classmethod + def audience(cls, identifier, name): + if identifier in ("children", "pre-adolescent", "beginning reader"): + return cls.AUDIENCE_CHILDREN + elif identifier in ( + "young adult", + "ya", + "teenagers", + "adolescent", + "early adolescents", + ): + return cls.AUDIENCE_YOUNG_ADULT + elif identifier == "adult": + return cls.AUDIENCE_ADULT + elif identifier == "adults only": + return cls.AUDIENCE_ADULTS_ONLY + elif identifier == "all ages": + return cls.AUDIENCE_ALL_AGES + elif identifier == "research": + return cls.AUDIENCE_RESEARCH + return AgeOrGradeClassifier.audience(identifier, name) + + @classmethod + def target_age(cls, identifier, name): + if identifier == "beginning reader": + return cls.range_tuple(5, 8) + if identifier == "pre-adolescent": + return cls.range_tuple(9, 12) + if identifier == "early adolescents": + return cls.range_tuple(13, 15) + if identifier == "all ages": + return cls.range_tuple(cls.ALL_AGES_AGE_CUTOFF, None) + strict_age = AgeClassifier.target_age(identifier, name, True) + if strict_age[0] or strict_age[1]: + return strict_age + + strict_grade = GradeLevelClassifier.target_age(identifier, name, True) + if strict_grade[0] or strict_grade[1]: + return strict_grade + + # Default to assuming it's an unmarked age. 
+ return AgeClassifier.target_age(identifier, name, False) + + Classifier.classifiers[Classifier.AGE_RANGE] = AgeClassifier Classifier.classifiers[Classifier.GRADE_LEVEL] = GradeLevelClassifier Classifier.classifiers[Classifier.INTEREST_LEVEL] = InterestLevelClassifier +Classifier.classifiers[Classifier.FREEFORM_AUDIENCE] = FreeformAudienceClassifier +Classifier.classifiers[Classifier.AXIS_360_AUDIENCE] = AgeOrGradeClassifier diff --git a/src/palace/manager/core/classifier/bic.py b/src/palace/manager/core/classifier/bic.py index 7c5e82f772..867fd31018 100644 --- a/src/palace/manager/core/classifier/bic.py +++ b/src/palace/manager/core/classifier/bic.py @@ -1,113 +1,114 @@ -from palace.manager.core.classifier import * +from palace.manager.core import classifier +from palace.manager.core.classifier import Classifier class BICClassifier(Classifier): # These prefixes came from from http://editeur.dyndns.org/bic_categories LEVEL_1_PREFIXES = { - Art_Design: "A", - Biography_Memoir: "B", - Foreign_Language_Study: "C", - Literary_Criticism: "D", - Reference_Study_Aids: "G", - Social_Sciences: "J", - Personal_Finance_Business: "K", - Law: "L", - Medical: "M", - Science_Technology: "P", - Technology: "T", - Computers: "U", + classifier.Art_Design: "A", + classifier.Biography_Memoir: "B", + classifier.Foreign_Language_Study: "C", + classifier.Literary_Criticism: "D", + classifier.Reference_Study_Aids: "G", + classifier.Social_Sciences: "J", + classifier.Personal_Finance_Business: "K", + classifier.Law: "L", + classifier.Medical: "M", + classifier.Science_Technology: "P", + classifier.Technology: "T", + classifier.Computers: "U", } LEVEL_2_PREFIXES = { - Art_History: "AC", - Photography: "AJ", - Design: "AK", - Architecture: "AM", - Film_TV: "AP", - Performing_Arts: "AS", - Music: "AV", - Poetry: "DC", - Drama: "DD", - Classics: "FC", - Mystery: "FF", - Suspense_Thriller: "FH", - Adventure: "FJ", - Horror: "FK", - Science_Fiction: "FL", - Fantasy: "FM", - Erotica: "FP", - 
Romance: "FR", - Historical_Fiction: "FV", - Religious_Fiction: "FW", - Comics_Graphic_Novels: "FX", - History: "HB", - Philosophy: "HP", - Religion_Spirituality: "HR", - Psychology: "JM", - Education: "JN", - Political_Science: "JP", - Economics: "KC", - Business: "KJ", - Mathematics: "PB", - Science: "PD", - Self_Help: "VS", - Body_Mind_Spirit: "VX", - Food_Health: "WB", - Antiques_Collectibles: "WC", - Crafts_Hobbies: "WF", - Humorous_Nonfiction: "WH", - House_Home: "WK", - Gardening: "WM", - Nature: "WN", - Sports: "WS", - Travel: "WT", + classifier.Art_History: "AC", + classifier.Photography: "AJ", + classifier.Design: "AK", + classifier.Architecture: "AM", + classifier.Film_TV: "AP", + classifier.Performing_Arts: "AS", + classifier.Music: "AV", + classifier.Poetry: "DC", + classifier.Drama: "DD", + classifier.Classics: "FC", + classifier.Mystery: "FF", + classifier.Suspense_Thriller: "FH", + classifier.Adventure: "FJ", + classifier.Horror: "FK", + classifier.Science_Fiction: "FL", + classifier.Fantasy: "FM", + classifier.Erotica: "FP", + classifier.Romance: "FR", + classifier.Historical_Fiction: "FV", + classifier.Religious_Fiction: "FW", + classifier.Comics_Graphic_Novels: "FX", + classifier.History: "HB", + classifier.Philosophy: "HP", + classifier.Religion_Spirituality: "HR", + classifier.Psychology: "JM", + classifier.Education: "JN", + classifier.Political_Science: "JP", + classifier.Economics: "KC", + classifier.Business: "KJ", + classifier.Mathematics: "PB", + classifier.Science: "PD", + classifier.Self_Help: "VS", + classifier.Body_Mind_Spirit: "VX", + classifier.Food_Health: "WB", + classifier.Antiques_Collectibles: "WC", + classifier.Crafts_Hobbies: "WF", + classifier.Humorous_Nonfiction: "WH", + classifier.House_Home: "WK", + classifier.Gardening: "WM", + classifier.Nature: "WN", + classifier.Sports: "WS", + classifier.Travel: "WT", } LEVEL_3_PREFIXES = { - Historical_Mystery: "FFH", - Espionage: "FHD", - Westerns: "FJW", - Space_Opera: "FLS", - 
Historical_Romance: "FRH", - Short_Stories: "FYB", - World_History: "HBG", - Military_History: "HBW", - Christianity: "HRC", - Buddhism: "HRE", - Hinduism: "HRG", - Islam: "HRH", - Judaism: "HRJ", - Fashion: "WJF", - Poetry: "YDP", - Adventure: "YFC", - Horror: "YFD", - Science_Fiction: "YFG", - Fantasy: "YFH", - Romance: "YFM", - Humorous_Fiction: "YFQ", - Historical_Fiction: "YFT", - Comics_Graphic_Novels: "YFW", - Art: "YNA", - Music: "YNC", - Performing_Arts: "YND", - Film_TV: "YNF", - History: "YNH", - Nature: "YNN", - Religion_Spirituality: "YNR", - Science_Technology: "YNT", - Humorous_Nonfiction: "YNU", - Sports: "YNW", + classifier.Historical_Mystery: "FFH", + classifier.Espionage: "FHD", + classifier.Westerns: "FJW", + classifier.Space_Opera: "FLS", + classifier.Historical_Romance: "FRH", + classifier.Short_Stories: "FYB", + classifier.World_History: "HBG", + classifier.Military_History: "HBW", + classifier.Christianity: "HRC", + classifier.Buddhism: "HRE", + classifier.Hinduism: "HRG", + classifier.Islam: "HRH", + classifier.Judaism: "HRJ", + classifier.Fashion: "WJF", + classifier.Poetry: "YDP", + classifier.Adventure: "YFC", + classifier.Horror: "YFD", + classifier.Science_Fiction: "YFG", + classifier.Fantasy: "YFH", + classifier.Romance: "YFM", + classifier.Humorous_Fiction: "YFQ", + classifier.Historical_Fiction: "YFT", + classifier.Comics_Graphic_Novels: "YFW", + classifier.Art: "YNA", + classifier.Music: "YNC", + classifier.Performing_Arts: "YND", + classifier.Film_TV: "YNF", + classifier.History: "YNH", + classifier.Nature: "YNN", + classifier.Religion_Spirituality: "YNR", + classifier.Science_Technology: "YNT", + classifier.Humorous_Nonfiction: "YNU", + classifier.Sports: "YNW", } LEVEL_4_PREFIXES = { - European_History: "HBJD", - Asian_History: "HBJF", - African_History: "HBJH", - Ancient_History: "HBLA", - Modern_History: "HBLL", - Drama: "YNDS", - Comics_Graphic_Novels: "YNUC", + classifier.European_History: "HBJD", + classifier.Asian_History: 
"HBJF", + classifier.African_History: "HBJH", + classifier.Ancient_History: "HBLA", + classifier.Modern_History: "HBLL", + classifier.Drama: "YNDS", + classifier.Comics_Graphic_Novels: "YNUC", } PREFIX_LISTS = [ diff --git a/src/palace/manager/core/classifier/bisac.py b/src/palace/manager/core/classifier/bisac.py index 3fa4f8f92d..74b3a8d90e 100644 --- a/src/palace/manager/core/classifier/bisac.py +++ b/src/palace/manager/core/classifier/bisac.py @@ -1,7 +1,12 @@ import csv import re -from palace.manager.core.classifier import * +from palace.manager.core import classifier +from palace.manager.core.classifier import ( + Classifier, + Lowercased, + classifier_resources_dir, +) from palace.manager.core.classifier.keyword import KeywordBasedClassifier @@ -290,116 +295,126 @@ class BISACClassifier(Classifier): GENRE = [ # Put all erotica in Erotica, to keep the other lanes at # "Adult" level or lower. - m(Erotica, anything, "Erotica"), + m(classifier.Erotica, anything, "Erotica"), # Put all non-erotica comics into the same bucket, regardless # of their content. - m(Comics_Graphic_Novels, "Comics & Graphic Novels"), - m(Comics_Graphic_Novels, nonfiction, "Comics & Graphic Novels"), - m(Comics_Graphic_Novels, fiction, "Comics & Graphic Novels"), + m(classifier.Comics_Graphic_Novels, "Comics & Graphic Novels"), + m(classifier.Comics_Graphic_Novels, nonfiction, "Comics & Graphic Novels"), + m(classifier.Comics_Graphic_Novels, fiction, "Comics & Graphic Novels"), # "Literary Criticism / Foo" implies Literary Criticism, not Foo. - m(Literary_Criticism, anything, literary_criticism), + m(classifier.Literary_Criticism, anything, literary_criticism), # "Fiction / Christian / Foo" implies Religious Fiction # more strongly than it implies Foo. - m(Religious_Fiction, fiction, anything, "Christian"), + m(classifier.Religious_Fiction, fiction, anything, "Christian"), # "Fiction / Foo / Short Stories" implies Short Stories more # strongly than it implies Foo. 
This assumes that a short # story collection within a genre will also be classified # separately under that genre. This could definitely be # improved but would require a Subject to map to multiple # Genres. - m(Short_Stories, fiction, anything, RE("^Anthologies")), - m(Short_Stories, fiction, anything, RE("^Short Stories")), - m(Short_Stories, "Literary Collections"), - m(Short_Stories, fiction, anything, "Collections & Anthologies"), + m(classifier.Short_Stories, fiction, anything, RE("^Anthologies")), + m(classifier.Short_Stories, fiction, anything, RE("^Short Stories")), + m(classifier.Short_Stories, "Literary Collections"), + m(classifier.Short_Stories, fiction, anything, "Collections & Anthologies"), # Classify top-level fiction categories into fiction genres. # # First, handle large overarching genres that have subgenres # and adjacent genres. # # Fantasy - m(Epic_Fantasy, fiction, "Fantasy", "Epic"), - m(Historical_Fantasy, fiction, "Fantasy", "Historical"), - m(Urban_Fantasy, fiction, "Fantasy", "Urban"), - m(Fantasy, fiction, "Fantasy"), - m(Fantasy, fiction, "Romance", "Fantasy"), - m(Fantasy, fiction, "Sagas"), + m(classifier.Epic_Fantasy, fiction, "Fantasy", "Epic"), + m(classifier.Historical_Fantasy, fiction, "Fantasy", "Historical"), + m(classifier.Urban_Fantasy, fiction, "Fantasy", "Urban"), + m(classifier.Fantasy, fiction, "Fantasy"), + m(classifier.Fantasy, fiction, "Romance", "Fantasy"), + m(classifier.Fantasy, fiction, "Sagas"), # Mystery # n.b. 
no BISAC for Paranormal_Mystery m( - Crime_Detective_Stories, + classifier.Crime_Detective_Stories, fiction, "Mystery & Detective", "Private Investigators", ), - m(Crime_Detective_Stories, fiction, "Crime"), - m(Crime_Detective_Stories, fiction, "Thrillers", "Crime"), - m(Hard_Boiled_Mystery, fiction, "Mystery & Detective", "Hard-Boiled"), - m(Police_Procedural, fiction, "Mystery & Detective", "Police Procedural"), - m(Cozy_Mystery, fiction, "Mystery & Detective", "Cozy"), - m(Historical_Mystery, fiction, "Mystery & Detective", "Historical"), - m(Women_Detectives, fiction, "Mystery & Detective", "Women Sleuths"), - m(Mystery, fiction, anything, "Mystery & Detective"), + m(classifier.Crime_Detective_Stories, fiction, "Crime"), + m(classifier.Crime_Detective_Stories, fiction, "Thrillers", "Crime"), + m( + classifier.Hard_Boiled_Mystery, + fiction, + "Mystery & Detective", + "Hard-Boiled", + ), + m( + classifier.Police_Procedural, + fiction, + "Mystery & Detective", + "Police Procedural", + ), + m(classifier.Cozy_Mystery, fiction, "Mystery & Detective", "Cozy"), + m(classifier.Historical_Mystery, fiction, "Mystery & Detective", "Historical"), + m(classifier.Women_Detectives, fiction, "Mystery & Detective", "Women Sleuths"), + m(classifier.Mystery, fiction, anything, "Mystery & Detective"), # Horror - m(Ghost_Stories, fiction, "Ghost"), - m(Occult_Horror, fiction, "Occult & Supernatural"), - m(Gothic_Horror, fiction, "Gothic"), - m(Horror, fiction, "Horror"), + m(classifier.Ghost_Stories, fiction, "Ghost"), + m(classifier.Occult_Horror, fiction, "Occult & Supernatural"), + m(classifier.Gothic_Horror, fiction, "Gothic"), + m(classifier.Horror, fiction, "Horror"), # Romance # n.b. 
no BISAC for Gothic Romance - m(Contemporary_Romance, fiction, "Romance", "Contemporary"), - m(Historical_Romance, fiction, "Romance", "Historical"), - m(Paranormal_Romance, fiction, "Romance", "Paranormal"), - m(Western_Romance, fiction, "Romance", "Western"), - m(Romantic_Suspense, fiction, "Romance", "Suspense"), - m(Romantic_SF, fiction, "Romance", "Time Travel"), - m(Romantic_SF, fiction, "Romance", "Science Fiction"), - m(Romance, fiction, "Romance"), + m(classifier.Contemporary_Romance, fiction, "Romance", "Contemporary"), + m(classifier.Historical_Romance, fiction, "Romance", "Historical"), + m(classifier.Paranormal_Romance, fiction, "Romance", "Paranormal"), + m(classifier.Western_Romance, fiction, "Romance", "Western"), + m(classifier.Romantic_Suspense, fiction, "Romance", "Suspense"), + m(classifier.Romantic_SF, fiction, "Romance", "Time Travel"), + m(classifier.Romantic_SF, fiction, "Romance", "Science Fiction"), + m(classifier.Romance, fiction, "Romance"), # Science fiction # n.b. no BISAC for Cyberpunk - m(Dystopian_SF, fiction, "Dystopian"), - m(Space_Opera, fiction, "Science Fiction", "Space Opera"), - m(Military_SF, fiction, "Science Fiction", "Military"), - m(Alternative_History, fiction, "Alternative History"), + m(classifier.Dystopian_SF, fiction, "Dystopian"), + m(classifier.Space_Opera, fiction, "Science Fiction", "Space Opera"), + m(classifier.Military_SF, fiction, "Science Fiction", "Military"), + m(classifier.Alternative_History, fiction, "Alternative History"), # Juvenile steampunk is classified directly beneath 'fiction'. - m(Steampunk, fiction, anything, "Steampunk"), - m(Science_Fiction, fiction, "Science Fiction"), + m(classifier.Steampunk, fiction, anything, "Steampunk"), + m(classifier.Science_Fiction, fiction, "Science Fiction"), # Thrillers # n.b. 
no BISAC for Supernatural_Thriller - m(Historical_Thriller, fiction, "Thrillers", "Historical"), - m(Espionage, fiction, "Thrillers", "Espionage"), - m(Medical_Thriller, fiction, "Thrillers", "Medical"), - m(Political_Thriller, fiction, "Thrillers", "Political"), - m(Legal_Thriller, fiction, "Thrillers", "Legal"), - m(Technothriller, fiction, "Thrillers", "Technological"), - m(Military_Thriller, fiction, "Thrillers", "Military"), - m(Suspense_Thriller, fiction, "Thrillers"), + m(classifier.Historical_Thriller, fiction, "Thrillers", "Historical"), + m(classifier.Espionage, fiction, "Thrillers", "Espionage"), + m(classifier.Medical_Thriller, fiction, "Thrillers", "Medical"), + m(classifier.Political_Thriller, fiction, "Thrillers", "Political"), + m(classifier.Legal_Thriller, fiction, "Thrillers", "Legal"), + m(classifier.Technothriller, fiction, "Thrillers", "Technological"), + m(classifier.Military_Thriller, fiction, "Thrillers", "Military"), + m(classifier.Suspense_Thriller, fiction, "Thrillers"), # Then handle the less complicated genres of fiction. 
- m(Adventure, fiction, "Action & Adventure"), - m(Adventure, fiction, "Sea Stories"), - m(Adventure, fiction, "War & Military"), - m(Classics, fiction, "Classics"), - m(Folklore, fiction, "Fairy Tales, Folk Tales, Legends & Mythology"), - m(Historical_Fiction, anything, "Historical"), - m(Humorous_Fiction, fiction, "Humorous"), - m(Humorous_Fiction, fiction, "Satire"), - m(Literary_Fiction, fiction, "Literary"), - m(LGBTQ_Fiction, fiction, "Gay"), - m(LGBTQ_Fiction, fiction, "Lesbian"), - m(LGBTQ_Fiction, fiction, "Gay & Lesbian"), - m(Religious_Fiction, fiction, "Religious"), - m(Religious_Fiction, fiction, "Jewish"), - m(Religious_Fiction, fiction, "Visionary & Metaphysical"), - m(Womens_Fiction, fiction, anything, "Contemporary Women"), - m(Westerns, fiction, "Westerns"), + m(classifier.Adventure, fiction, "Action & Adventure"), + m(classifier.Adventure, fiction, "Sea Stories"), + m(classifier.Adventure, fiction, "War & Military"), + m(classifier.Classics, fiction, "Classics"), + m(classifier.Folklore, fiction, "Fairy Tales, Folk Tales, Legends & Mythology"), + m(classifier.Historical_Fiction, anything, "Historical"), + m(classifier.Humorous_Fiction, fiction, "Humorous"), + m(classifier.Humorous_Fiction, fiction, "Satire"), + m(classifier.Literary_Fiction, fiction, "Literary"), + m(classifier.LGBTQ_Fiction, fiction, "Gay"), + m(classifier.LGBTQ_Fiction, fiction, "Lesbian"), + m(classifier.LGBTQ_Fiction, fiction, "Gay & Lesbian"), + m(classifier.Religious_Fiction, fiction, "Religious"), + m(classifier.Religious_Fiction, fiction, "Jewish"), + m(classifier.Religious_Fiction, fiction, "Visionary & Metaphysical"), + m(classifier.Womens_Fiction, fiction, anything, "Contemporary Women"), + m(classifier.Westerns, fiction, "Westerns"), # n.b. BISAC "Fiction / Urban" is distinct from "Fiction / # African-American / Urban", and does not map to any of our # genres. 
- m(Urban_Fiction, fiction, "African American", "Urban"), + m(classifier.Urban_Fiction, fiction, "African American", "Urban"), # BISAC classifies these genres at the top level, which we # treat as 'nonfiction', but we classify them as fiction. It # doesn't matter because they're neither, really. - m(Drama, nonfiction, "Drama"), - m(Poetry, nonfiction, "Poetry"), + m(classifier.Drama, nonfiction, "Drama"), + m(classifier.Poetry, nonfiction, "Poetry"), # Now on to nonfiction. # Classify top-level nonfiction categories into fiction genres. # @@ -407,196 +422,232 @@ class BISACClassifier(Classifier): # and adjacent genres. # # Art & Design - m(Architecture, nonfiction, "Architecture"), - m(Art_Criticism_Theory, nonfiction, "Art", "Criticism & Theory"), - m(Art_History, nonfiction, "Art", "History"), - m(Fashion, nonfiction, "Design", "Fashion"), - m(Design, nonfiction, "Design"), - m(Art_Design, nonfiction, "Art"), - m(Photography, nonfiction, "Photography"), + m(classifier.Architecture, nonfiction, "Architecture"), + m(classifier.Art_Criticism_Theory, nonfiction, "Art", "Criticism & Theory"), + m(classifier.Art_History, nonfiction, "Art", "History"), + m(classifier.Fashion, nonfiction, "Design", "Fashion"), + m(classifier.Design, nonfiction, "Design"), + m(classifier.Art_Design, nonfiction, "Art"), + m(classifier.Photography, nonfiction, "Photography"), # Personal Finance & Business - m(Business, nonfiction, "Business & Economics", RE("^Business.*")), - m(Business, nonfiction, "Business & Economics", "Accounting"), - m(Economics, nonfiction, "Business & Economics", "Economics"), - m(Economics, nonfiction, "Business & Economics", "Environmental Economics"), - m(Economics, nonfiction, "Business & Economics", RE("^Econo.*")), - m(Management_Leadership, nonfiction, "Business & Economics", "Management"), + m(classifier.Business, nonfiction, "Business & Economics", RE("^Business.*")), + m(classifier.Business, nonfiction, "Business & Economics", "Accounting"), + 
m(classifier.Economics, nonfiction, "Business & Economics", "Economics"), m( - Management_Leadership, + classifier.Economics, + nonfiction, + "Business & Economics", + "Environmental Economics", + ), + m(classifier.Economics, nonfiction, "Business & Economics", RE("^Econo.*")), + m( + classifier.Management_Leadership, + nonfiction, + "Business & Economics", + "Management", + ), + m( + classifier.Management_Leadership, nonfiction, "Business & Economics", "Management Science", ), - m(Management_Leadership, nonfiction, "Business & Economics", "Leadership"), m( - Personal_Finance_Investing, + classifier.Management_Leadership, + nonfiction, + "Business & Economics", + "Leadership", + ), + m( + classifier.Personal_Finance_Investing, nonfiction, "Business & Economics", "Personal Finance", ), m( - Personal_Finance_Investing, + classifier.Personal_Finance_Investing, nonfiction, "Business & Economics", "Personal Success", ), m( - Personal_Finance_Investing, + classifier.Personal_Finance_Investing, nonfiction, "Business & Economics", "Investments & Securities", ), - m(Real_Estate, nonfiction, "Business & Economics", "Real Estate"), - m(Personal_Finance_Business, nonfiction, "Business & Economics"), + m(classifier.Real_Estate, nonfiction, "Business & Economics", "Real Estate"), + m(classifier.Personal_Finance_Business, nonfiction, "Business & Economics"), # Parenting & Family - m(Parenting, nonfiction, "Family & Relationships", "Parenting"), - m(Family_Relationships, nonfiction, "Family & Relationships"), + m(classifier.Parenting, nonfiction, "Family & Relationships", "Parenting"), + m(classifier.Family_Relationships, nonfiction, "Family & Relationships"), # Food & Health - m(Bartending_Cocktails, nonfiction, "Cooking", "Beverages"), - m(Health_Diet, nonfiction, "Cooking", "Health & Healing"), - m(Health_Diet, nonfiction, "Health & Fitness"), - m(Vegetarian_Vegan, nonfiction, "Cooking", "Vegetarian & Vegan"), - m(Cooking, nonfiction, "Cooking"), + 
m(classifier.Bartending_Cocktails, nonfiction, "Cooking", "Beverages"), + m(classifier.Health_Diet, nonfiction, "Cooking", "Health & Healing"), + m(classifier.Health_Diet, nonfiction, "Health & Fitness"), + m(classifier.Vegetarian_Vegan, nonfiction, "Cooking", "Vegetarian & Vegan"), + m(classifier.Cooking, nonfiction, "Cooking"), # History - m(African_History, nonfiction, "History", "Africa"), - m(Ancient_History, nonfiction, "History", "Ancient"), - m(Asian_History, nonfiction, "History", "Asia"), - m(Civil_War_History, nonfiction, "History", "United States", RE("^Civil War")), - m(European_History, nonfiction, "History", "Europe"), - m(Latin_American_History, nonfiction, "History", "Latin America"), - m(Medieval_History, nonfiction, "History", "Medieval"), - m(Military_History, nonfiction, "History", "Military"), - m(Renaissance_Early_Modern_History, nonfiction, "History", "Renaissance"), + m(classifier.African_History, nonfiction, "History", "Africa"), + m(classifier.Ancient_History, nonfiction, "History", "Ancient"), + m(classifier.Asian_History, nonfiction, "History", "Asia"), + m( + classifier.Civil_War_History, + nonfiction, + "History", + "United States", + RE("^Civil War"), + ), + m(classifier.European_History, nonfiction, "History", "Europe"), + m(classifier.Latin_American_History, nonfiction, "History", "Latin America"), + m(classifier.Medieval_History, nonfiction, "History", "Medieval"), + m(classifier.Military_History, nonfiction, "History", "Military"), + m( + classifier.Renaissance_Early_Modern_History, + nonfiction, + "History", + "Renaissance", + ), m( - Renaissance_Early_Modern_History, + classifier.Renaissance_Early_Modern_History, nonfiction, "History", "Modern", RE("^1[678]th Century"), ), - m(Modern_History, nonfiction, "History", "Modern"), - m(United_States_History, nonfiction, "History", "Native American"), - m(United_States_History, nonfiction, "History", "United States"), - m(World_History, nonfiction, "History", "World"), - 
m(World_History, nonfiction, "History", "Civilization"), - m(History, nonfiction, "History"), + m(classifier.Modern_History, nonfiction, "History", "Modern"), + m(classifier.United_States_History, nonfiction, "History", "Native American"), + m(classifier.United_States_History, nonfiction, "History", "United States"), + m(classifier.World_History, nonfiction, "History", "World"), + m(classifier.World_History, nonfiction, "History", "Civilization"), + m(classifier.History, nonfiction, "History"), # Hobbies & Home - m(Antiques_Collectibles, nonfiction, "Antiques & Collectibles"), - m(Crafts_Hobbies, nonfiction, "Crafts & Hobbies"), - m(Gardening, nonfiction, "Gardening"), - m(Games, nonfiction, "Games"), - m(House_Home, nonfiction, "House & Home"), - m(Pets, nonfiction, "Pets"), + m(classifier.Antiques_Collectibles, nonfiction, "Antiques & Collectibles"), + m(classifier.Crafts_Hobbies, nonfiction, "Crafts & Hobbies"), + m(classifier.Gardening, nonfiction, "Gardening"), + m(classifier.Games, nonfiction, "Games"), + m(classifier.House_Home, nonfiction, "House & Home"), + m(classifier.Pets, nonfiction, "Pets"), # Entertainment - m(Film_TV, nonfiction, "Performing Arts", "Film & Video"), - m(Film_TV, nonfiction, "Performing Arts", "Television"), - m(Music, nonfiction, "Music"), - m(Performing_Arts, nonfiction, "Performing Arts"), + m(classifier.Film_TV, nonfiction, "Performing Arts", "Film & Video"), + m(classifier.Film_TV, nonfiction, "Performing Arts", "Television"), + m(classifier.Music, nonfiction, "Music"), + m(classifier.Performing_Arts, nonfiction, "Performing Arts"), # Reference & Study Aids - m(Dictionaries, nonfiction, "Reference", "Dictionaries"), - m(Foreign_Language_Study, nonfiction, "Foreign Language Study"), - m(Law, nonfiction, "Law"), - m(Study_Aids, nonfiction, "Study Aids"), - m(Reference_Study_Aids, nonfiction, "Reference"), - m(Reference_Study_Aids, nonfiction, "Language Arts & Disciplines"), + m(classifier.Dictionaries, nonfiction, "Reference", 
"Dictionaries"), + m(classifier.Foreign_Language_Study, nonfiction, "Foreign Language Study"), + m(classifier.Law, nonfiction, "Law"), + m(classifier.Study_Aids, nonfiction, "Study Aids"), + m(classifier.Reference_Study_Aids, nonfiction, "Reference"), + m(classifier.Reference_Study_Aids, nonfiction, "Language Arts & Disciplines"), # Religion & Spirituality - m(Body_Mind_Spirit, nonfiction, body_mind_spirit), - m(Buddhism, nonfiction, "Religion", "Buddhism"), - m(Christianity, nonfiction, "Religion", RE("^Biblical")), - m(Christianity, nonfiction, "Religion", RE("^Christian")), - m(Christianity, nonfiction, "Bibles"), - m(Hinduism, nonfiction, "Religion", "Hinduism"), - m(Islam, nonfiction, "Religion", "Islam"), - m(Judaism, nonfiction, "Religion", "Judaism"), - m(Religion_Spirituality, nonfiction, "Religion"), + m(classifier.Body_Mind_Spirit, nonfiction, body_mind_spirit), + m(classifier.Buddhism, nonfiction, "Religion", "Buddhism"), + m(classifier.Christianity, nonfiction, "Religion", RE("^Biblical")), + m(classifier.Christianity, nonfiction, "Religion", RE("^Christian")), + m(classifier.Christianity, nonfiction, "Bibles"), + m(classifier.Hinduism, nonfiction, "Religion", "Hinduism"), + m(classifier.Islam, nonfiction, "Religion", "Islam"), + m(classifier.Judaism, nonfiction, "Religion", "Judaism"), + m(classifier.Religion_Spirituality, nonfiction, "Religion"), # Science & Technology - m(Computers, nonfiction, "Computers"), - m(Mathematics, nonfiction, "Mathematics"), - m(Medical, nonfiction, "Medical"), - m(Nature, nonfiction, "Nature"), - m(Psychology, nonfiction, psychology), - m(Political_Science, nonfiction, "Social Science", "Politics & Government"), - m(Social_Sciences, nonfiction, "Social Science"), - m(Technology, nonfiction, technology), - m(Technology, nonfiction, "Transportation"), - m(Science, nonfiction, "Science"), + m(classifier.Computers, nonfiction, "Computers"), + m(classifier.Mathematics, nonfiction, "Mathematics"), + m(classifier.Medical, 
nonfiction, "Medical"), + m(classifier.Nature, nonfiction, "Nature"), + m(classifier.Psychology, nonfiction, psychology), + m( + classifier.Political_Science, + nonfiction, + "Social Science", + "Politics & Government", + ), + m(classifier.Social_Sciences, nonfiction, "Social Science"), + m(classifier.Technology, nonfiction, technology), + m(classifier.Technology, nonfiction, "Transportation"), + m(classifier.Science, nonfiction, "Science"), # Then handle the less complicated genres of nonfiction. # n.b. no BISAC for Periodicals. # n.b. no BISAC for Humorous Nonfiction per se. - m(Music, nonfiction, "Biography & Autobiography", "Composers & Musicians"), m( - Entertainment, + classifier.Music, + nonfiction, + "Biography & Autobiography", + "Composers & Musicians", + ), + m( + classifier.Entertainment, nonfiction, "Biography & Autobiography", "Entertainment & Performing Arts", ), - m(Biography_Memoir, nonfiction, "Biography & Autobiography"), - m(Education, nonfiction, "Education"), - m(Philosophy, nonfiction, "Philosophy"), - m(Political_Science, nonfiction, "Political Science"), - m(Self_Help, nonfiction, "Self-Help"), - m(Sports, nonfiction, "Sports & Recreation"), - m(Travel, nonfiction, "Travel"), - m(True_Crime, nonfiction, "True Crime"), + m(classifier.Biography_Memoir, nonfiction, "Biography & Autobiography"), + m(classifier.Education, nonfiction, "Education"), + m(classifier.Philosophy, nonfiction, "Philosophy"), + m(classifier.Political_Science, nonfiction, "Political Science"), + m(classifier.Self_Help, nonfiction, "Self-Help"), + m(classifier.Sports, nonfiction, "Sports & Recreation"), + m(classifier.Travel, nonfiction, "Travel"), + m(classifier.True_Crime, nonfiction, "True Crime"), # Handle cases where Juvenile/YA uses different terms than # would be used for the same books for adults. 
- m(Business, nonfiction, "Careers"), - m(Christianity, nonfiction, "Religious", "Christian"), - m(Cooking, nonfiction, "Cooking & Food"), - m(Education, nonfiction, "School & Education"), - m(Family_Relationships, nonfiction, "Family"), - m(Fantasy, fiction, "Fantasy & Magic"), - m(Ghost_Stories, fiction, "Ghost Stories"), - m(Fantasy, fiction, "Magical Realism"), - m(Fantasy, fiction, "Mermaids"), - m(Fashion, nonfiction, "Fashion"), - m(Folklore, fiction, "Fairy Tales & Folklore"), - m(Folklore, fiction, "Legends, Myths, Fables"), - m(Games, nonfiction, "Games & Activities"), - m(Health_Diet, nonfiction, "Health & Daily Living"), - m(Horror, fiction, "Horror & Ghost Stories"), - m(Horror, fiction, "Monsters"), - m(Horror, fiction, "Paranormal"), - m(Horror, fiction, "Paranormal, Occult & Supernatural"), - m(Horror, fiction, "Vampires"), - m(Horror, fiction, "Werewolves & Shifters"), - m(Horror, fiction, "Zombies"), - m(Humorous_Fiction, fiction, "Humorous Stories"), - m(Humorous_Nonfiction, "Young Adult Nonfiction", "Humor"), - m(LGBTQ_Fiction, fiction, "LGBT"), - m(Law, nonfiction, "Law & Crime"), - m(Mystery, fiction, "Mysteries & Detective Stories"), - m(Nature, nonfiction, "Animals"), - m(Personal_Finance_Investing, nonfiction, "Personal Finance"), - m(Poetry, fiction, "Nursery Rhymes"), - m(Poetry, fiction, "Stories in Verse"), - m(Poetry, fiction, "Novels in Verse"), - m(Poetry, fiction, "Poetry"), - m(Reference_Study_Aids, nonfiction, "Language Arts"), - m(Romance, fiction, "Love & Romance"), - m(Science_Fiction, fiction, "Robots"), - m(Science_Fiction, fiction, "Time Travel"), - m(Social_Sciences, nonfiction, "Media Studies"), - m(Suspense_Thriller, fiction, "Superheroes"), - m(Suspense_Thriller, fiction, "Thrillers & Suspense"), + m(classifier.Business, nonfiction, "Careers"), + m(classifier.Christianity, nonfiction, "Religious", "Christian"), + m(classifier.Cooking, nonfiction, "Cooking & Food"), + m(classifier.Education, nonfiction, "School & 
Education"), + m(classifier.Family_Relationships, nonfiction, "Family"), + m(classifier.Fantasy, fiction, "Fantasy & Magic"), + m(classifier.Ghost_Stories, fiction, "Ghost Stories"), + m(classifier.Fantasy, fiction, "Magical Realism"), + m(classifier.Fantasy, fiction, "Mermaids"), + m(classifier.Fashion, nonfiction, "Fashion"), + m(classifier.Folklore, fiction, "Fairy Tales & Folklore"), + m(classifier.Folklore, fiction, "Legends, Myths, Fables"), + m(classifier.Games, nonfiction, "Games & Activities"), + m(classifier.Health_Diet, nonfiction, "Health & Daily Living"), + m(classifier.Horror, fiction, "Horror & Ghost Stories"), + m(classifier.Horror, fiction, "Monsters"), + m(classifier.Horror, fiction, "Paranormal"), + m(classifier.Horror, fiction, "Paranormal, Occult & Supernatural"), + m(classifier.Horror, fiction, "Vampires"), + m(classifier.Horror, fiction, "Werewolves & Shifters"), + m(classifier.Horror, fiction, "Zombies"), + m(classifier.Humorous_Fiction, fiction, "Humorous Stories"), + m(classifier.Humorous_Nonfiction, "Young Adult Nonfiction", "Humor"), + m(classifier.LGBTQ_Fiction, fiction, "LGBT"), + m(classifier.Law, nonfiction, "Law & Crime"), + m(classifier.Mystery, fiction, "Mysteries & Detective Stories"), + m(classifier.Nature, nonfiction, "Animals"), + m(classifier.Personal_Finance_Investing, nonfiction, "Personal Finance"), + m(classifier.Poetry, fiction, "Nursery Rhymes"), + m(classifier.Poetry, fiction, "Stories in Verse"), + m(classifier.Poetry, fiction, "Novels in Verse"), + m(classifier.Poetry, fiction, "Poetry"), + m(classifier.Reference_Study_Aids, nonfiction, "Language Arts"), + m(classifier.Romance, fiction, "Love & Romance"), + m(classifier.Science_Fiction, fiction, "Robots"), + m(classifier.Science_Fiction, fiction, "Time Travel"), + m(classifier.Social_Sciences, nonfiction, "Media Studies"), + m(classifier.Suspense_Thriller, fiction, "Superheroes"), + m(classifier.Suspense_Thriller, fiction, "Thrillers & Suspense"), # Most of the 
subcategories of 'Science & Nature' go into Nature, # but these go into Science. - m(Science, nonfiction, "Science & Nature", "Discoveries"), - m(Science, nonfiction, "Science & Nature", "Experiments & Projects"), - m(Science, nonfiction, "Science & Nature", "History of Science"), - m(Science, nonfiction, "Science & Nature", "Physics"), - m(Science, nonfiction, "Science & Nature", "Weights & Measures"), - m(Science, nonfiction, "Science & Nature", "General"), + m(classifier.Science, nonfiction, "Science & Nature", "Discoveries"), + m(classifier.Science, nonfiction, "Science & Nature", "Experiments & Projects"), + m(classifier.Science, nonfiction, "Science & Nature", "History of Science"), + m(classifier.Science, nonfiction, "Science & Nature", "Physics"), + m(classifier.Science, nonfiction, "Science & Nature", "Weights & Measures"), + m(classifier.Science, nonfiction, "Science & Nature", "General"), # Any other subcategory of 'Science & Nature' goes under Nature - m(Nature, nonfiction, "Science & Nature", something), + m(classifier.Nature, nonfiction, "Science & Nature", something), # Life Strategies is juvenile/YA-specific, and contains both # fiction and nonfiction. It's called "Social Issues" for # juvenile fiction/nonfiction, and "Social Topics" for YA # nonfiction. "Social Themes" in YA fiction is _not_ # classified as Life Strategies. 
- m(Life_Strategies, fiction, "social issues"), - m(Life_Strategies, nonfiction, "social issues"), - m(Life_Strategies, nonfiction, social_topics), + m(classifier.Life_Strategies, fiction, "social issues"), + m(classifier.Life_Strategies, nonfiction, "social issues"), + m(classifier.Life_Strategies, nonfiction, social_topics), ] @classmethod diff --git a/src/palace/manager/core/classifier/ddc.py b/src/palace/manager/core/classifier/ddc.py index 3a95f46229..7703a55a4f 100644 --- a/src/palace/manager/core/classifier/ddc.py +++ b/src/palace/manager/core/classifier/ddc.py @@ -1,11 +1,12 @@ import json -from palace.manager.core.classifier import * +from palace.manager.core import classifier +from palace.manager.core.classifier import Classifier class DeweyDecimalClassifier(Classifier): NAMES = json.loads( - classifier_resources_dir().joinpath("dewey_1000.json").read_text() + classifier.classifier_resources_dir().joinpath("dewey_1000.json").read_text() ) # Add some other values commonly found in MARC records. 
@@ -30,45 +31,47 @@ class DeweyDecimalClassifier(Classifier): # 398.7 Jokes and jests GENRES = { - African_History: list(range(960, 970)), - Architecture: list(range(710, 720)) + list(range(720, 730)), - Art: list(range(700, 710)) + list(range(730, 770)) + [774, 776], - Art_Criticism_Theory: [701], - Asian_History: list(range(950, 960)) + [995, 996, 997], - Biography_Memoir: ["B", 920], - Economics: list(range(330, 340)), - Christianity: [list(range(220, 230)) + list(range(230, 290))], - Cooking: [list(range(640, 642))], - Performing_Arts: [790, 791, 792], - Entertainment: 790, - Games: [793, 794, 795], - Drama: [812, 822, 832, 842, 852, 862, 872, 882], - Education: list(range(370, 380)) + [707], - European_History: list(range(940, 950)), - Folklore: [398], - History: [900], - Islam: [297], - Judaism: [296], - Latin_American_History: list(range(981, 990)), - Law: list(range(340, 350)) + [364], - Management_Leadership: [658], - Mathematics: list(range(510, 520)), - Medical: list(range(610, 620)), - Military_History: list(range(355, 360)), - Music: list(range(780, 789)), - Periodicals: list(range(50, 60)) + [105, 405, 505, 605, 705, 805, 905], - Philosophy: list(range(160, 200)), - Photography: [771, 772, 773, 775, 778, 779], - Poetry: [811, 821, 831, 841, 851, 861, 871, 874, 881, 884], - Political_Science: list(range(320, 330)) + list(range(351, 355)), - Psychology: list(range(150, 160)), - Foreign_Language_Study: list(range(430, 500)), - Reference_Study_Aids: list(range(10, 20)) + classifier.African_History: list(range(960, 970)), + classifier.Architecture: list(range(710, 720)) + list(range(720, 730)), + classifier.Art: list(range(700, 710)) + list(range(730, 770)) + [774, 776], + classifier.Art_Criticism_Theory: [701], + classifier.Asian_History: list(range(950, 960)) + [995, 996, 997], + classifier.Biography_Memoir: ["B", 920], + classifier.Economics: list(range(330, 340)), + classifier.Christianity: [list(range(220, 230)) + list(range(230, 290))], + 
classifier.Cooking: [list(range(640, 642))], + classifier.Performing_Arts: [790, 791, 792], + classifier.Entertainment: 790, + classifier.Games: [793, 794, 795], + classifier.Drama: [812, 822, 832, 842, 852, 862, 872, 882], + classifier.Education: list(range(370, 380)) + [707], + classifier.European_History: list(range(940, 950)), + classifier.Folklore: [398], + classifier.History: [900], + classifier.Islam: [297], + classifier.Judaism: [296], + classifier.Latin_American_History: list(range(981, 990)), + classifier.Law: list(range(340, 350)) + [364], + classifier.Management_Leadership: [658], + classifier.Mathematics: list(range(510, 520)), + classifier.Medical: list(range(610, 620)), + classifier.Military_History: list(range(355, 360)), + classifier.Music: list(range(780, 789)), + classifier.Periodicals: list(range(50, 60)) + + [105, 405, 505, 605, 705, 805, 905], + classifier.Philosophy: list(range(160, 200)), + classifier.Photography: [771, 772, 773, 775, 778, 779], + classifier.Poetry: [811, 821, 831, 841, 851, 861, 871, 874, 881, 884], + classifier.Political_Science: list(range(320, 330)) + list(range(351, 355)), + classifier.Psychology: list(range(150, 160)), + classifier.Foreign_Language_Study: list(range(430, 500)), + classifier.Reference_Study_Aids: list(range(10, 20)) + list(range(30, 40)) + [103, 203, 303, 403, 503, 603, 703, 803, 903] + list(range(410, 430)), - Religion_Spirituality: list(range(200, 220)) + [290, 292, 293, 294, 295, 299], - Science: ( + classifier.Religion_Spirituality: list(range(200, 220)) + + [290, 292, 293, 294, 295, 299], + classifier.Science: ( [500, 501, 502] + list(range(506, 510)) + list(range(520, 530)) @@ -80,14 +83,14 @@ class DeweyDecimalClassifier(Classifier): + list(range(580, 590)) + list(range(590, 600)) ), - Social_Sciences: ( + classifier.Social_Sciences: ( list(range(300, 310)) + list(range(360, 364)) + list(range(390, 397)) + [399] ), - Sports: list(range(796, 800)), - Technology: ( + classifier.Sports: 
list(range(796, 800)), + classifier.Technology: ( [600, 601, 602, 604] + list(range(606, 610)) + list(range(610, 640)) @@ -96,9 +99,9 @@ class DeweyDecimalClassifier(Classifier): + list(range(681, 690)) + list(range(690, 700)) ), - Travel: list(range(910, 920)), - United_States_History: list(range(973, 980)), - World_History: [909], + classifier.Travel: list(range(910, 920)), + classifier.United_States_History: list(range(973, 980)), + classifier.World_History: [909], } @classmethod diff --git a/src/palace/manager/core/classifier/gutenberg.py b/src/palace/manager/core/classifier/gutenberg.py index af08f3b365..af00690627 100644 --- a/src/palace/manager/core/classifier/gutenberg.py +++ b/src/palace/manager/core/classifier/gutenberg.py @@ -1,4 +1,5 @@ -from palace.manager.core.classifier import * +from palace.manager.core import classifier +from palace.manager.core.classifier import Classifier class GutenbergBookshelfClassifier(Classifier): @@ -17,66 +18,66 @@ class GutenbergBookshelfClassifier(Classifier): } GENRES = { - Adventure: [ + classifier.Adventure: [ "Adventure", "Pirates, Buccaneers, Corsairs, etc.", ], - # African_American : ["African American Writers"], - Ancient_History: ["Classical Antiquity"], - Architecture: [ + # classifier.African_American: ["African American Writers"], + classifier.Ancient_History: ["Classical Antiquity"], + classifier.Architecture: [ "Architecture", "The American Architect and Building News", ], - Art: ["Art"], - Biography_Memoir: [ + classifier.Art: ["Art"], + classifier.Biography_Memoir: [ "Biographies", "Children's Biography", ], - Christianity: ["Christianity"], - Civil_War_History: "US Civil War", - Classics: [ + classifier.Christianity: ["Christianity"], + classifier.Civil_War_History: "US Civil War", + classifier.Classics: [ "Best Books Ever Listings", "Harvard Classics", ], - Cooking: [ + classifier.Cooking: [ "Armour's Monthly Cook Book", "Cookery", ], - Drama: [ + classifier.Drama: [ "One Act Plays", "Opera", "Plays",
], - Erotica: "Erotic Fiction", - Fantasy: "Fantasy", - Foreign_Language_Study: [ + classifier.Erotica: "Erotic Fiction", + classifier.Fantasy: "Fantasy", + classifier.Foreign_Language_Study: [ "Language Education", ], - Gardening: [ + classifier.Gardening: [ "Garden and Forest", "Horticulture", ], - Historical_Fiction: "Historical Fiction", - History: [ + classifier.Historical_Fiction: "Historical Fiction", + classifier.History: [ "Children's History", ], - Horror: ["Gothic Fiction", "Horror"], - Humorous_Fiction: ["Humor"], - Islam: "Islam", - Judaism: "Judaism", - Law: [ + classifier.Horror: ["Gothic Fiction", "Horror"], + classifier.Humorous_Fiction: ["Humor"], + classifier.Islam: "Islam", + classifier.Judaism: "Judaism", + classifier.Law: [ "British Law", "Noteworthy Trials", "United States Law", ], - Literary_Criticism: ["Bibliomania"], - Mathematics: "Mathematics", - Medical: [ + classifier.Literary_Criticism: ["Bibliomania"], + classifier.Mathematics: "Mathematics", + classifier.Medical: [ "Medicine", "The North American Medical and Surgical Journal", "Physiology", ], - Military_History: [ + classifier.Military_History: [ "American Revolutionary War", "World War I", "World War II", @@ -84,22 +85,22 @@ class GutenbergBookshelfClassifier(Classifier): "Boer War", "Napoleonic", ], - Modern_History: "Current History", - Music: [ + classifier.Modern_History: "Current History", + classifier.Music: [ "Music", "Child's Own Book of Great Musicians", ], - Mystery: [ + classifier.Mystery: [ "Crime Fiction", "Detective Fiction", "Mystery Fiction", ], - Nature: [ + classifier.Nature: [ "Animal", "Animals-Wild", "Bird-Lore" "Birds, Illustrated by Color Photography", ], - Periodicals: [ + classifier.Periodicals: [ "Ainslee's", "Prairie Farmer", "Blackwood's Edinburgh Magazine", @@ -168,31 +169,31 @@ class GutenbergBookshelfClassifier(Classifier): "The Yellow Book", "Women's Travel Journals", ], - Pets: ["Animals-Domestic"], - Philosophy: ["Philosophy"], - Photography: 
"Photography", - Poetry: [ + classifier.Pets: ["Animals-Domestic"], + classifier.Philosophy: ["Philosophy"], + classifier.Photography: "Photography", + classifier.Poetry: [ "Poetry", "Poetry, A Magazine of Verse", "Children's Verse", ], - Political_Science: [ + classifier.Political_Science: [ "Anarchism", "Politics", ], - Psychology: ["Psychology"], - Reference_Study_Aids: [ + classifier.Psychology: ["Psychology"], + classifier.Reference_Study_Aids: [ "Reference", "CIA World Factbooks", ], - Religion_Spirituality: [ + classifier.Religion_Spirituality: [ "Atheism", "Bahá'í Faith", "Hinduism", "Paganism", "Children's Religion", ], - Science: [ + classifier.Science: [ "Astronomy", "Biology", "Botany", @@ -208,30 +209,30 @@ class GutenbergBookshelfClassifier(Classifier): "Physics", "Scientific American", ], - Science_Fiction: [ + classifier.Science_Fiction: [ "Astounding Stories", "Precursors of Science Fiction", "The Galaxy", "Science Fiction", ], - Social_Sciences: [ + classifier.Social_Sciences: [ "Anthropology", "Archaeology", "The American Journal of Archaeology", "Sociology", ], - Suspense_Thriller: [ + classifier.Suspense_Thriller: [ "Suspense", "Thriller", ], - Technology: [ + classifier.Technology: [ "Engineering", "Technology", "Transportation", ], - Travel: "Travel", - True_Crime: "Crime Nonfiction", - Westerns: "Western", + classifier.Travel: "Travel", + classifier.True_Crime: "Crime Nonfiction", + classifier.Westerns: "Western", } @classmethod diff --git a/src/palace/manager/core/classifier/keyword.py b/src/palace/manager/core/classifier/keyword.py index 89a6d60fea..b6c8bf8761 100644 --- a/src/palace/manager/core/classifier/keyword.py +++ b/src/palace/manager/core/classifier/keyword.py @@ -1,4 +1,9 @@ -from palace.manager.core.classifier import * +import re +from collections import Counter + +from palace.manager.core import classifier +from palace.manager.core.classifier import Classifier +from palace.manager.core.classifier.age import AgeOrGradeClassifier 
def match_kw(*l): @@ -104,7 +109,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): } CATCHALL_KEYWORDS = { - Adventure: match_kw( + classifier.Adventure: match_kw( "adventure", "adventurers", "adventure stories", @@ -114,47 +119,47 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("war stories"), Eg("men's adventure"), ), - African_History: match_kw( + classifier.African_History: match_kw( "african history", "history.*africa", ), - Ancient_History: match_kw( + classifier.Ancient_History: match_kw( "ancient.*history", "history.*ancient", "civilization, classical", ), - Antiques_Collectibles: match_kw( + classifier.Antiques_Collectibles: match_kw( "antiques", "collectibles", "collectors", "collecting", ), - Architecture: match_kw( + classifier.Architecture: match_kw( "architecture", "architectural", "architect", "architects", ), - Art: match_kw( + classifier.Art: match_kw( "art", "arts", "artist", "artists", "artistic", ), - Art_Criticism_Theory: match_kw( + classifier.Art_Criticism_Theory: match_kw( "art criticism", "art / criticism & theory", ), - Art_History: match_kw( + classifier.Art_History: match_kw( "art.*history", ), - Asian_History: match_kw( + classifier.Asian_History: match_kw( "asian history", "history.*asia", "australasian & pacific history", ), - Bartending_Cocktails: match_kw( + classifier.Bartending_Cocktails: match_kw( "cocktail", "cocktails", "bartending", @@ -164,7 +169,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("wine & spirits"), "spirits & cocktails", ), - Biography_Memoir: match_kw( + classifier.Biography_Memoir: match_kw( "autobiographies", "autobiography", "biographies", @@ -172,15 +177,15 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "biographical", "personal memoirs", ), - Body_Mind_Spirit: match_kw( + classifier.Body_Mind_Spirit: match_kw( "body, mind & spirit", ), - Buddhism: match_kw( + classifier.Buddhism: match_kw( "buddhism", "buddhist", "buddha", ), - Business: match_kw( + classifier.Business: 
match_kw( "business", "businesspeople", "businesswomen", @@ -193,7 +198,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "sales & selling", Eg("nonprofit"), ), - Christianity: match_kw( + classifier.Christianity: match_kw( Eg("schema:creativework:bible"), Eg("baptist"), Eg("bible"), @@ -211,15 +216,15 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("church"), Eg("christmas & advent"), ), - Civil_War_History: match_kw( + classifier.Civil_War_History: match_kw( "american civil war", "1861-1865", "civil war period", ), - Classics: match_kw( + classifier.Classics: match_kw( "classics", ), - Computers: match_kw( + classifier.Computers: match_kw( "computer", "computer science", "computational", @@ -234,13 +239,13 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("web"), Eg("world wide web"), ), - Contemporary_Romance: match_kw( + classifier.Contemporary_Romance: match_kw( "contemporary romance", "romance--contemporary", "romance / contemporary", "romance - contemporary", ), - Cooking: match_kw( + classifier.Cooking: match_kw( Eg("non-alcoholic"), Eg("baking"), "cookbook", @@ -250,7 +255,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "home economics", "cuisine", ), - Crafts_Hobbies: match_kw( + classifier.Crafts_Hobbies: match_kw( "arts & crafts", "arts, crafts", Eg("beadwork"), @@ -290,14 +295,14 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("weaving"), Eg("woodwork"), ), - Design: match_kw( + classifier.Design: match_kw( "design", "designer", "designers", Eg("graphic design"), Eg("typography") ), - Dictionaries: match_kw( + classifier.Dictionaries: match_kw( "dictionaries", "dictionary", ), - Drama: match_kw( + classifier.Drama: match_kw( Eg("comedies"), "drama", "dramatist", @@ -308,14 +313,14 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("tragedies"), Eg("tragedy"), ), - Economics: match_kw( + classifier.Economics: match_kw( Eg("banking"), "economy", "economies", "economic", "economics", ), - Education: 
match_kw( + classifier.Education: match_kw( # TODO: a lot of these don't work well because of # the huge amount of fiction about students. This # will be fixed when we institute the @@ -337,14 +342,14 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("university"), Eg("universities"), ), - Epic_Fantasy: match_kw( + classifier.Epic_Fantasy: match_kw( "epic fantasy", "fantasy - epic", "fantasy / epic", "fantasy--epic", "fantasy/epic", ), - Espionage: match_kw( + classifier.Espionage: match_kw( "espionage", "intrigue", "spies", @@ -353,12 +358,12 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "spy fiction", "spy thriller", ), - Erotica: match_kw( + classifier.Erotica: match_kw( "erotic", "erotica", ), # TODO: history _plus_ a place - European_History: match_kw( + classifier.European_History: match_kw( "europe.*history", "history.*europe", Eg("france.*history"), @@ -371,14 +376,14 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("history.*germany"), # etc. etc. etc. 
), - Family_Relationships: match_kw( + classifier.Family_Relationships: match_kw( "family & relationships", "relationships", "family relationships", "human sexuality", "sexuality", ), - Fantasy: match_kw( + classifier.Fantasy: match_kw( "fantasy", Eg("magic"), Eg("wizards"), @@ -390,12 +395,12 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("wizardry"), Eg("unicorns"), ), - Fashion: match_kw( + classifier.Fashion: match_kw( "fashion", "fashion design", "fashion designers", ), - Film_TV: match_kw( + classifier.Film_TV: match_kw( Eg("director"), Eg("directors"), "film", @@ -412,24 +417,24 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "tv", "video", ), - Foreign_Language_Study: match_kw( + classifier.Foreign_Language_Study: match_kw( Eg("english as a foreign language"), Eg("english as a second language"), Eg("esl"), "foreign language study", Eg("multi-language dictionaries"), ), - Games: match_kw( + classifier.Games: match_kw( "games", Eg("video games"), "gaming", Eg("gambling"), ), - Gardening: match_kw( + classifier.Gardening: match_kw( "gardening", "horticulture", ), - Comics_Graphic_Novels: match_kw( + classifier.Comics_Graphic_Novels: match_kw( "comics", "comic strip", "comic strips", @@ -446,11 +451,11 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("manga"), Eg("yaoi"), ), - Hard_Boiled_Mystery: match_kw( + classifier.Hard_Boiled_Mystery: match_kw( "hard-boiled", "noir", ), - Health_Diet: match_kw( + classifier.Health_Diet: match_kw( # ! "health services" ? 
"fitness", "health", @@ -462,29 +467,29 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "diets", "weight loss", ), - Hinduism: match_kw( + classifier.Hinduism: match_kw( "hinduism", "hindu", "hindus", ), - Historical_Fiction: match_kw( + classifier.Historical_Fiction: match_kw( "historical fiction", "fiction.*historical", "^historical$", ), - Historical_Romance: match_kw( + classifier.Historical_Romance: match_kw( "historical romance", Eg("regency romance"), Eg("romance.*regency"), ), - History: match_kw( + classifier.History: match_kw( "histories", "history", "historiography", "historical period", Eg("pre-confederation"), ), - Horror: match_kw( + classifier.Horror: match_kw( "horror", Eg("occult"), Eg("ghost"), @@ -495,7 +500,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("supernatural"), "scary", ), - House_Home: match_kw( + classifier.House_Home: match_kw( "house and home", "house & home", Eg("remodeling"), @@ -503,7 +508,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("caretaking"), Eg("interior decorating"), ), - Humorous_Fiction: match_kw( + classifier.Humorous_Fiction: match_kw( "comedy", "funny", "humor", @@ -513,7 +518,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("satire"), "wit", ), - Humorous_Nonfiction: match_kw( + classifier.Humorous_Nonfiction: match_kw( "comedy", "funny", "humor", @@ -522,13 +527,13 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "humourous", "wit", ), - Entertainment: match_kw( + classifier.Entertainment: match_kw( # Almost a pure top-level category "entertainment", ), # These might be a problem because they might pick up # hateful books. Not sure if this will be a problem. 
- Islam: match_kw( + classifier.Islam: match_kw( "islam", "islamic", "muslim", @@ -536,14 +541,14 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("halal"), "islamic studies", ), - Judaism: match_kw( + classifier.Judaism: match_kw( "judaism", "jewish", Eg("kosher"), "jews", "jewish studies", ), - LGBTQ_Fiction: match_kw( + classifier.LGBTQ_Fiction: match_kw( "lgbt", "lgbtq", Eg("lesbian"), @@ -558,8 +563,8 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "homosexuality", "queer", ), - Latin_American_History: match_kw(), - Law: match_kw( + classifier.Latin_American_History: match_kw(), + classifier.Law: match_kw( "court", "judicial", "law", @@ -567,27 +572,27 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "legislation", "legal", ), - Legal_Thriller: match_kw( + classifier.Legal_Thriller: match_kw( "legal thriller", "legal thrillers", ), - Literary_Criticism: match_kw( + classifier.Literary_Criticism: match_kw( "criticism, interpretation", ), - Literary_Fiction: match_kw( + classifier.Literary_Fiction: match_kw( "literary", "literary fiction", "general fiction", "fiction[^a-z]+general", "fiction[^a-z]+literary", ), - Management_Leadership: match_kw( + classifier.Management_Leadership: match_kw( "management", "business & economics / leadership", "business & economics -- leadership", "management science", ), - Mathematics: match_kw( + classifier.Mathematics: match_kw( Eg("algebra"), Eg("arithmetic"), Eg("calculus"), @@ -606,7 +611,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("statistics"), Eg("trigonometry"), ), - Medical: match_kw( + classifier.Medical: match_kw( Eg("anatomy"), Eg("disease"), Eg("diseases"), @@ -622,16 +627,16 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("vaccines"), Eg("virus"), ), - Medieval_History: match_kw( + classifier.Medieval_History: match_kw( "civilization, medieval", "medieval period", "history.*medieval", ), - Middle_East_History: match_kw( + classifier.Middle_East_History: match_kw( 
"middle east.*history", "history.*middle east", ), - Military_History: match_kw( + classifier.Military_History: match_kw( "military science", "warfare", "military", @@ -639,7 +644,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("1939-1945"), Eg("world war"), ), - Modern_History: match_kw( + classifier.Modern_History: match_kw( Eg("1900 - 1999"), Eg("2000-2099"), "modern history", @@ -653,13 +658,13 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): # This one is difficult because it takes effect if book # has subject "media tie-in" *and* "science fiction" or # "fantasy" - Media_Tie_in_SF: match_kw( + classifier.Media_Tie_in_SF: match_kw( "science fiction & fantasy gaming", Eg("star trek"), Eg("star wars"), Eg("jedi"), ), - Music: match_kw( + classifier.Music: match_kw( "music", "musician", "musicians", @@ -673,7 +678,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("rock music"), Eg("punk rock"), ), - Mystery: match_kw( + classifier.Mystery: match_kw( Eg("crime"), Eg("detective"), Eg("murder"), @@ -684,18 +689,18 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("poirot, hercule"), Eg("schema:person:holmes, sherlock"), ), - Nature: match_kw( + classifier.Nature: match_kw( # TODO: not sure about this one "nature", ), - Body_Mind_Spirit: match_kw( + classifier.Body_Mind_Spirit: match_kw( "new age", ), - Paranormal_Romance: match_kw( + classifier.Paranormal_Romance: match_kw( "paranormal romance", "romance.*paranormal", ), - Parenting: match_kw( + classifier.Parenting: match_kw( # "children" isn't here because the vast majority of # "children" tags indicate books _for_ children. 
# "family" isn't here because the vast majority @@ -707,10 +712,10 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("motherhood"), Eg("fatherhood"), ), - Parenting_Family: match_kw( + classifier.Parenting_Family: match_kw( # Pure top-level category ), - Performing_Arts: match_kw( + classifier.Performing_Arts: match_kw( "theatre", "theatrical", "performing arts", @@ -718,23 +723,23 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("farce"), Eg("tragicomedy"), ), - Periodicals: match_kw( + classifier.Periodicals: match_kw( "periodicals", "periodical", ), - Personal_Finance_Investing: match_kw( + classifier.Personal_Finance_Investing: match_kw( "personal finance", "financial planning", "investing", Eg("retirement planning"), "money management", ), - Pets: match_kw( + classifier.Pets: match_kw( "pets", Eg("dogs"), Eg("cats"), ), - Philosophy: match_kw( + classifier.Philosophy: match_kw( "philosophy", "philosophical", "philosopher", @@ -742,17 +747,17 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("epistemology"), Eg("metaphysics"), ), - Photography: match_kw( + classifier.Photography: match_kw( "photography", "photographer", "photographers", "photographic", ), - Police_Procedural: match_kw( + classifier.Police_Procedural: match_kw( "police[^a-z]+procedural", "police[^a-z]+procedurals", ), - Poetry: match_kw( + classifier.Poetry: match_kw( "poetry", "poet", "poets", @@ -761,7 +766,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("sonnet"), Eg("sonnets"), ), - Political_Science: match_kw( + classifier.Political_Science: match_kw( Eg("american government"), Eg("anarchism"), Eg("censorship"), @@ -785,17 +790,17 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "political", Eg("current events"), ), - Psychology: match_kw( + classifier.Psychology: match_kw( "psychology", Eg("psychiatry"), "psychological aspects", Eg("psychiatric"), Eg("psychoanalysis"), ), - Real_Estate: match_kw( + classifier.Real_Estate: match_kw( "real estate", 
), - Reference_Study_Aids: match_kw( + classifier.Reference_Study_Aids: match_kw( Eg("catalogs"), Eg("handbooks"), Eg("manuals"), @@ -827,7 +832,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("vocabulary"), Eg("writing systems"), ), - Religion_Spirituality: match_kw( + classifier.Religion_Spirituality: match_kw( "religion", "religious", Eg("taoism"), @@ -835,7 +840,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("confucianism"), Eg("inspirational nonfiction"), ), - Renaissance_Early_Modern_History: match_kw( + classifier.Renaissance_Early_Modern_History: match_kw( "early modern period", "early modern history", "early modern, 1500-1700", @@ -843,13 +848,13 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "renaissance.*history", "history.*renaissance", ), - Romance: match_kw( + classifier.Romance: match_kw( "love stories", "romance", "love & romance", "romances", ), - Science: match_kw( + classifier.Science: match_kw( Eg("aeronautics"), Eg("astronomy"), Eg("biology"), @@ -882,22 +887,22 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("virology"), Eg("cytology"), ), - Science_Fiction: match_kw( + classifier.Science_Fiction: match_kw( "speculative fiction", "sci-fi", "sci fi", Eg("time travel"), ), - # Science_Fiction_Fantasy: match_kw( + # classifier.Science_Fiction_Fantasy: match_kw( # "science fiction.*fantasy", # ), - Self_Help: match_kw( + classifier.Self_Help: match_kw( "self help", "self-help", "self improvement", "self-improvement", ), - Folklore: match_kw( + classifier.Folklore: match_kw( "fables", "folklore", "folktales", @@ -905,11 +910,11 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "myth", "legends", ), - Short_Stories: match_kw( + classifier.Short_Stories: match_kw( "short stories", Eg("literary collections"), ), - Social_Sciences: match_kw( + classifier.Social_Sciences: match_kw( Eg("anthropology"), Eg("archaeology"), Eg("sociology"), @@ -929,7 +934,7 @@ class 
KeywordBasedClassifier(AgeOrGradeClassifier): Eg("customs & traditions"), Eg("criminology"), ), - Sports: match_kw( + classifier.Sports: match_kw( # Ton of specific sports here since 'players' # doesn't work. TODO: Why? I don't remember. "sports", @@ -939,7 +944,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("soccer"), Eg("skating"), ), - Study_Aids: match_kw( + classifier.Study_Aids: match_kw( Eg("act"), Eg("advanced placement"), Eg("bar exam"), @@ -965,7 +970,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("toefl"), "workbooks", ), - Romantic_Suspense: match_kw( + classifier.Romantic_Suspense: match_kw( "romantic.*suspense", "suspense.*romance", "romance.*suspense", @@ -973,7 +978,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "romance.*thriller", "thriller.*romance", ), - Technology: match_kw( + classifier.Technology: match_kw( "technology", Eg("engineering"), Eg("bioengineering"), @@ -986,17 +991,17 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): Eg("ships & shipbuilding"), Eg("cars & trucks"), ), - Suspense_Thriller: match_kw( + classifier.Suspense_Thriller: match_kw( "thriller", "thrillers", "suspense", ), - Technothriller: match_kw( + classifier.Technothriller: match_kw( "techno-thriller", "technothriller", "technothrillers", ), - Travel: match_kw( + classifier.Travel: match_kw( Eg("discovery"), "exploration", "travel", @@ -1006,32 +1011,32 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "travelers", "description.*travel", ), - United_States_History: match_kw( + classifier.United_States_History: match_kw( "united states history", "u.s. 
history", Eg("american revolution"), Eg("1775-1783"), Eg("revolutionary period"), ), - Urban_Fantasy: match_kw( + classifier.Urban_Fantasy: match_kw( "urban fantasy", "fantasy.*urban", ), - Urban_Fiction: match_kw( + classifier.Urban_Fiction: match_kw( "urban fiction", Eg("fiction.*african american.*urban"), ), - Vegetarian_Vegan: match_kw( + classifier.Vegetarian_Vegan: match_kw( "vegetarian", Eg("vegan"), Eg("veganism"), "vegetarianism", ), - Westerns: match_kw( + classifier.Westerns: match_kw( "western stories", "westerns", ), - Women_Detectives: match_kw( + classifier.Women_Detectives: match_kw( "women detectives", "women detective", "women private investigators", @@ -1039,67 +1044,69 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "women sleuths", "women sleuth", ), - Womens_Fiction: match_kw( + classifier.Womens_Fiction: match_kw( "contemporary women", "chick lit", "womens fiction", "women's fiction", ), - World_History: match_kw( + classifier.World_History: match_kw( "world history", "history[^a-z]*world", ), } LEVEL_2_KEYWORDS = { - Reference_Study_Aids: match_kw( + classifier.Reference_Study_Aids: match_kw( # Formerly in 'Language Arts & Disciplines' Eg("language arts & disciplines"), Eg("language arts and disciplines"), Eg("language arts"), ), - Design: match_kw( + classifier.Design: match_kw( "arts and crafts movement", ), - Drama: match_kw( + classifier.Drama: match_kw( Eg("opera"), ), - Erotica: match_kw( + classifier.Erotica: match_kw( Eg("erotic poetry"), Eg("gay erotica"), Eg("lesbian erotica"), Eg("erotic photography"), ), - Games: match_kw(Eg("games.*fantasy")), - Historical_Fiction: match_kw( + classifier.Games: match_kw(Eg("games.*fantasy")), + classifier.Historical_Fiction: match_kw( Eg("arthurian romance.*"), # This is "romance" in the old # sense of a story. 
), - Literary_Criticism: match_kw( + classifier.Literary_Criticism: match_kw( Eg("literary history"), # Not History Eg("romance language"), # Not Romance ), - Media_Tie_in_SF: match_kw("tv, movie, video game adaptations"), # Not Film & TV + classifier.Media_Tie_in_SF: match_kw( + "tv, movie, video game adaptations" + ), # Not Film & TV # We need to match these first so that the 'military'/'warfare' # part doesn't match Military History. - Military_SF: match_kw( + classifier.Military_SF: match_kw( "science fiction.*military", "military.*science fiction", Eg("space warfare"), # Thankfully Eg("interstellar warfare"), ), - Military_Thriller: match_kw( + classifier.Military_Thriller: match_kw( "military thrillers", "thrillers.*military", ), - Pets: match_kw( + classifier.Pets: match_kw( "human-animal relationships", ), - Political_Science: match_kw( + classifier.Political_Science: match_kw( Eg("health care reform"), ), # Stop the 'religious' from matching Religion/Spirituality. - Religious_Fiction: match_kw( + classifier.Religious_Fiction: match_kw( Eg("christian fiction"), Eg("inspirational fiction"), Eg("fiction.*christian"), @@ -1107,7 +1114,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "fiction.*religious", Eg("Oriental religions and wisdom"), ), - Romantic_Suspense: match_kw( + classifier.Romantic_Suspense: match_kw( "romantic.*suspense", "suspense.*romance", "romance.*suspense", @@ -1116,25 +1123,25 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): "thriller.*romance", ), # Stop from showing up as 'science' - Social_Sciences: match_kw( + classifier.Social_Sciences: match_kw( "social sciences", "social science", "human science", ), - Science_Fiction: match_kw( + classifier.Science_Fiction: match_kw( "science fiction", "science fiction.*general", ), - Supernatural_Thriller: match_kw( + classifier.Supernatural_Thriller: match_kw( "thriller.*supernatural", "supernatural.*thriller", ), # Stop from going into Mystery due to 'crime' - True_Crime: 
match_kw( + classifier.True_Crime: match_kw( "true crime", ), # Otherwise fiction.*urban turns Urban Fantasy into Urban Fiction - Urban_Fantasy: match_kw( + classifier.Urban_Fantasy: match_kw( "fiction.*fantasy.*urban", ), # Stop the 'children' in 'children of' from matching Parenting. @@ -1144,7 +1151,7 @@ class KeywordBasedClassifier(AgeOrGradeClassifier): } LEVEL_3_KEYWORDS = { - Space_Opera: match_kw( + classifier.Space_Opera: match_kw( "space opera", ), } diff --git a/src/palace/manager/core/classifier/lcc.py b/src/palace/manager/core/classifier/lcc.py index 2abc37399d..248441b575 100644 --- a/src/palace/manager/core/classifier/lcc.py +++ b/src/palace/manager/core/classifier/lcc.py @@ -1,9 +1,10 @@ import json +import re -from palace.manager.core.classifier import * +from palace.manager.core import classifier -class LCCClassifier(Classifier): +class LCCClassifier(classifier.Classifier): TOP_LEVEL = re.compile("^([A-Z]{1,2})") FICTION = {"PN", "PQ", "PR", "PS", "PT", "PZ"} JUVENILE = {"PZ"} @@ -24,19 +25,19 @@ class LCCClassifier(Classifier): # Sports: GV557-1198.995 # TODO: E and F are actually "the Americas". 
# United_States_History is E151-E909, F1-F975 but not E456-E655 - African_History: ["DT"], - Ancient_History: ["DE"], - Architecture: ["NA"], - Art_Criticism_Theory: ["BH"], - Asian_History: ["DS", "DU"], - Biography_Memoir: ["CT"], - Business: ["HC", "HF", "HJ"], - Christianity: ["BR", "BS", "BT", "BV", "BX"], - Cooking: ["TX"], - Crafts_Hobbies: ["TT"], - Economics: ["HB"], - Education: ["L"], - European_History: [ + classifier.African_History: ["DT"], + classifier.Ancient_History: ["DE"], + classifier.Architecture: ["NA"], + classifier.Art_Criticism_Theory: ["BH"], + classifier.Asian_History: ["DS", "DU"], + classifier.Biography_Memoir: ["CT"], + classifier.Business: ["HC", "HF", "HJ"], + classifier.Christianity: ["BR", "BS", "BT", "BV", "BX"], + classifier.Cooking: ["TX"], + classifier.Crafts_Hobbies: ["TT"], + classifier.Economics: ["HB"], + classifier.Education: ["L"], + classifier.European_History: [ "DA", "DAW", "DB", @@ -51,24 +52,24 @@ class LCCClassifier(Classifier): "DQ", "DR", ], - Folklore: ["GR"], - Games: ["GV"], - Islam: ["BP"], - Judaism: ["BM"], - Literary_Criticism: ["Z"], - Mathematics: ["QA", "HA", "GA"], - Medical: ["QM", "R"], - Military_History: ["U", "V"], - Music: ["M"], - Parenting_Family: ["HQ"], - Periodicals: ["AP", "AN"], - Philosophy: ["BC", "BD", "BJ"], - Photography: ["TR"], - Political_Science: ["J", "HX"], - Psychology: ["BF"], - Reference_Study_Aids: ["AE", "AG", "AI"], - Religion_Spirituality: ["BL", "BQ"], - Science: [ + classifier.Folklore: ["GR"], + classifier.Games: ["GV"], + classifier.Islam: ["BP"], + classifier.Judaism: ["BM"], + classifier.Literary_Criticism: ["Z"], + classifier.Mathematics: ["QA", "HA", "GA"], + classifier.Medical: ["QM", "R"], + classifier.Military_History: ["U", "V"], + classifier.Music: ["M"], + classifier.Parenting_Family: ["HQ"], + classifier.Periodicals: ["AP", "AN"], + classifier.Philosophy: ["BC", "BD", "BJ"], + classifier.Photography: ["TR"], + classifier.Political_Science: ["J", "HX"], + 
classifier.Psychology: ["BF"], + classifier.Reference_Study_Aids: ["AE", "AG", "AI"], + classifier.Religion_Spirituality: ["BL", "BQ"], + classifier.Science: [ "QB", "QC", "QD", @@ -82,7 +83,7 @@ class LCCClassifier(Classifier): "GC", "QP", ], - Social_Sciences: [ + classifier.Social_Sciences: [ "HD", "HE", "HF", @@ -95,26 +96,26 @@ class LCCClassifier(Classifier): "GF", "GT", ], - Sports: ["SK"], - World_History: ["CB"], + classifier.Sports: ["SK"], + classifier.World_History: ["CB"], } LEFTOVERS = dict( - B=Philosophy, - T=Technology, - Q=Science, - S=Science, - H=Social_Sciences, - D=History, - N=Art, - L=Education, - E=United_States_History, - F=United_States_History, - BP=Religion_Spirituality, + B=classifier.Philosophy, + T=classifier.Technology, + Q=classifier.Science, + S=classifier.Science, + H=classifier.Social_Sciences, + D=classifier.History, + N=classifier.Art, + L=classifier.Education, + E=classifier.United_States_History, + F=classifier.United_States_History, + BP=classifier.Religion_Spirituality, ) NAMES = json.loads( - classifier_resources_dir().joinpath("lcc_one_level.json").read_text() + classifier.classifier_resources_dir().joinpath("lcc_one_level.json").read_text() ) @classmethod @@ -159,4 +160,4 @@ def audience(cls, identifier, name): return None -Classifier.classifiers[Classifier.LCC] = LCCClassifier +classifier.Classifier.classifiers[classifier.Classifier.LCC] = LCCClassifier diff --git a/src/palace/manager/core/classifier/overdrive.py b/src/palace/manager/core/classifier/overdrive.py index 5179f85762..d0bd01a7d8 100644 --- a/src/palace/manager/core/classifier/overdrive.py +++ b/src/palace/manager/core/classifier/overdrive.py @@ -1,7 +1,7 @@ -from palace.manager.core.classifier import * +from palace.manager.core import classifier -class OverdriveClassifier(Classifier): +class OverdriveClassifier(classifier.Classifier): # These genres are only used to describe video titles. 
VIDEO_GENRES = [ "Action", @@ -84,84 +84,89 @@ class OverdriveClassifier(Classifier): ) GENRES = { - Antiques_Collectibles: "Antiques", - Architecture: "Architecture", - Art: "Art", - Biography_Memoir: "Biography & Autobiography", - Business: ["Business", "Marketing & Sales", "Careers"], - Christianity: "Christian Nonfiction", - Computers: ["Computer Technology", "Social Media"], - Classics: "Classic Literature", - Cooking: "Cooking & Food", - Crafts_Hobbies: "Crafts", - Games: "Games", - Drama: "Drama", - Economics: "Economics", - Education: "Education", - Erotica: "Erotic Literature", - Fantasy: "Fantasy", - Folklore: ["Folklore", "Mythology"], - Foreign_Language_Study: "Foreign Language Study", - Gardening: "Gardening", - Comics_Graphic_Novels: "Comic and Graphic Books", - Health_Diet: "Health & Fitness", - Historical_Fiction: ["Historical Fiction", "Antiquarian"], - History: "History", - Horror: "Horror", - House_Home: "Home Design & Décor", - Humorous_Fiction: "Humor (Fiction)", - Humorous_Nonfiction: "Humor (Nonfiction)", - Entertainment: "Entertainment", - Judaism: "Judaica", - Law: "Law", - Literary_Criticism: [ + classifier.Antiques_Collectibles: "Antiques", + classifier.Architecture: "Architecture", + classifier.Art: "Art", + classifier.Biography_Memoir: "Biography & Autobiography", + classifier.Business: ["Business", "Marketing & Sales", "Careers"], + classifier.Christianity: "Christian Nonfiction", + classifier.Computers: ["Computer Technology", "Social Media"], + classifier.Classics: "Classic Literature", + classifier.Cooking: "Cooking & Food", + classifier.Crafts_Hobbies: "Crafts", + classifier.Games: "Games", + classifier.Drama: "Drama", + classifier.Economics: "Economics", + classifier.Education: "Education", + classifier.Erotica: "Erotic Literature", + classifier.Fantasy: "Fantasy", + classifier.Folklore: ["Folklore", "Mythology"], + classifier.Foreign_Language_Study: "Foreign Language Study", + classifier.Gardening: "Gardening", + 
classifier.Comics_Graphic_Novels: "Comic and Graphic Books", + classifier.Health_Diet: "Health & Fitness", + classifier.Historical_Fiction: ["Historical Fiction", "Antiquarian"], + classifier.History: "History", + classifier.Horror: "Horror", + classifier.House_Home: "Home Design & Décor", + classifier.Humorous_Fiction: "Humor (Fiction)", + classifier.Humorous_Nonfiction: "Humor (Nonfiction)", + classifier.Entertainment: "Entertainment", + classifier.Judaism: "Judaica", + classifier.Law: "Law", + classifier.Literary_Criticism: [ "Literary Criticism", "Criticism", "Language Arts", "Writing", ], - Management_Leadership: "Management", - Mathematics: "Mathematics", - Medical: "Medical", - Military_History: "Military", - Music: ["Music", "Songbook"], - Mystery: "Mystery", - Nature: "Nature", - Body_Mind_Spirit: "New Age", - Parenting_Family: ["Family & Relationships", "Child Development"], - Performing_Arts: "Performing Arts", - Personal_Finance_Investing: "Finance", - Pets: "Pets", - Philosophy: ["Philosophy", "Ethics"], - Photography: "Photography", - Poetry: "Poetry", - Political_Science: ["Politics", "Current Events"], - Psychology: ["Psychology", "Psychiatry", "Psychiatry & Psychology"], - Reference_Study_Aids: ["Reference", "Grammar & Language Usage"], - Religious_Fiction: ["Christian Fiction"], - Religion_Spirituality: "Religion & Spirituality", - Romance: "Romance", - Science: ["Science", "Physics", "Chemistry", "Biology"], - Science_Fiction: "Science Fiction", + classifier.Management_Leadership: "Management", + classifier.Mathematics: "Mathematics", + classifier.Medical: "Medical", + classifier.Military_History: "Military", + classifier.Music: ["Music", "Songbook"], + classifier.Mystery: "Mystery", + classifier.Nature: "Nature", + classifier.Body_Mind_Spirit: "New Age", + classifier.Parenting_Family: ["Family & Relationships", "Child Development"], + classifier.Performing_Arts: "Performing Arts", + classifier.Personal_Finance_Investing: "Finance", + 
classifier.Pets: "Pets", + classifier.Philosophy: ["Philosophy", "Ethics"], + classifier.Photography: "Photography", + classifier.Poetry: "Poetry", + classifier.Political_Science: ["Politics", "Current Events"], + classifier.Psychology: ["Psychology", "Psychiatry", "Psychiatry & Psychology"], + classifier.Reference_Study_Aids: ["Reference", "Grammar & Language Usage"], + classifier.Religious_Fiction: ["Christian Fiction"], + classifier.Religion_Spirituality: "Religion & Spirituality", + classifier.Romance: "Romance", + classifier.Science: ["Science", "Physics", "Chemistry", "Biology"], + classifier.Science_Fiction: "Science Fiction", # Science_Fiction_Fantasy : "Science Fiction & Fantasy", - Self_Help: ["Self-Improvement", "Self-Help", "Self Help", "Recovery"], - Short_Stories: ["Literary Anthologies", "Short Stories"], - Social_Sciences: [ + classifier.Self_Help: [ + "Self-Improvement", + "Self-Help", + "Self Help", + "Recovery", + ], + classifier.Short_Stories: ["Literary Anthologies", "Short Stories"], + classifier.Social_Sciences: [ "Sociology", "Gender Studies", "Genealogy", "Media Studies", "Social Studies", ], - Sports: "Sports & Recreations", - Study_Aids: ["Study Aids & Workbooks", "Text Book"], - Technology: ["Technology", "Engineering", "Transportation"], - Suspense_Thriller: ["Suspense", "Thriller"], - Travel: ["Travel", "Travel Literature", "Outdoor Recreation"], - True_Crime: "True Crime", - Urban_Fiction: ["African American Fiction", "Urban Fiction"], - Westerns: "Western", - Womens_Fiction: "Chick Lit Fiction", + classifier.Sports: "Sports & Recreations", + classifier.Study_Aids: ["Study Aids & Workbooks", "Text Book"], + classifier.Technology: ["Technology", "Engineering", "Transportation"], + classifier.Suspense_Thriller: ["Suspense", "Thriller"], + classifier.Travel: ["Travel", "Travel Literature", "Outdoor Recreation"], + classifier.True_Crime: "True Crime", + classifier.Urban_Fiction: ["African American Fiction", "Urban Fiction"], + 
classifier.Westerns: "Western", + classifier.Womens_Fiction: "Chick Lit Fiction", } @classmethod @@ -226,8 +231,8 @@ def genre(cls, identifier, name, fiction=None, audience=None): if identifier == v or (isinstance(v, list) and identifier in v): return l if identifier == "Gay/Lesbian" and fiction: - return LGBTQ_Fiction + return classifier.LGBTQ_Fiction return None -Classifier.classifiers[Classifier.OVERDRIVE] = OverdriveClassifier +classifier.Classifier.classifiers[classifier.Classifier.OVERDRIVE] = OverdriveClassifier diff --git a/src/palace/manager/core/classifier/simplified.py b/src/palace/manager/core/classifier/simplified.py index c0583b2041..a8246c7ab9 100644 --- a/src/palace/manager/core/classifier/simplified.py +++ b/src/palace/manager/core/classifier/simplified.py @@ -1,6 +1,7 @@ from urllib.parse import unquote -from palace.manager.core.classifier import * +from palace.manager.core import classifier +from palace.manager.core.classifier import NO_VALUE, Classifier, Lowercased class SimplifiedGenreClassifier(Classifier): @@ -20,27 +21,27 @@ def scrub_identifier(cls, identifier): @classmethod def genre(cls, identifier, name, fiction=None, audience=None): if fiction == True: - all_genres = fiction_genres + all_genres = classifier.fiction_genres elif fiction == False: - all_genres = nonfiction_genres + all_genres = classifier.nonfiction_genres else: - all_genres = fiction_genres + nonfiction_genres + all_genres = classifier.fiction_genres + classifier.nonfiction_genres return cls._genre_by_name(identifier.original, all_genres) @classmethod def is_fiction(cls, identifier, name): - if not globals()["genres"].get(identifier.original): + if not classifier.genres.get(identifier.original): return None - return globals()["genres"][identifier.original].is_fiction + return classifier.genres[identifier.original].is_fiction @classmethod def _genre_by_name(cls, name, genres): for genre in genres: if genre == name: - return globals()["genres"][name] + return 
classifier.genres[name] elif isinstance(genre, dict): if name == genre["name"] or name in genre.get("subgenres", []): - return globals()["genres"][name] + return classifier.genres[name] return None diff --git a/src/palace/manager/core/classifier/work.py b/src/palace/manager/core/classifier/work.py new file mode 100644 index 0000000000..1f2575762d --- /dev/null +++ b/src/palace/manager/core/classifier/work.py @@ -0,0 +1,614 @@ +from __future__ import annotations + +import logging +from collections import Counter + +from sqlalchemy.orm import Session + +from palace.manager.core import classifier +from palace.manager.core.classifier import Classifier, GenreData, genres +from palace.manager.core.classifier.simplified import SimplifiedGenreClassifier + + +class WorkClassifier: + """Boil down a bunch of Classification objects into a few values.""" + + # TODO: This needs a lot of additions. + genre_publishers = { + "Harlequin": classifier.Romance, + "Pocket Books/Star Trek": classifier.Media_Tie_in_SF, + "Kensington": classifier.Urban_Fiction, + "Fodor's Travel Publications": classifier.Travel, + "Marvel Entertainment, LLC": classifier.Comics_Graphic_Novels, + } + + genre_imprints = { + "Harlequin Intrigue": classifier.Romantic_Suspense, + "Love Inspired Suspense": classifier.Romantic_Suspense, + "Harlequin Historical": classifier.Historical_Romance, + "Harlequin Historical Undone": classifier.Historical_Romance, + "Frommers": classifier.Travel, + "LucasBooks": classifier.Media_Tie_in_SF, + } + + audience_imprints = { + "Harlequin Teen": Classifier.AUDIENCE_YOUNG_ADULT, + "HarperTeen": Classifier.AUDIENCE_YOUNG_ADULT, + "Open Road Media Teen & Tween": Classifier.AUDIENCE_YOUNG_ADULT, + "Rosen Young Adult": Classifier.AUDIENCE_YOUNG_ADULT, + } + + not_adult_publishers = { + "Scholastic Inc.", + "Random House Children's Books", + "Little, Brown Books for Young Readers", + "Penguin Young Readers Group", + "Hachette Children's Books", + "Nickelodeon Publishing", + } + + 
not_adult_imprints = { + "Scholastic", + "Scholastic Paperbacks", + "Random House Books for Young Readers", + "HMH Books for Young Readers", + "Knopf Books for Young Readers", + "Delacorte Books for Young Readers", + "Open Road Media Young Readers", + "Macmillan Young Listeners", + "Bloomsbury Childrens", + "NYR Children's Collection", + "Bloomsbury USA Childrens", + "National Geographic Children's Books", + } + + fiction_imprints = {"Del Rey"} + nonfiction_imprints = {"Harlequin Nonfiction"} + + nonfiction_publishers = {"Wiley"} + fiction_publishers: set[str] = set() + + def __init__(self, work, test_session=None, debug=False): + self._db = Session.object_session(work) + if test_session: + self._db = test_session + self.work = work + self.fiction_weights = Counter() + self.audience_weights = Counter() + self.target_age_lower_weights = Counter() + self.target_age_upper_weights = Counter() + self.genre_weights = Counter() + self.direct_from_license_source = set() + self.prepared = False + self.debug = debug + self.classifications = [] + self.seen_classifications = set() + self.log = logging.getLogger("Classifier (workid=%d)" % self.work.id) + self.using_staff_genres = False + self.using_staff_fiction_status = False + self.using_staff_audience = False + self.using_staff_target_age = False + + # Keep track of whether we've seen one of Overdrive's generic + # "Juvenile" classifications, as well as its more specific + # subsets like "Picture Books" and "Beginning Readers" + self.overdrive_juvenile_generic = False + self.overdrive_juvenile_with_target_age = False + + def add(self, classification): + """Prepare a single Classification for consideration.""" + + # We only consider a given classification once from a given + # data source. 
+ key = (classification.subject, classification.data_source) + if key in self.seen_classifications: + return + self.seen_classifications.add(key) + if self.debug: + self.classifications.append(classification) + + # Make sure the Subject is ready to be used in calculations. + if not classification.subject.checked: # or self.debug + classification.subject.assign_to_genre() + + if classification.comes_from_license_source: + self.direct_from_license_source.add(classification) + else: + if classification.subject.describes_format: + # TODO: This is a bit of a hack. + # + # Only accept a classification having to do with + # format (e.g. 'comic books') if that classification + # comes direct from the license source. Otherwise it's + # really easy for a graphic adaptation of a novel to + # get mixed up with the original novel, whereupon the + # original book is classified as a graphic novel. + return + + # Put the weight of the classification behind various + # considerations. + weight = classification.scaled_weight + subject = classification.subject + from palace.manager.sqlalchemy.model.datasource import DataSource + + from_staff = classification.data_source.name == DataSource.LIBRARY_STAFF + + # if classification is genre or NONE from staff, ignore all non-staff genres + is_genre = subject.genre != None + from palace.manager.sqlalchemy.model.classification import Subject + + is_none = ( + from_staff + and subject.type == Subject.SIMPLIFIED_GENRE + and subject.identifier == SimplifiedGenreClassifier.NONE + ) + if is_genre or is_none: + if not from_staff and self.using_staff_genres: + return + if from_staff and not self.using_staff_genres: + # first encounter with staff genre, so throw out existing genre weights + self.using_staff_genres = True + self.genre_weights = Counter() + if is_genre: + self.weigh_genre(subject.genre, weight) + + # if staff classification is fiction or nonfiction, ignore all other fictions + if not self.using_staff_fiction_status: + if from_staff 
and subject.type == Subject.SIMPLIFIED_FICTION_STATUS: + # encountering first staff fiction status, + # so throw out existing fiction weights + self.using_staff_fiction_status = True + self.fiction_weights = Counter() + self.fiction_weights[subject.fiction] += weight + + # if staff classification is about audience, ignore all other audience classifications + if not self.using_staff_audience: + if from_staff and subject.type == Subject.FREEFORM_AUDIENCE: + self.using_staff_audience = True + self.audience_weights = Counter() + self.audience_weights[subject.audience] += weight + else: + if classification.generic_juvenile_audience: + # We have a generic 'juvenile' classification. The + # audience might say 'Children' or it might say 'Young + # Adult' but we don't actually know which it is. + # + # We're going to split the difference, with a slight + # preference for YA, to bias against showing + # age-inappropriate material to children. To + # counterbalance the fact that we're splitting up the + # weight this way, we're also going to treat this + # classification as evidence _against_ an 'adult' + # classification. + self.audience_weights[Classifier.AUDIENCE_YOUNG_ADULT] += ( + weight * 0.6 + ) + self.audience_weights[Classifier.AUDIENCE_CHILDREN] += weight * 0.4 + for audience in Classifier.AUDIENCES_ADULT: + if audience != Classifier.AUDIENCE_ALL_AGES: + # 'All Ages' is considered an adult audience, + # but a generic 'juvenile' classification + # is not evidence against it. + self.audience_weights[audience] -= weight * 0.5 + else: + self.audience_weights[subject.audience] += weight + + if not self.using_staff_target_age: + if from_staff and subject.type == Subject.AGE_RANGE: + self.using_staff_target_age = True + self.target_age_lower_weights = Counter() + self.target_age_upper_weights = Counter() + if subject.target_age: + # Figure out how reliable this classification really is as + # an indicator of a target age. 
+ scaled_weight = classification.weight_as_indicator_of_target_age + target_min = subject.target_age.lower + target_max = subject.target_age.upper + if target_min is not None: + if not subject.target_age.lower_inc: + target_min += 1 + self.target_age_lower_weights[target_min] += scaled_weight + if target_max is not None: + if not subject.target_age.upper_inc: + target_max -= 1 + self.target_age_upper_weights[target_max] += scaled_weight + + if not self.using_staff_audience and not self.using_staff_target_age: + if ( + subject.type == "Overdrive" + and subject.audience == Classifier.AUDIENCE_CHILDREN + ): + if subject.target_age and ( + subject.target_age.lower or subject.target_age.upper + ): + # This is a juvenile classification like "Picture + # Books" which implies a target age. + self.overdrive_juvenile_with_target_age = classification + else: + # This is a generic juvenile classification like + # "Juvenile Fiction". + self.overdrive_juvenile_generic = classification + + def weigh_metadata(self): + """Modify the weights according to the given Work's metadata. + + Use work metadata to simulate classifications. + + This is basic stuff, like: Harlequin tends to publish + romances. 
+ """ + if self.work.title and ( + "Star Trek:" in self.work.title + or "Star Wars:" in self.work.title + or ("Jedi" in self.work.title and self.work.imprint == "Del Rey") + ): + self.weigh_genre(classifier.Media_Tie_in_SF, 100) + + publisher = self.work.publisher + imprint = self.work.imprint + if ( + imprint in self.nonfiction_imprints + or publisher in self.nonfiction_publishers + ): + self.fiction_weights[False] = 100 + elif imprint in self.fiction_imprints or publisher in self.fiction_publishers: + self.fiction_weights[True] = 100 + + if imprint in self.genre_imprints: + self.weigh_genre(self.genre_imprints[imprint], 100) + elif publisher in self.genre_publishers: + self.weigh_genre(self.genre_publishers[publisher], 100) + + if imprint in self.audience_imprints: + self.audience_weights[self.audience_imprints[imprint]] += 100 + elif ( + publisher in self.not_adult_publishers or imprint in self.not_adult_imprints + ): + for audience in [ + Classifier.AUDIENCE_ADULT, + Classifier.AUDIENCE_ADULTS_ONLY, + ]: + self.audience_weights[audience] -= 100 + + def prepare_to_classify(self): + """Called the first time classify() is called. Does miscellaneous + one-time prep work that requires all data to be in place. + """ + self.weigh_metadata() + + explicitly_indicated_audiences = ( + Classifier.AUDIENCE_CHILDREN, + Classifier.AUDIENCE_YOUNG_ADULT, + Classifier.AUDIENCE_ADULTS_ONLY, + ) + audiences_from_license_source = { + classification.subject.audience + for classification in self.direct_from_license_source + } + if ( + self.direct_from_license_source + and not self.using_staff_audience + and not any( + audience in explicitly_indicated_audiences + for audience in audiences_from_license_source + ) + ): + # If this was erotica, or a book for children or young + # adults, the distributor would have given some indication + # of that fact. In the absense of any such indication, we + # can assume very strongly that this is a regular old book + # for adults. 
+ # + # 3M is terrible at distinguishing between childrens' + # books and YA books, but books for adults can be + # distinguished by their _lack_ of childrens/YA + # classifications. + self.audience_weights[Classifier.AUDIENCE_ADULT] += 500 + + if ( + self.overdrive_juvenile_generic + and not self.overdrive_juvenile_with_target_age + ): + # This book is classified under 'Juvenile Fiction' but not + # under 'Picture Books' or 'Beginning Readers'. The + # implicit target age here is 9-12 (the portion of + # Overdrive's 'juvenile' age range not covered by 'Picture + # Books' or 'Beginning Readers'. + weight = self.overdrive_juvenile_generic.weight_as_indicator_of_target_age + self.target_age_lower_weights[9] += weight + self.target_age_upper_weights[12] += weight + + self.prepared = True + + def classify(self, default_fiction=None, default_audience=None): + # Do a little prep work. + if not self.prepared: + self.prepare_to_classify() + + if self.debug: + for c in self.classifications: + self.log.debug( + "%d %r (via %s)", c.weight, c.subject, c.data_source.name + ) + + # Actually figure out the classifications + fiction = self.fiction(default_fiction=default_fiction) + genres = self.genres(fiction) + audience = self.audience(genres, default_audience=default_audience) + target_age = self.target_age(audience) + if self.debug: + self.log.debug("Fiction weights:") + for k, v in self.fiction_weights.most_common(): + self.log.debug(" %s: %s", v, k) + self.log.debug("Genre weights:") + for k, v in self.genre_weights.most_common(): + self.log.debug(" %s: %s", v, k) + self.log.debug("Audience weights:") + for k, v in self.audience_weights.most_common(): + self.log.debug(" %s: %s", v, k) + return genres, fiction, audience, target_age + + def fiction(self, default_fiction=None): + """Is it more likely this is a fiction or nonfiction book?""" + if not self.fiction_weights: + # We have absolutely no idea one way or the other, and it + # would be irresponsible to guess. 
+ return default_fiction + is_fiction = default_fiction + if self.fiction_weights[True] > self.fiction_weights[False]: + is_fiction = True + elif self.fiction_weights[False] > 0: + is_fiction = False + return is_fiction + + def audience(self, genres=[], default_audience=None): + """What's the most likely audience for this book? + :param default_audience: To avoid embarassing situations we will + classify works as being intended for adults absent convincing + evidence to the contrary. In some situations (like the metadata + wrangler), it's better to state that we have no information, so + default_audience can be set to None. + """ + + # If we determined that Erotica was a significant enough + # component of the classification to count as a genre, the + # audience will always be 'Adults Only', even if the audience + # weights would indicate something else. + if classifier.Erotica in genres: + return Classifier.AUDIENCE_ADULTS_ONLY + + w = self.audience_weights + if not self.audience_weights: + # We have absolutely no idea, and it would be + # irresponsible to guess. + return default_audience + + children_weight = w.get(Classifier.AUDIENCE_CHILDREN, 0) + ya_weight = w.get(Classifier.AUDIENCE_YOUNG_ADULT, 0) + adult_weight = w.get(Classifier.AUDIENCE_ADULT, 0) + adults_only_weight = w.get(Classifier.AUDIENCE_ADULTS_ONLY, 0) + all_ages_weight = w.get(Classifier.AUDIENCE_ALL_AGES, 0) + research_weight = w.get(Classifier.AUDIENCE_RESEARCH, 0) + + total_adult_weight = adult_weight + adults_only_weight + total_weight = sum(w.values()) + + audience = default_audience + + # A book will be classified as a young adult or childrens' + # book when the weight of that audience is more than twice the + # combined weight of the 'adult' and 'adults only' audiences. + # If that combined weight is zero, then any amount of evidence + # is sufficient. 
+ threshold = total_adult_weight * 2 + + # If both the 'children' weight and the 'YA' weight pass the + # threshold, we go with the one that weighs more. + # If the 'children' weight passes the threshold on its own + # we go with 'children'. + total_juvenile_weight = children_weight + ya_weight + if ( + research_weight > (total_adult_weight + all_ages_weight) + and research_weight > (total_juvenile_weight + all_ages_weight) + and research_weight > threshold + ): + audience = Classifier.AUDIENCE_RESEARCH + elif ( + all_ages_weight > total_adult_weight + and all_ages_weight > total_juvenile_weight + ): + audience = Classifier.AUDIENCE_ALL_AGES + elif children_weight > threshold and children_weight > ya_weight: + audience = Classifier.AUDIENCE_CHILDREN + elif ya_weight > threshold: + audience = Classifier.AUDIENCE_YOUNG_ADULT + elif total_juvenile_weight > threshold: + # Neither weight passes the threshold on its own, but + # combined they do pass the threshold. Go with + # 'Young Adult' to be safe. + audience = Classifier.AUDIENCE_YOUNG_ADULT + elif total_adult_weight > 0: + audience = Classifier.AUDIENCE_ADULT + + # If the 'adults only' weight is more than 1/4 of the total adult + # weight, classify as 'adults only' to be safe. + # + # TODO: This has not been calibrated. + if ( + audience == Classifier.AUDIENCE_ADULT + and adults_only_weight > total_adult_weight / 4 + ): + audience = Classifier.AUDIENCE_ADULTS_ONLY + + return audience + + @classmethod + def top_tier_values(self, counter): + """Given a Counter mapping values to their frequency of occurrence, + return all values that are as common as the most common value. + """ + top_frequency = None + top_tier = set() + for age, freq in counter.most_common(): + if not top_frequency: + top_frequency = freq + if freq != top_frequency: + # We've run out of candidates + break + else: + # This candidate occurs with the maximum frequency.
+ top_tier.add(age) + return top_tier + + def target_age(self, audience): + """Derive a target age from the gathered data.""" + if audience not in ( + Classifier.AUDIENCE_CHILDREN, + Classifier.AUDIENCE_YOUNG_ADULT, + ): + # This is not a children's or YA book. Assertions about + # target age are irrelevant and the default value rules. + return Classifier.default_target_age_for_audience(audience) + + # Only consider the most reliable classifications. + + # Try to reach consensus on the lower and upper bounds of the + # age range. + if self.debug: + if self.target_age_lower_weights: + self.log.debug("Possible target age minima:") + for k, v in self.target_age_lower_weights.most_common(): + self.log.debug(" %s: %s", v, k) + if self.target_age_upper_weights: + self.log.debug("Possible target age maxima:") + for k, v in self.target_age_upper_weights.most_common(): + self.log.debug(" %s: %s", v, k) + + target_age_min = None + target_age_max = None + if self.target_age_lower_weights: + # Find the youngest age in the top tier of values. + candidates = self.top_tier_values(self.target_age_lower_weights) + target_age_min = min(candidates) + + if self.target_age_upper_weights: + # Find the oldest age in the top tier of values. + candidates = self.top_tier_values(self.target_age_upper_weights) + target_age_max = max(candidates) + + if not target_age_min and not target_age_max: + # We found no opinions about target age. Use the default. + return Classifier.default_target_age_for_audience(audience) + + if target_age_min is None: + target_age_min = target_age_max + + if target_age_max is None: + target_age_max = target_age_min + + # Err on the side of setting the minimum age too high. 
+ if target_age_min > target_age_max: + target_age_max = target_age_min + return Classifier.range_tuple(target_age_min, target_age_max) + + def genres(self, fiction, cutoff=0.15): + """Consolidate genres and apply a low-pass filter.""" + # Remove any genres whose fiction status is inconsistent with the + # (independently determined) fiction status of the book. + # + # It doesn't matter if a book is classified as 'science + # fiction' 100 times; if we know it's nonfiction, it can't be + # science fiction. (It's probably a history of science fiction + # or something.) + genres = dict(self.genre_weights) + if not genres: + # We have absolutely no idea, and it would be + # irresponsible to guess. + return {} + + for genre in list(genres.keys()): + # If we have a fiction determination, that lets us eliminate + # possible genres that conflict with that determination. + # + # TODO: If we don't have a fiction determination, the + # genres we end up with may help us make one. + if fiction is not None and (genre.default_fiction != fiction): + del genres[genre] + + # Consolidate parent genres into their heaviest subgenre. + genres = self.consolidate_genre_weights(genres) + total_weight = float(sum(genres.values())) + + # Strip out the stragglers. + for g, score in list(genres.items()): + affinity = score / total_weight + if affinity < cutoff: + total_weight -= score + del genres[g] + return genres + + def weigh_genre(self, genre_data, weight): + """A helper method that ensures we always use database Genre + objects, not GenreData objects, when weighting genres. + """ + from palace.manager.sqlalchemy.model.classification import Genre + + genre, ignore = Genre.lookup(self._db, genre_data.name) + self.genre_weights[genre] += weight + + @classmethod + def consolidate_genre_weights(cls, weights, subgenre_swallows_parent_at=0.03): + """If a genre and its subgenres both show up, examine the subgenre + with the highest weight.
If its weight exceeds a certain + proportion of the weight of the parent genre, assign the + parent's weight to the subgenre and remove the parent. + """ + # print("Before consolidation:") + # for genre, weight in weights.items(): + # print("", genre, weight) + + # Convert Genre objects to GenreData. + consolidated = Counter() + for genre, weight in list(weights.items()): + if not isinstance(genre, GenreData): + genre = genres[genre.name] + consolidated[genre] += weight + + heaviest_child = dict() + for genre, weight in list(consolidated.items()): + for parent in genre.parents: + if parent in consolidated: + if (not parent in heaviest_child) or weight > heaviest_child[ + parent + ][1]: + heaviest_child[parent] = (genre, weight) + # print("Heaviest child:") + # for parent, (genre, weight) in heaviest_child.items(): + # print("", parent, genre, weight) + made_it = False + while not made_it: + for parent, (child, weight) in sorted( + heaviest_child.items(), key=lambda genre: genre[1][1], reverse=True + ): + parent_weight = consolidated.get(parent, 0) + if weight > (subgenre_swallows_parent_at * parent_weight): + consolidated[child] += parent_weight + del consolidated[parent] + changed = False + for parent in parent.parents: + if parent in heaviest_child: + heaviest_child[parent] = (child, consolidated[child]) + changed = True + if changed: + # We changed the dict, so we need to restart + # the iteration. + break + # We made it all the way through the dict without changing it. 
+ made_it = True + # print("Final heaviest child:") + # for parent, (genre, weight) in heaviest_child.items(): + # print("", parent, genre, weight) + # print("After consolidation:") + # for genre, weight in consolidated.items(): + # print("", genre, weight) + return consolidated diff --git a/src/palace/manager/search/external_search.py b/src/palace/manager/search/external_search.py index 1818eed723..44191b8aea 100644 --- a/src/palace/manager/search/external_search.py +++ b/src/palace/manager/search/external_search.py @@ -26,12 +26,9 @@ from opensearch_dsl.query import Range, Regexp, Term, Terms from spellchecker import SpellChecker -from palace.manager.core.classifier import ( - AgeClassifier, - Classifier, - GradeLevelClassifier, - KeywordBasedClassifier, -) +from palace.manager.core.classifier import Classifier +from palace.manager.core.classifier.age import AgeClassifier, GradeLevelClassifier +from palace.manager.core.classifier.keyword import KeywordBasedClassifier from palace.manager.core.exceptions import BasePalaceException from palace.manager.core.facets import FacetConstants from palace.manager.core.metadata_layer import IdentifierData diff --git a/src/palace/manager/sqlalchemy/model/classification.py b/src/palace/manager/sqlalchemy/model/classification.py index d066487e5b..aeb132567f 100644 --- a/src/palace/manager/sqlalchemy/model/classification.py +++ b/src/palace/manager/sqlalchemy/model/classification.py @@ -20,12 +20,7 @@ from sqlalchemy.sql.functions import func from palace.manager.core import classifier -from palace.manager.core.classifier import ( # type: ignore[attr-defined] - COMICS_AND_GRAPHIC_NOVELS, - Classifier, - Erotica, - GenreData, -) +from palace.manager.core.classifier import Classifier, Erotica, GenreData from palace.manager.sqlalchemy.constants import DataSourceConstants from palace.manager.sqlalchemy.hassessioncache import HasSessionCache from palace.manager.sqlalchemy.model.base import Base diff --git 
a/src/palace/manager/sqlalchemy/model/work.py b/src/palace/manager/sqlalchemy/model/work.py index 38bae8304a..80d6af970a 100644 --- a/src/palace/manager/sqlalchemy/model/work.py +++ b/src/palace/manager/sqlalchemy/model/work.py @@ -32,7 +32,8 @@ from sqlalchemy.sql.expression import and_, case, literal_column, select from sqlalchemy.sql.functions import func -from palace.manager.core.classifier import Classifier, WorkClassifier +from palace.manager.core.classifier import Classifier +from palace.manager.core.classifier.work import WorkClassifier from palace.manager.core.exceptions import BasePalaceException from palace.manager.search.service import SearchDocument from palace.manager.service.redis.redis import Redis diff --git a/tests/manager/api/admin/controller/test_work_editor.py b/tests/manager/api/admin/controller/test_work_editor.py index f15d559668..f62480c532 100644 --- a/tests/manager/api/admin/controller/test_work_editor.py +++ b/tests/manager/api/admin/controller/test_work_editor.py @@ -23,7 +23,7 @@ UNKNOWN_ROLE, ) from palace.manager.api.problem_details import LIBRARY_NOT_FOUND -from palace.manager.core.classifier import SimplifiedGenreClassifier +from palace.manager.core.classifier.simplified import SimplifiedGenreClassifier from palace.manager.feed.annotator.admin import AdminAnnotator from palace.manager.sqlalchemy.constants import IdentifierType from palace.manager.sqlalchemy.model.admin import AdminRole diff --git a/tests/manager/core/classifiers/test_age.py b/tests/manager/core/classifiers/test_age.py index 34bbd96099..00aef3e933 100644 --- a/tests/manager/core/classifiers/test_age.py +++ b/tests/manager/core/classifiers/test_age.py @@ -1,10 +1,11 @@ -from palace.manager.core.classifier import AgeOrGradeClassifier, Classifier -from palace.manager.core.classifier import LCSHClassifier as LCSH +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.age import ( AgeClassifier, + AgeOrGradeClassifier, 
GradeLevelClassifier, InterestLevelClassifier, ) +from palace.manager.core.classifier.keyword import LCSHClassifier as LCSH class TestTargetAge: diff --git a/tests/manager/core/classifiers/test_bic.py b/tests/manager/core/classifiers/test_bic.py index 40114a891a..b113597286 100644 --- a/tests/manager/core/classifiers/test_bic.py +++ b/tests/manager/core/classifiers/test_bic.py @@ -1,5 +1,5 @@ from palace.manager.core import classifier -from palace.manager.core.classifier import * +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.bic import BICClassifier as BIC diff --git a/tests/manager/core/classifiers/test_bisac.py b/tests/manager/core/classifiers/test_bisac.py index caa5abfff8..86e959093f 100644 --- a/tests/manager/core/classifiers/test_bisac.py +++ b/tests/manager/core/classifiers/test_bisac.py @@ -1,8 +1,9 @@ import pytest -from palace.manager.core.classifier import BISACClassifier, Classifier +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.bisac import ( RE, + BISACClassifier, MatchingRule, anything, fiction, diff --git a/tests/manager/core/classifiers/test_classifier.py b/tests/manager/core/classifiers/test_classifier.py index d5adf95f84..df34b5f146 100644 --- a/tests/manager/core/classifiers/test_classifier.py +++ b/tests/manager/core/classifiers/test_classifier.py @@ -8,15 +8,13 @@ from palace.manager.core import classifier from palace.manager.core.classifier import ( Classifier, - FreeformAudienceClassifier, - GenreData, Lowercased, - WorkClassifier, - fiction_genres, - nonfiction_genres, + Science, + Science_Fiction, ) from palace.manager.core.classifier.age import ( AgeClassifier, + FreeformAudienceClassifier, GradeLevelClassifier, InterestLevelClassifier, ) @@ -25,15 +23,13 @@ from palace.manager.core.classifier.keyword import LCSHClassifier as LCSH from palace.manager.core.classifier.lcc import LCCClassifier as LCC from palace.manager.core.classifier.simplified import 
SimplifiedGenreClassifier +from palace.manager.core.classifier.work import WorkClassifier from palace.manager.sqlalchemy.model.classification import Genre, Subject from palace.manager.sqlalchemy.model.datasource import DataSource from palace.manager.sqlalchemy.model.identifier import Identifier from palace.manager.sqlalchemy.model.work import Work from tests.fixtures.database import DatabaseTransactionFixture -genres = dict() -GenreData.populate(globals(), genres, fiction_genres, nonfiction_genres) - class TestLowercased: def test_constructor(self): @@ -1187,10 +1183,8 @@ def test_staff_target_age_overrides_others( staff_source = DataSource.lookup(session, DataSource.LIBRARY_STAFF) subject1 = data.transaction.subject(type="type1", identifier="subject1") subject1.target_age = NumericRange(6, 8, "[)") - subject1.weight_as_indicator_of_target_age = 1 subject2 = data.transaction.subject(type="type2", identifier="subject2") subject2.target_age = NumericRange(6, 8, "[)") - subject2.weight_as_indicator_of_target_age = 1 subject3 = data.transaction.subject(type=Subject.AGE_RANGE, identifier="10-13") classification1 = data.transaction.classification( identifier=data.identifier, subject=subject1, data_source=source, weight=10 diff --git a/tests/manager/core/classifiers/test_ddc.py b/tests/manager/core/classifiers/test_ddc.py index ad936e0823..bbfb33942d 100644 --- a/tests/manager/core/classifiers/test_ddc.py +++ b/tests/manager/core/classifiers/test_ddc.py @@ -1,5 +1,5 @@ from palace.manager.core import classifier -from palace.manager.core.classifier import * +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.ddc import DeweyDecimalClassifier as DDC diff --git a/tests/manager/core/classifiers/test_keyword.py b/tests/manager/core/classifiers/test_keyword.py index 3a83435d14..cfe38c5b66 100644 --- a/tests/manager/core/classifiers/test_keyword.py +++ b/tests/manager/core/classifiers/test_keyword.py @@ -1,5 +1,5 @@ from palace.manager.core 
import classifier -from palace.manager.core.classifier import * +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.keyword import KeywordBasedClassifier as Keyword from palace.manager.core.classifier.keyword import LCSHClassifier as LCSH diff --git a/tests/manager/core/classifiers/test_lcc.py b/tests/manager/core/classifiers/test_lcc.py index 2708889c01..803c81a32e 100644 --- a/tests/manager/core/classifiers/test_lcc.py +++ b/tests/manager/core/classifiers/test_lcc.py @@ -1,4 +1,4 @@ -from palace.manager.core.classifier import * +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.lcc import LCCClassifier as LCC diff --git a/tests/manager/core/classifiers/test_overdrive.py b/tests/manager/core/classifiers/test_overdrive.py index a023edaf72..5ff44062eb 100644 --- a/tests/manager/core/classifiers/test_overdrive.py +++ b/tests/manager/core/classifiers/test_overdrive.py @@ -1,4 +1,4 @@ -from palace.manager.core.classifier import * +from palace.manager.core.classifier import Classifier from palace.manager.core.classifier.overdrive import OverdriveClassifier as Overdrive diff --git a/tests/manager/core/classifiers/test_simplified.py b/tests/manager/core/classifiers/test_simplified.py index 005a2b7525..fba9487e01 100644 --- a/tests/manager/core/classifiers/test_simplified.py +++ b/tests/manager/core/classifiers/test_simplified.py @@ -1,4 +1,5 @@ -from palace.manager.core.classifier import * +from palace.manager.core.classifier import genres +from palace.manager.core.classifier.simplified import SimplifiedGenreClassifier class TestSimplifiedGenreClassifier: @@ -18,10 +19,10 @@ def test_genre(self): scrubbed = SimplifiedGenreClassifier.scrub_identifier(genre_name) genre = SimplifiedGenreClassifier.genre(scrubbed, genre_name, fiction=True) - assert genre.name == globals()["genres"][genre_name].name + assert genre.name == genres[genre_name].name genre = SimplifiedGenreClassifier.genre(scrubbed, 
genre_name) - assert genre.name == globals()["genres"][genre_name].name + assert genre.name == genres[genre_name].name genre = SimplifiedGenreClassifier.genre(scrubbed, genre_name, fiction=False) assert genre == None diff --git a/tests/manager/core/test_opds_import.py b/tests/manager/core/test_opds_import.py index 1641006d87..81b1c067f9 100644 --- a/tests/manager/core/test_opds_import.py +++ b/tests/manager/core/test_opds_import.py @@ -861,7 +861,7 @@ def test_import(self, opds_importer_fixture: OPDSImporterFixture): assert Subject.AGE_RANGE == seven.subject.type from palace.manager.core.classifier import Classifier - classifier = Classifier.classifiers.get(seven.subject.type, None) + classifier = Classifier.classifiers[seven.subject.type] classifier.classify(seven.subject) def sort_key(x: LicensePool) -> str: diff --git a/tests/manager/feed/test_library_annotator.py b/tests/manager/feed/test_library_annotator.py index b4ed8a62d6..c0276960cf 100644 --- a/tests/manager/feed/test_library_annotator.py +++ b/tests/manager/feed/test_library_annotator.py @@ -16,11 +16,7 @@ ) from palace.manager.api.lanes import ContributorLane from palace.manager.api.metadata.novelist import NoveListAPI, NoveListApiSettings -from palace.manager.core.classifier import ( # type: ignore[attr-defined] - Classifier, - Fantasy, - Urban_Fantasy, -) +from palace.manager.core.classifier import Classifier, Fantasy, Urban_Fantasy from palace.manager.core.entrypoint import ( AudiobooksEntryPoint, EbooksEntryPoint, diff --git a/tests/manager/feed/test_loan_and_hold_annotator.py b/tests/manager/feed/test_loan_and_hold_annotator.py index 662b31fffe..08a149cc70 100644 --- a/tests/manager/feed/test_loan_and_hold_annotator.py +++ b/tests/manager/feed/test_loan_and_hold_annotator.py @@ -3,11 +3,6 @@ from palace.manager.api.app import app from palace.manager.api.opds_for_distributors import OPDSForDistributorsAPI from palace.manager.api.problem_details import NOT_FOUND_ON_REMOTE -from 
palace.manager.core.classifier import ( # type: ignore[attr-defined] - Classifier, - Fantasy, - Urban_Fantasy, -) from palace.manager.feed.acquisition import OPDSAcquisitionFeed from palace.manager.feed.annotator.loan_and_hold import LibraryLoanAndHoldAnnotator from palace.manager.feed.types import WorkEntry, WorkEntryData