Commit
Reorganized into a module with separate scripts.
cumberworth committed Nov 7, 2018
1 parent c7b6476 commit f05e0ed
Showing 8 changed files with 225 additions and 65 deletions.
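
For orientation, here is a minimal sketch of how the reorganized module might be driven after this commit. The class names and call signatures are taken from mybiblib/bib.py below; the bib directory path is the old hard-coded default and the search-string format follows the profiling script included in this commit, so treat the concrete values as placeholders.

from mybiblib.bib import Bibliography, SearchString

# Directory of single-entry .bib files (placeholder path; the old script hard-coded it)
bibliography = Bibliography('/home/alexc/refs/bibs/')

# Match entries whose keywords field contains "review" and print selected fields
search_string = SearchString(['field:keywords', 'review'])
bibliography.match_and_print_fields(search_string, ['title', 'year', 'author'])
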
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
*__pycache__/
prereqs.txt
tags
Empty file added mybiblib/__init__.py
Empty file.
159 changes: 94 additions & 65 deletions searchRefs.py → mybiblib/bib.py
@@ -1,24 +1,44 @@
#!/usr/bin/env python
"""Custom classes for bib files"""

"""Search reference .bib files for keywords and return requisted info.

"""

import argparse
import csv
from collections import OrderedDict
import os
import pytest
import pdb


BIB_DIRECTORY = '/home/alexc/refs/bibs/'
import biblib.bib


class SearchString:
class Abbreviations:
def __init__(self, abb_filename):
raw = csv.reader(open(abb_filename))

self._full_to_abb = {}
self._abb_to_full = {}
for row in raw:
self._full_to_abb[row[0]] = row[1]
self._abb_to_full[row[1]] = row[0]

def abbreviate(self, full):
try:
abb = self._full_to_abb[full]
except KeyError:
print('Abbreviation not in database for journal {}'.format(full))
raise

return abb

def unabbreviate(self, abb):
try:
full = self._abb_to_full[abb]
except KeyError:
print('Full name not in database for abbreviation {}'.format(abb))
raise

return full

#_fields
#_terms
#_operators

class SearchString:
def __init__(self, input_list):

# Parse search term
@@ -99,27 +119,20 @@ def fields_match(self, tested_fields):

class BibFile:

def __init__(self, file_name):
with open(file_name) as file:
file_lines = file.readlines()
#file_lines = [file_line.lower() for file_line in file_lines]

# There are a lot of better ways to parse the file
self._file_lines = file_lines
def __init__(self, entry):
self._entry = entry

def search_string_match(self, search_string):
# I am using a fragile method to do this, probably also a slow one
tested_fields = [False] * len(search_string.fields)
for line in self._file_lines:
line = line.lower()
for field_index, (field, terms) in enumerate(search_string):
if field + ' =' in line:
if all(term in line for term in terms):
tested_fields[field_index] = True
else:
pass
for field_index, (field, terms) in enumerate(search_string):
if field in self._entry.keys():
field_entry = self._entry[field].lower()
if all(term in field_entry for term in terms):
tested_fields[field_index] = True
else:
pass
else:
pass

match = search_string.fields_match(tested_fields)

@@ -128,34 +141,76 @@ def search_string_match(self, search_string):
def get_field_texts(self, fields):
field_texts = []
for field in fields:
for line in self._file_lines:
# make this a separate method
if field + ' =' in line:
field_start = line.find('{') + 1
field_end = line.rfind('}')
field_text = line[field_start:field_end]
field_texts.append(field_text)
break
if field in self._entry.keys():
field_texts.append(self._entry[field])

return field_texts

def standarize_order_and_fields(self):
key = self._entry.key
typ = self._entry.typ
standard = OrderedDict()
if typ == 'article':
fields = ['author', 'title', 'journal', 'volume', 'pages', 'year',
'doi']
for field in fields:
try:
standard[field] = self._entry[field]
except KeyError:
print('Entry {} missing field {}'.format(key, field))

else:
print('Standard not defined for entry type {}'.format(typ))

self._entry = biblib.bib.Entry(standard, typ=typ, key=key)

def abbreviate_journal(self, abbreviations):
if self._entry.typ == 'article':
journal = self._entry['journal']
abb = abbreviations.abbreviate(journal)
self._entry['journal'] = abb

def unabbreviate_journal(self, abbreviations):
if self._entry.typ == 'article':
journal = self._entry['journal']
if '.' in journal:
full = abbreviations.unabbreviate(journal)
self._entry['journal'] = full

def write_to_file(self, filename=None):
if filename is None:
filename = '{}.bib'.format(self._entry.key)

with open(filename, 'w') as f:
f.write(self._entry.to_bib())

class Bibliography:

class Bibliography:
"""Bibliography composed of individual bib files in a directory"""
def __init__(self, bib_directory):
bibfile_names = []
for bibfile_name in os.listdir(bib_directory):

# Ignore hidden files
if bibfile_name[0] == '.':
continue
bibfile_name_full = bib_directory + bibfile_name
bibfile_names.append(bibfile_name_full)

self._bibfile_names = bibfile_names

# Create biblib database
bibparser = biblib.bib.Parser()
for filename in bibfile_names:
with open(filename) as bibfile:
bibparser.parse(bibfile)

self._entries = bibparser.get_entries()

def match_and_print_fields(self, search_string, fields):
print('')
for bibfile_name in self._bibfile_names:
bibfile = BibFile(bibfile_name)
for entry in self._entries.values():
bibfile = BibFile(entry)
match = bibfile.search_string_match(search_string)
if match:
field_texts = bibfile.get_field_texts(fields)
@@ -168,29 +223,3 @@ def _print_field_texts(self, field_texts):
print(field_text)

print('')


def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'-s',
type=str,
nargs='+',
dest='search_string',
help='Search string')
parser.add_argument(
'-t',
type=str,
nargs='+',
default = ['title', 'year', 'author', 'annote'],
dest='terms',
help='Terms to print')
args = parser.parse_args()

bibliography = Bibliography(BIB_DIRECTORY)
search_string = SearchString(args.search_string)
bibliography.match_and_print_fields(search_string, args.terms)


if __name__ == '__main__':
main()
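
A brief usage sketch for the new Abbreviations and BibFile classes above. The CSV file, bib file, and journal names are hypothetical; the biblib parsing mirrors what Bibliography.__init__ does for a whole directory.

import biblib.bib

from mybiblib.bib import Abbreviations, BibFile

# Hypothetical table with one "Full Journal Name,Abbrev. Name" pair per row
abbreviations = Abbreviations('journal_abbreviations.csv')

# Parse a single bib file with biblib and take its first entry
parser = biblib.bib.Parser()
with open('cumberworth2018.bib') as bibfile_handle:
    parser.parse(bibfile_handle)

entry = next(iter(parser.get_entries().values()))

bibfile = BibFile(entry)
bibfile.standarize_order_and_fields()      # keep the standard article fields, in a fixed order
bibfile.abbreviate_journal(abbreviations)  # replace the full journal name with its abbreviation
bibfile.write_to_file()                    # write the entry back out under its citation key
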
37 changes: 37 additions & 0 deletions profiling/profile_bib.py
@@ -0,0 +1,37 @@
#!/usr/bin/env python

import cProfile
import pstats
import searchRefs
import pyximport
pyximport.install()
import searchRefs_cython
import sys

# Command line arguments
input_string = ['field:keywords', 'review']
terms = ['title']

# Setup objects
bibliography = searchRefs.Bibliography(searchRefs.BIB_DIRECTORY)
bibliography_cython = searchRefs_cython.Bibliography(searchRefs_cython.BIB_DIRECTORY)
search_string = searchRefs.SearchString(input_string)
search_string_cython = searchRefs_cython.SearchString(input_string)
command_string = 'bibliography.match_and_print_fields(search_string, terms)'
command_string_cython = 'bibliography_cython.match_and_print_fields(search_string_cython, terms)'

# Profile
profile_file = 'python_profile.stats'
profile_file_cython = 'cython_profile.stats'
output_dump = '/tmp/searchRefs.txt'
sys.stdout = open(output_dump, 'w')
cProfile.run(command_string, profile_file)
cProfile.run(command_string_cython, profile_file_cython)

# Customize and write statistics
stats_output = 'python_profile.txt'
sys.stdout = open(stats_output, 'w')
python_profile_stats = pstats.Stats(profile_file)
python_profile_stats.strip_dirs()
python_profile_stats.sort_stats('cumtime')
python_profile_stats.print_stats()
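
The script profiles both the pure-Python and Cython versions but only formats statistics for the former; a parallel block along the following lines (the output filename is arbitrary) would dump the Cython statistics as well, reusing the names defined above.

# Customize and write statistics for the Cython run
stats_output_cython = 'cython_profile.txt'
sys.stdout = open(stats_output_cython, 'w')
cython_profile_stats = pstats.Stats(profile_file_cython)
cython_profile_stats.strip_dirs()
cython_profile_stats.sort_stats('cumtime')
cython_profile_stats.print_stats()
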
63 changes: 63 additions & 0 deletions scripts/parsebib.py
@@ -0,0 +1,63 @@
#!/usr/bin/env python3

"""Parse bib files and output in standard format; warn if missing information"""

# For now, assumes the bib file contains a single article entry

import argparse


FIELDS = ['title', 'author', 'journal', 'volume', 'number', 'pages', 'year', 'issn', 'doi', 'url', 'abstract']


def find_entry(line, field_dic):
field_entry = line.split('=', 1)
field = field_entry[0].split()[0].lower()
if field in FIELDS:
entry = field_entry[1]
while entry[0] in [' ', '{', '"']:
entry = entry[1:]

while entry[-1] in [' ', '}', '"', ',', '\n']:
entry = entry[:-1]

field_dic[field] = entry

return field_dic


def output_standard_bib(filebase, field_dic):
print('@article{{{},'.format(filebase))
for field, entry in field_dic.items():
print(' {} = {{{}}},'.format(field, entry))

print(' keywords = {},')
print(' annote = {}')
print('}')


def main():
parser = argparse.ArgumentParser()
parser.add_argument('filebase', help='Filebase of bib file')
args = parser.parse_args()
filebase = args.filebase

filename = filebase + '.bib'


with open(filename) as inp:
lines = inp.readlines()

field_dic = {}
for line in lines[1:]:
if line == '\n':
continue
else:
field_dic = find_entry(line, field_dic)

# modify field contents (capitalization, brackets, etc.)
output_standard_bib(filebase, field_dic)


if __name__ == '__main__':
main()
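
As a hypothetical illustration of the two helpers above (the input line and filebase are invented), find_entry extracts a field and its cleaned value, and output_standard_bib prints the reassembled entry:

field_dic = find_entry('    title = {An example title},\n', {})
# field_dic is now {'title': 'An example title'}

output_standard_bib('smith2018', field_dic)
# prints roughly:
# @article{smith2018,
#     title = {An example title},
#     keywords = {},
#     annote = {}
# }
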
20 changes: 20 additions & 0 deletions scripts/searchbibs.py
@@ -0,0 +1,20 @@
#!/usr/bin/env python

#import pyximport
#pyximport.install()

import argparse

from searchRefs_cython import *

def main():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('-s', type=str, nargs='+', dest='search_string', help='Search string')
argument_parser.add_argument('-t', type=str, nargs='+', dest='terms', help='Terms to print')
arguments = argument_parser.parse_args()

bibliography = Bibliography(BIB_DIRECTORY)
search_string = SearchString(arguments.search_string)
bibliography.match_and_print_fields(search_string, arguments.terms)


if __name__ == '__main__':
main()
8 changes: 8 additions & 0 deletions setup.py
@@ -0,0 +1,8 @@
import glob
from setuptools import setup

setup(
name='mybiblib',
packages=['mybiblib'],
scripts=glob.glob('scripts/*.py')
)
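
Since scripts/*.py are passed to setuptools via the scripts argument, installing the package (for example with pip install .) should place parsebib.py and searchbibs.py on the PATH alongside the importable mybiblib module.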
File renamed without changes.
