Commit
Reorganized into a module with separate scripts.
cumberworth committed Nov 7, 2018
1 parent c7b6476 commit f05e0ed
Showing 8 changed files with 225 additions and 65 deletions.
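
For orientation, here is a minimal sketch of how the reorganized module might be driven after this commit. The class names and call signatures are taken from mybiblib/bib.py below; the bib directory path is the old hard-coded default and the search-string format follows the profiling script included in this commit, so treat the concrete values as placeholders.

from mybiblib.bib import Bibliography, SearchString

# Directory of single-entry .bib files (placeholder path; the old script hard-coded it)
bibliography = Bibliography('/home/alexc/refs/bibs/')

# Match entries whose keywords field contains "review" and print selected fields
search_string = SearchString(['field:keywords', 'review'])
bibliography.match_and_print_fields(search_string, ['title', 'year', 'author'])
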
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
*__pycache__/
prereqs.txt
tags
Empty file added mybiblib/__init__.py
Empty file.
159 changes: 94 additions & 65 deletions searchRefs.py → mybiblib/bib.py
@@ -1,24 +1,44 @@
#!/usr/bin/env python
"""Custom classes for bib files"""

"""Search reference .bib files for keywords and return requisted info.

"""

import argparse
import csv
from collections import OrderedDict
import os
import pytest
import pdb


BIB_DIRECTORY = '/home/alexc/refs/bibs/'
import biblib.bib


class SearchString:
class Abbreviations:
def __init__(self, abb_filename):
raw = csv.reader(open(abb_filename))

self._full_to_abb = {}
self._abb_to_full = {}
for row in raw:
self._full_to_abb[row[0]] = row[1]
self._abb_to_full[row[1]] = row[0]

def abbreviate(self, full):
try:
abb = self._full_to_abb[full]
except KeyError:
print('Abbreviation not in database for journal {}'.format(full))
raise

return abb

def unabbreviate(self, abb):
try:
full = self._abb_to_full[abb]
except KeyError:
print('Full name not in database for abbreviation {}'.format(abb))
raise

return full

#_fields
#_terms
#_operators

class SearchString:
def __init__(self, input_list):

# Parse search term
@@ -99,27 +119,20 @@ def fields_match(self, tested_fields):

class BibFile:

def __init__(self, file_name):
with open(file_name) as file:
file_lines = file.readlines()
#file_lines = [file_line.lower() for file_line in file_lines]

# There are a lot of better ways to parse the file
self._file_lines = file_lines
def __init__(self, entry):
self._entry = entry

def search_string_match(self, search_string):
# I am using a fragile method to do this, probably also a slow one
tested_fields = [False] * len(search_string.fields)
for line in self._file_lines:
line = line.lower()
for field_index, (field, terms) in enumerate(search_string):
if field + ' =' in line:
if all(term in line for term in terms):
tested_fields[field_index] = True
else:
pass
for field_index, (field, terms) in enumerate(search_string):
if field in self._entry.keys():
field_entry = self._entry[field].lower()
if all(term in field_entry for term in terms):
tested_fields[field_index] = True
else:
pass
else:
pass

match = search_string.fields_match(tested_fields)

@@ -128,34 +141,76 @@ def search_string_match(self, search_string):
def get_field_texts(self, fields):
field_texts = []
for field in fields:
for line in self._file_lines:
# make this a separate method
if field + ' =' in line:
field_start = line.find('{') + 1
field_end = line.rfind('}')
field_text = line[field_start:field_end]
field_texts.append(field_text)
break
if field in self._entry.keys():
field_texts.append(self._entry[field])

return field_texts

def standarize_order_and_fields(self):
key = self._entry.key
typ = self._entry.typ
standard = OrderedDict()
if typ == 'article':
fields = ['author', 'title', 'journal', 'volume', 'pages', 'year',
'doi']
for field in fields:
try:
standard[field] = self._entry[field]
except KeyError:
print('Entry {} missing field {}'.format(key, field))

else:
print('Standard not defined for entry type {}'.format(typ))

self._entry = biblib.bib.Entry(standard, typ=typ, key=key)

def abbreviate_journal(self, abbreviations):
if self._entry.typ == 'article':
journal = self._entry['journal']
abb = abbreviations.abbreviate(journal)
self._entry['journal'] = abb

def unabbreviate_journal(self, abbreviations):
if self._entry.typ == 'article':
journal = self._entry['journal']
if '.' in journal:
full = abbreviations.unabbreviate(journal)
self._entry['journal'] = full

def write_to_file(self, filename=None):
if filename is None:
filename = '{}.bib'.format(self._entry.key)

with open(filename, 'w') as f:
f.write(self._entry.to_bib())

class Bibliography:

class Bibliography:
"""Bibliography composed of individual bib files in a directory"""
def __init__(self, bib_directory):
bibfile_names = []
for bibfile_name in os.listdir(bib_directory):

# Ignore hidden files
if bibfile_name[0] == '.':
continue
bibfile_name_full = bib_directory + bibfile_name
bibfile_names.append(bibfile_name_full)

self._bibfile_names = bibfile_names

# Create biblib database
bibparser = biblib.bib.Parser()
for filename in bibfile_names:
with open(filename) as bibfile:
bibparser.parse(bibfile)

self._entries = bibparser.get_entries()

def match_and_print_fields(self, search_string, fields):
print('')
for bibfile_name in self._bibfile_names:
bibfile = BibFile(bibfile_name)
for entry in self._entries.values():
bibfile = BibFile(entry)
match = bibfile.search_string_match(search_string)
if match:
field_texts = bibfile.get_field_texts(fields)
@@ -168,29 +223,3 @@ def _print_field_texts(self, field_texts):
print(field_text)

print('')


def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'-s',
type=str,
nargs='+',
dest='search_string',
help='Search string')
parser.add_argument(
'-t',
type=str,
nargs='+',
default = ['title', 'year', 'author', 'annote'],
dest='terms',
help='Terms to print')
args = parser.parse_args()

bibliography = Bibliography(BIB_DIRECTORY)
search_string = SearchString(args.search_string)
bibliography.match_and_print_fields(search_string, args.terms)


if __name__ == '__main__':
main()
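
A brief usage sketch for the new Abbreviations and BibFile classes above. The CSV file, bib file, and journal names are hypothetical; the biblib parsing mirrors what Bibliography.__init__ does for a whole directory.

import biblib.bib

from mybiblib.bib import Abbreviations, BibFile

# Hypothetical table with one "Full Journal Name,Abbrev. Name" pair per row
abbreviations = Abbreviations('journal_abbreviations.csv')

# Parse a single bib file with biblib and take its first entry
parser = biblib.bib.Parser()
with open('cumberworth2018.bib') as bibfile_handle:
    parser.parse(bibfile_handle)

entry = next(iter(parser.get_entries().values()))

bibfile = BibFile(entry)
bibfile.standarize_order_and_fields()      # keep the standard article fields, in a fixed order
bibfile.abbreviate_journal(abbreviations)  # replace the full journal name with its abbreviation
bibfile.write_to_file()                    # write the entry back out under its citation key
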
37 changes: 37 additions & 0 deletions profiling/profile_bib.py
@@ -0,0 +1,37 @@
#!/usr/bin/env python

import cProfile
import pstats
import searchRefs
import pyximport
pyximport.install()
import searchRefs_cython
import sys

# Command line arguments
input_string = ['field:keywords', 'review']
terms = ['title']

# Setup objects
bibliography = searchRefs.Bibliography(searchRefs.BIB_DIRECTORY)
bibliography_cython = searchRefs_cython.Bibliography(searchRefs_cython.BIB_DIRECTORY)
search_string = searchRefs.SearchString(input_string)
search_string_cython = searchRefs_cython.SearchString(input_string)
command_string = 'bibliography.match_and_print_fields(search_string, terms)'
command_string_cython = 'bibliography_cython.match_and_print_fields(search_string_cython, terms)'

# Profile
profile_file = 'python_profile.stats'
profile_file_cython = 'cython_profile.stats'
output_dump = '/tmp/searchRefs.txt'
sys.stdout = open(output_dump, 'w')
cProfile.run(command_string, profile_file)
cProfile.run(command_string_cython, profile_file_cython)

# Customize and write statistics
stats_output = 'python_profile.txt'
sys.stdout = open(stats_output, 'w')
python_profile_stats = pstats.Stats(profile_file)
python_profile_stats.strip_dirs()
python_profile_stats.sort_stats('cumtime')
python_profile_stats.print_stats()
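
The script profiles both the pure-Python and Cython versions but only formats statistics for the former; a parallel block along the following lines (the output filename is arbitrary) would dump the Cython statistics as well, reusing the names defined above.

# Customize and write statistics for the Cython run
stats_output_cython = 'cython_profile.txt'
sys.stdout = open(stats_output_cython, 'w')
cython_profile_stats = pstats.Stats(profile_file_cython)
cython_profile_stats.strip_dirs()
cython_profile_stats.sort_stats('cumtime')
cython_profile_stats.print_stats()
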
63 changes: 63 additions & 0 deletions scripts/parsebib.py
@@ -0,0 +1,63 @@
#!/usr/bin/env python3

"""Parse bib files and output in standard format; warn if missing information"""

# For now, assumes the bib file contains a single article entry

import argparse


FIELDS = ['title', 'author', 'journal', 'volume', 'number', 'pages', 'year', 'issn', 'doi', 'url', 'abstract']


def find_entry(line, field_dic):
field_entry = line.split('=', 1)
field = field_entry[0].split()[0].lower()
if field in FIELDS:
entry = field_entry[1]
while entry[0] in [' ', '{', '"']:
entry = entry[1:]

while entry[-1] in [' ', '}', '"', ',', '\n']:
entry = entry[:-1]

field_dic[field] = entry

return field_dic


def output_standard_bib(filebase, field_dic):
print('@article{{{},'.format(filebase))
for field, entry in field_dic.items():
print(' {} = {{{}}},'.format(field, entry))

print(' keywords = {},')
print(' annote = {}')
print('}')


def main():
parser = argparse.ArgumentParser()
parser.add_argument('filebase', help='Filebase of bib file')
args = parser.parse_args()
filebase = args.filebase

filename = filebase + '.bib'


with open(filename) as inp:
lines = inp.readlines()

field_dic = {}
for line in lines[1:]:
if line == '\n':
continue
else:
field_dic = find_entry(line, field_dic)

# modify field contents (capitalization, brackets, etc.)
output_standard_bib(filebase, field_dic)


if __name__ == '__main__':
main()
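
As a hypothetical illustration of the two helpers above (the input line and filebase are invented), find_entry extracts a field and its cleaned value, and output_standard_bib prints the reassembled entry:

field_dic = find_entry('    title = {An example title},\n', {})
# field_dic is now {'title': 'An example title'}

output_standard_bib('smith2018', field_dic)
# prints roughly:
# @article{smith2018,
#     title = {An example title},
#     keywords = {},
#     annote = {}
# }
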
20 changes: 20 additions & 0 deletions scripts/searchbibs.py
@@ -0,0 +1,20 @@
#!/usr/bin/env python

#import pyximport
#pyximport.install()

import argparse

from searchRefs_cython import *

def main():
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('-s', type=str, nargs='+', dest='search_string', help='Search string')
argument_parser.add_argument('-t', type=str, nargs='+', dest='terms', help='Terms to print')
arguments = argument_parser.parse_args()

bibliography = Bibliography(BIB_DIRECTORY)
search_string = SearchString(arguments.search_string)
bibliography.match_and_print_fields(search_string, arguments.terms)


if __name__ == '__main__':
main()
8 changes: 8 additions & 0 deletions setup.py
@@ -0,0 +1,8 @@
import glob
from setuptools import setup

setup(
name='mybiblib',
packages=['mybiblib'],
scripts=glob.glob('scripts/*.py')
)
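
Since scripts/*.py are passed to setuptools via the scripts argument, installing the package (for example with pip install .) should place parsebib.py and searchbibs.py on the PATH alongside the importable mybiblib module.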
File renamed without changes.
