Various fixes to allow running on GPUs / changing compute levels / etc. #30

Open · wants to merge 9 commits into base: master
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased] 2018-05-03
### Added
- Discussion on units in FAQ docs

### Fixed
- Jacobian ordering discussion in documentation


## [1.0.6] - 2018-02-21
### Added
- DOI for 1.0.4
101 changes: 55 additions & 46 deletions pyjac/libgen/libgen.py
@@ -9,9 +9,9 @@
import sys
import multiprocessing
import platform

from .. import utils


def lib_ext(shared):
    """Returns the appropriate library extension based on the shared flag"""
    return '.a' if not shared else '.so'
@@ -23,6 +23,43 @@ def lib_ext(shared):
)


def which(file):
    """A substitute for the `which` command, searches the PATH for
    a given file"""
    for path in os.environ["PATH"].split(os.pathsep):
        if os.path.exists(os.path.join(path, file)):
            return os.path.join(path, file)

    return None
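For orientation, the helper behaves like the shell's `which`; a quick check (the path shown is illustrative, assuming a standard CUDA install with `nvcc` on the `PATH`):

```python
>>> which('nvcc')
'/usr/local/cuda/bin/nvcc'
>>> which('not-a-real-binary') is None
True
```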


def get_cuda_path(lib=True):
    """Returns location of CUDA (nvcc) on the system.

    Parameters
    ----------
    lib : bool
        If ``True``, return the CUDA library directory (``lib64`` on
        64-bit systems); otherwise return the root of the CUDA install.

    Returns
    -------
    cuda_path : str
        Path where CUDA (nvcc) is found on the system.

    """
    cuda_path = which('nvcc')
    if cuda_path is None:
        print('nvcc not found!')
        sys.exit(-1)

    sixtyfourbit = platform.architecture()[0] == '64bit'
    cuda_path = os.path.dirname(os.path.dirname(cuda_path))
    if lib:
        cuda_path = os.path.join(cuda_path,
                                 'lib{}'.format('64' if sixtyfourbit else '')
                                 )
    return cuda_path
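Given that layout, the two call modes resolve as below (again assuming `nvcc` at `/usr/local/cuda/bin/nvcc` on a 64-bit machine):

```python
>>> get_cuda_path()           # lib=True: the CUDA library directory
'/usr/local/cuda/lib64'
>>> get_cuda_path(lib=False)  # the CUDA installation root
'/usr/local/cuda'
```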


def cmd_lib(lang, shared):
    """Returns the appropriate compilation command for creation of the library based on the
    language and shared flag"""
@@ -35,14 +72,13 @@


includes = dict(c=['/usr/local/include/'], icc=['/usr/local/include/'],
                cuda=['/usr/local/cuda/include/',
                      '/usr/local/cuda/samples/common/inc/'
                      ]
                cuda=[os.path.join(get_cuda_path(False), 'include'),
                      os.path.join(get_cuda_path(False), 'samples', 'common', 'inc')]
                )

flags = dict(c=['-std=c99', '-O3', '-mtune=native'],
             icc=['-std=c99', '-O3', '-xhost', '-fp-model', 'precise', '-ipo'],
             cuda=['-O3', '-arch=sm_20']
             cuda=['-O3', '-arch=sm_{cl}']
             )
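The `sm_{cl}` placeholder is filled in later by `compiler()` (and by the performance tester's `linker()`) via `str.format`; a minimal sketch of the substitution, with 35 as a hypothetical compute level:

```python
cl = 35  # hypothetical compute level requested by the caller
cuda_flags = [f.format(cl=cl) for f in flags['cuda']]
# cuda_flags == ['-O3', '-arch=sm_35']
```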

shared_flags = dict(c=['-fPIC'],
@@ -56,16 +92,6 @@
)


def which(file):
"""A substitute for the `which` command, searches the PATH for
a given file"""
for path in os.environ["PATH"].split(os.pathsep):
if os.path.exists(os.path.join(path, file)):
return os.path.join(path, file)

return None


def compiler(fstruct):
    """Given a file structure, this method will compile the source file for the
    language and options specified
@@ -87,7 +113,10 @@ def compiler(fstruct):
    args = [cmd_compile[fstruct.build_lang]]
    if fstruct.auto_diff:
        args = ['g++']
    args.extend(flags[fstruct.build_lang])
    fl = flags[fstruct.build_lang]
    if fstruct.build_lang == 'cuda':
        fl = [f.format(cl=fstruct.cl) for f in fl]
    args.extend(fl)
    if fstruct.auto_diff:
        args = [x for x in args if 'std=c99' not in x]

@@ -120,32 +149,6 @@ def compiler(fstruct):
    return 0


def get_cuda_path():
    """Returns location of CUDA (nvcc) on the system.

    Parameters
    ----------
    None

    Returns
    -------
    cuda_path : str
        Path where CUDA (nvcc) is found on the system.

    """
    cuda_path = which('nvcc')
    if cuda_path is None:
        print('nvcc not found!')
        sys.exit(-1)

    sixtyfourbit = platform.architecture()[0] == '64bit'
    cuda_path = os.path.dirname(os.path.dirname(cuda_path))
    cuda_path = os.path.join(cuda_path,
                             'lib{}'.format('64' if sixtyfourbit else '')
                             )
    return cuda_path


def libgen(lang, obj_dir, out_dir, filelist, shared, auto_diff):
    """Create a library from a list of compiled files

@@ -219,7 +222,7 @@ class file_struct(object):
"""A simple structure designed to enable multiprocess compilation
"""
def __init__(self, lang, build_lang, filename, i_dirs, args,
source_dir, obj_dir, shared
source_dir, obj_dir, shared, cl=20
):
"""
Parameters
@@ -240,6 +243,8 @@ def __init__(self, lang, build_lang, filename, i_dirs, args,
            The directory to place the compiled object file in
        shared : bool
            If true, this is creating a shared library
        cl : int [20]
            The CUDA compute level to compile for
        """

        self.lang = lang
@@ -250,7 +255,8 @@ def __init__(self, lang, build_lang, filename, i_dirs, args,
        self.source_dir = source_dir
        self.obj_dir = obj_dir
        self.shared = shared
        self.auto_diff=False
        self.auto_diff = False
        self.cl = cl
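A minimal construction sketch (the file and directory names are hypothetical) showing the new `cl` keyword alongside the existing fields:

```python
fs = file_struct('cuda', 'cuda', 'jacob', ['out/'], [],
                 'out/', 'obj/', shared=False, cl=35)
assert fs.cl == 35 and fs.auto_diff is False
```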


def get_file_list(source_dir, pmod, lang, FD=False, AD=False):
@@ -321,7 +327,8 @@ def get_file_list(source_dir, pmod, lang, FD=False, AD=False):

def generate_library(lang, source_dir, obj_dir=None,
                     out_dir=None, shared=None,
                     finite_difference=False, auto_diff=False
                     finite_difference=False, auto_diff=False,
                     compute_level=20
                     ):
    """Generate shared/static library for pyJac files.

@@ -339,6 +346,8 @@ def generate_library(lang, source_dir, obj_dir=None,
        If ``True``, include finite differences
    auto_diff : bool
        If ``True``, include autodifferentiation
    compute_level : int [20]
        The CUDA compute level to use. Defaults to 20 (``sm_20``)

    Returns
    -------
@@ -395,7 +404,7 @@ def generate_library(lang, source_dir, obj_dir=None,
    # Compile generated source code
    structs = [file_struct(lang, build_lang, f, i_dirs,
                           (['-DFINITE_DIFF'] if finite_difference else []),
                           source_dir, obj_dir, shared) for f in files
                           source_dir, obj_dir, shared, cl=compute_level) for f in files
               ]
    for x in structs:
        x.auto_diff=auto_diff
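From the caller's side the new keyword threads straight through to the compile flags; a hedged usage sketch (directory names hypothetical), targeting compute capability 5.2:

```python
lib = generate_library('cuda', 'out/', obj_dir='obj/', out_dir='lib/',
                       shared=True, compute_level=52)
```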
12 changes: 11 additions & 1 deletion pyjac/performance_tester/__main__.py
@@ -22,10 +22,20 @@ def main(args=None):
                        help='If True, allows performance_tester to use '
                             'any old optimization files found'
                        )
    parser.add_argument('-cl', '--compute_level',
                        default=20,
                        type=int,
                        required=False,
                        choices=[20, 21, 30, 32, 35, 37, 50, 52, 53, 60, 61,
                                 62, 70, 71],
                        help='The CUDA compute level to compile and link '
                             'for, e.g. 35 for sm_35'
                        )
    args = parser.parse_args()
    pt.performance_tester(os.path.dirname(os.path.abspath(pt.__file__)),
                          args.working_directory,
                          args.use_old_opt)
                          args.use_old_opt,
                          args.compute_level)

if __name__ == '__main__':
    sys.exit(main())
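A self-contained sketch of how the new flag parses (mirroring the choices above; the real parser carries the other arguments as well):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-cl', '--compute_level', type=int, default=20,
                    choices=[20, 21, 30, 32, 35, 37, 50, 52, 53,
                             60, 61, 62, 70, 71])
args = parser.parse_args(['-cl', '35'])
assert args.compute_level == 35
```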
20 changes: 11 additions & 9 deletions pyjac/performance_tester/performance_tester.py
@@ -175,10 +175,10 @@ def cmd_link(lang, shared):
    return cmd


def linker(lang, temp_lang, test_dir, filelist, lib=None):
def linker(lang, temp_lang, test_dir, filelist, lib=None, cl=20):
    args = cmd_link(temp_lang, not STATIC)
    if lang == 'cuda' or (not STATIC):
        args.extend(flags[temp_lang])
        args.extend([f.format(cl=cl) for f in flags[temp_lang]])
    args.extend([os.path.join(test_dir, getf(f) + '.o') for f in filelist])
    args.extend(['-o', os.path.join(test_dir, 'speedtest')])
    if temp_lang == 'cuda':
@@ -210,7 +210,7 @@ def linker(lang, temp_lang, test_dir, filelist, lib=None):
        sys.exit(1)


def performance_tester(home, work_dir, use_old_opt):
def performance_tester(home, work_dir, use_old_opt, cl_level=20):
    """Runs performance testing for pyJac, TChem, and finite differences.

    Parameters
@@ -221,6 +221,8 @@
        Working directory with mechanisms and for data
    use_old_opt : bool
        If ``True``, use old optimization files found
    cl_level : int [20]
        The CUDA compute level to use

    Returns
    -------
@@ -275,7 +277,7 @@ def false_factory():
    import multiprocessing  # for cpu count
    max_cpu = multiprocessing.cpu_count()
    num_threads = [1]
    while num_threads < max_cpu:
    while num_threads[-1] < max_cpu:
        num_threads.append(min(max_cpu, num_threads[-1] * 2))
    c_params = {'lang' : 'c',
                'cache_opt' : [False],
#now build the library
if lang != 'tchem':
lib = generate_library(lang, build_dir, test_dir,
finite_difference=FD, shared=not STATIC
)
finite_difference=FD, shared=not STATIC,
compute_level=cl_level)

lib = os.path.normpath(lib)
lib = (lib[lib.index('lib') +
@@ -474,8 +476,8 @@
    # Compile generated source code
    structs = [file_struct(lang, temp_lang, f, i_dirs,
                           (['-DFINITE_DIFF'] if FD else []),
                           build_dir, test_dir, not STATIC
                           ) for f in files
                           build_dir, test_dir, not STATIC,
                           cl=cl_level) for f in files
               ]
    if lang != 'cuda':
        for s in structs:
@@ -488,7 +490,7 @@
    if any(r == -1 for r in results):
        sys.exit(-1)

    linker(lang, temp_lang, test_dir, files, lib)
    linker(lang, temp_lang, test_dir, files, lib, cl=cl_level)

    if lang == 'tchem':
        #copy periodic table and mechanisms in