diff --git a/CHANGELOG.md b/CHANGELOG.md index 794136d..ebf380d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] - 2018-05-03 +### Added +- Discussion on units in FAQ docs + +### Fixed +- Jacobian ordering discussion in documentation + + ## [1.0.6] - 2018-02-21 ### Added - DOI for 1.0.4 diff --git a/pyjac/libgen/libgen.py b/pyjac/libgen/libgen.py index 93402ec..a30970b 100644 --- a/pyjac/libgen/libgen.py +++ b/pyjac/libgen/libgen.py @@ -9,9 +9,9 @@ import sys import multiprocessing import platform - from .. import utils + def lib_ext(shared): """Returns the appropriate library extension based on the shared flag""" return '.a' if not shared else '.so' @@ -23,6 +23,43 @@ def lib_ext(shared): ) +def which(file): + """A substitute for the `which` command, searches the PATH for + a given file""" + for path in os.environ["PATH"].split(os.pathsep): + if os.path.exists(os.path.join(path, file)): + return os.path.join(path, file) + + return None + + +def get_cuda_path(lib=True): + """Returns location of CUDA (nvcc) on the system. + + Parameters + ---------- + None + + Returns + ------- + cuda_path : str + Path where CUDA (nvcc) is found on the system. 
+ + """ + cuda_path = which('nvcc') + if cuda_path is None: + print('nvcc not found!') + sys.exit(-1) + + sixtyfourbit = platform.architecture()[0] == '64bit' + cuda_path = os.path.dirname(os.path.dirname(cuda_path)) + if lib: + cuda_path = os.path.join(cuda_path, + 'lib{}'.format('64' if sixtyfourbit else '') + ) + return cuda_path + + def cmd_lib(lang, shared): """Returns the appropriate compilation command for creation of the library based on the language and shared flag""" @@ -35,14 +72,13 @@ def cmd_lib(lang, shared): includes = dict(c=['/usr/local/include/'], icc=['/usr/local/include/'], - cuda=['/usr/local/cuda/include/', - '/usr/local/cuda/samples/common/inc/' - ] + cuda=[os.path.join(get_cuda_path(False), 'include'), + os.path.join(get_cuda_path(False), 'samples', 'common', 'inc')] ) flags = dict(c=['-std=c99', '-O3', '-mtune=native'], icc=['-std=c99', '-O3', '-xhost', '-fp-model', 'precise', '-ipo'], - cuda=['-O3', '-arch=sm_20'] + cuda=['-O3', '-arch=sm_{cl}'] ) shared_flags = dict(c=['-fPIC'], @@ -56,16 +92,6 @@ def cmd_lib(lang, shared): ) -def which(file): - """A substitute for the `which` command, searches the PATH for - a given file""" - for path in os.environ["PATH"].split(os.pathsep): - if os.path.exists(os.path.join(path, file)): - return os.path.join(path, file) - - return None - - def compiler(fstruct): """Given a file structure, this method will compile the source file for the language and options specified @@ -87,7 +113,10 @@ def compiler(fstruct): args = [cmd_compile[fstruct.build_lang]] if fstruct.auto_diff: args = ['g++'] - args.extend(flags[fstruct.build_lang]) + fl = flags[fstruct.build_lang] + if fstruct.build_lang == 'cuda': + fl = [f.format(cl=fstruct.cl) for f in fl] + args.extend(fl) if fstruct.auto_diff: args = [x for x in args if 'std=c99' not in x] @@ -120,32 +149,6 @@ def compiler(fstruct): return 0 -def get_cuda_path(): - """Returns location of CUDA (nvcc) on the system. 
- - Parameters - ---------- - None - - Returns - ------- - cuda_path : str - Path where CUDA (nvcc) is found on the system. - - """ - cuda_path = which('nvcc') - if cuda_path is None: - print('nvcc not found!') - sys.exit(-1) - - sixtyfourbit = platform.architecture()[0] == '64bit' - cuda_path = os.path.dirname(os.path.dirname(cuda_path)) - cuda_path = os.path.join(cuda_path, - 'lib{}'.format('64' if sixtyfourbit else '') - ) - return cuda_path - - def libgen(lang, obj_dir, out_dir, filelist, shared, auto_diff): """Create a library from a list of compiled files @@ -219,7 +222,7 @@ class file_struct(object): """A simple structure designed to enable multiprocess compilation """ def __init__(self, lang, build_lang, filename, i_dirs, args, - source_dir, obj_dir, shared + source_dir, obj_dir, shared, cl=20 ): """ Parameters @@ -240,6 +243,8 @@ def __init__(self, lang, build_lang, filename, i_dirs, args, The directory to place the compiled object file in shared : bool If true, this is creating a shared library + cl : int [20] + The default compute level """ self.lang = lang @@ -250,7 +255,8 @@ def __init__(self, lang, build_lang, filename, i_dirs, args, self.source_dir = source_dir self.obj_dir = obj_dir self.shared = shared - self.auto_diff=False + self.auto_diff = False + self.cl = cl def get_file_list(source_dir, pmod, lang, FD=False, AD=False): @@ -321,7 +327,8 @@ def get_file_list(source_dir, pmod, lang, FD=False, AD=False): def generate_library(lang, source_dir, obj_dir=None, out_dir=None, shared=None, - finite_difference=False, auto_diff=False + finite_difference=False, auto_diff=False, + compute_level=20 ): """Generate shared/static library for pyJac files. @@ -339,6 +346,8 @@ def generate_library(lang, source_dir, obj_dir=None, If ``True``, include finite differences auto_diff : bool If ``True``, include autodifferentiation + compute_level: int [20] + If specified, the CUDA compute level to use. 
Defaults to 20 Returns ------- @@ -395,7 +404,7 @@ def generate_library(lang, source_dir, obj_dir=None, # Compile generated source code structs = [file_struct(lang, build_lang, f, i_dirs, (['-DFINITE_DIFF'] if finite_difference else []), - source_dir, obj_dir, shared) for f in files + source_dir, obj_dir, shared, cl=compute_level) for f in files ] for x in structs: x.auto_diff=auto_diff diff --git a/pyjac/performance_tester/__main__.py b/pyjac/performance_tester/__main__.py index eac263c..caa9a69 100644 --- a/pyjac/performance_tester/__main__.py +++ b/pyjac/performance_tester/__main__.py @@ -22,10 +22,20 @@ def main(args=None): help='If True, allows performance_tester to use ' 'any old optimization files found' ) + parser.add_argument('-cl', '--compute_level', + default=20, + type=int, + required=False, + choices=[20, 21, 30, 32, 35, 37, 50, 52, 53, 60, 61, + 62, 70, 71], + help='The CUDA compute level to compile for, ' + 'e.g., 35 selects -arch=sm_35' + ) args = parser.parse_args() pt.performance_tester(os.path.dirname(os.path.abspath(pt.__file__)), args.working_directory, - args.use_old_opt) + args.use_old_opt, + args.compute_level) if __name__ == '__main__': sys.exit(main()) diff --git a/pyjac/performance_tester/performance_tester.py b/pyjac/performance_tester/performance_tester.py index 145765e..0210748 100644 --- a/pyjac/performance_tester/performance_tester.py +++ b/pyjac/performance_tester/performance_tester.py @@ -175,10 +175,10 @@ def cmd_link(lang, shared): return cmd -def linker(lang, temp_lang, test_dir, filelist, lib=None): +def linker(lang, temp_lang, test_dir, filelist, lib=None, cl=20): args = cmd_link(temp_lang, not STATIC) if lang == 'cuda' or (not STATIC): - args.extend(flags[temp_lang]) + args.extend([f.format(cl=cl) for f in flags[temp_lang]]) args.extend([os.path.join(test_dir, getf(f) + '.o') for f in filelist]) args.extend(['-o', os.path.join(test_dir, 'speedtest')]) if temp_lang == 'cuda': @@ -210,7 +210,7 @@ def linker(lang, 
temp_lang, test_dir, filelist, lib=None): sys.exit(1) -def performance_tester(home, work_dir, use_old_opt): +def performance_tester(home, work_dir, use_old_opt, cl_level=20): """Runs performance testing for pyJac, TChem, and finite differences. Parameters @@ -221,6 +221,8 @@ def performance_tester(home, work_dir, use_old_opt): Working directory with mechanisms and for data use_old_opt : bool If ``True``, use old optimization files found + cl_level: int [20] + The cuda compute level to use Returns ------- @@ -275,7 +277,7 @@ def false_factory(): import multiprocessing #for cpu count max_cpu = multiprocessing.cpu_count() num_threads = [1] - while num_threads < max_cpu: + while num_threads[-1] < max_cpu: num_threads.append(min(max_cpu, num_threads[-1] * 2)) c_params = {'lang' : 'c', 'cache_opt' : [False], @@ -461,8 +463,8 @@ def false_factory(): #now build the library if lang != 'tchem': lib = generate_library(lang, build_dir, test_dir, - finite_difference=FD, shared=not STATIC - ) + finite_difference=FD, shared=not STATIC, + compute_level=cl_level) lib = os.path.normpath(lib) lib = (lib[lib.index('lib') + @@ -474,8 +476,8 @@ def false_factory(): # Compile generated source code structs = [file_struct(lang, temp_lang, f, i_dirs, (['-DFINITE_DIFF'] if FD else []), - build_dir, test_dir, not STATIC - ) for f in files + build_dir, test_dir, not STATIC, + cl=cl_level) for f in files ] if lang != 'cuda': for s in structs: @@ -488,7 +490,7 @@ def false_factory(): if any(r == -1 for r in results): sys.exit(-1) - linker(lang, temp_lang, test_dir, files, lib) + linker(lang, temp_lang, test_dir, files, lib, cl=cl_level) if lang == 'tchem': #copy periodic table and mechanisms in