Template with dynamic resources (CPU app and GPU app)

Launches either the CPU-only or GPU version of the forces MPI app and auto-assigns ranks and GPU resources as requested by the generator.

This makes efficient use of each node as the expensive GPU simulations will use the GPUs on the node(s), while the rest of the CPU cores are assigned to the simple CPU-only simulations.

See this publication for a real-world demonstration of these capabilities.

forces_multi_app.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)

Launches either the CPU-only or GPU version of the forces MPI app and auto-assigns ranks and GPU resources as requested by the generator.

forces_simf.py
 1import numpy as np
 2
 3# Optional status codes to display in libE_stats.txt for each gen or sim
 4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
 5
 6# Optional - to print GPU settings
 7from libensemble.tools.test_support import check_gpu_setting
 8
 9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch either the CPU-only or GPU version of the forces MPI app.

    Ranks and GPU resources are auto-assigned by the worker as requested
    by the generator (via the special "num_procs"/"num_gpus" fields).

    Follows the libEnsemble sim_f interface:
        H: History rows; reads H["x"] (particle count) and
           H["app_type"] (registered app name, stored as bytes).
        persis_info: Worker persistent state (returned unchanged).
        sim_specs: Supplies the output dtype via sim_specs["out"].
        libE_info: Supplies the MPI executor via libE_info["executor"].

    Returns:
        (output, persis_info, calc_status) where output holds the final
        "energy" reading (NaN when the stat file cannot be read).
    """
    calc_status = 0

    # Particle count from the generator (also reused as the RNG seed below)
    num_particles = str(int(H["x"][0][0]))

    # App arguments: num particles, timesteps, also using num particles as seed
    app_args = f"{num_particles} 10 {num_particles}"

    # Retrieve our MPI Executor and launch the requested application
    executor = libE_info["executor"]
    app_name = H["app_type"][0].decode()
    task = executor.submit(app_name=app_name, app_args=app_args)

    # Block until the task finishes
    task.wait()

    # Optional - prints GPU assignment (method and numbers)
    check_gpu_setting(task, assert_setting=False, print_setting=True, desc=app_name)

    # Read the final energy from the stat file; any read/parse failure
    # marks this simulation as failed rather than crashing the worker.
    try:
        readings = np.loadtxt("forces.stat")
        final_energy = readings[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Build the output array expected by the manager
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Return final information to worker, for reporting to manager
    return output, persis_info, calc_status

The generator in the example below assigns to each simulation either the CPU application or the GPU application and also randomly assigns the number of processors for each simulation. For the GPU application, one GPU is used for each MPI rank. As many nodes as necessary will be used for each application.

The special generator output fields “num_procs” and “num_gpus” are automatically picked up by each worker and these will be used when the simulation is run, unless overridden.

Example usage
  1#!/usr/bin/env python
  2
  3"""
  4This example runs different applications, one that uses only CPUs and one
  5that uses GPUs. Both use a variable number of processors. The GPU application
  6uses one GPU per processor. As the generator creates simulations, it randomly
  7assigns between one and max_proc processors to each simulation, and also randomly
  8assigns which application is to be run.
  9
 10The forces.c application should be compiled for the CPU to `forces_cpu.x`, and
 11for the GPU (setting the GPU preprocessor condition) to `forces_gpu.x`.
 12
 13For compile lines, see examples in ../forces_app/build_forces.sh.
 14
 15It is recommended to run this test such that:
 16    ((nworkers - 1) - gpus_on_node) >= gen_specs["user"][max_procs]
 17
 18E.g., if running on one node with four GPUs, then use:
 19    python run_libE_forces.py --nworkers 9
 20
 21E.g., if running on one node with eight GPUs, then use:
 22    python run_libE_forces.py --nworkers 17
 23"""
 24
 25import os
 26import sys
 27
 28import numpy as np
 29from forces_simf import run_forces  # Sim func from current dir
 30
 31from libensemble import Ensemble
 32from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
 33from libensemble.executors import MPIExecutor
 34from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_diff_simulations as gen_f
 35from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
 36
 37if __name__ == "__main__":
 38    # Initialize MPI Executor instance
 39    exctr = MPIExecutor()
 40
 41    # Register simulation executable with executor
 42    cpu_app = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x")
 43    gpu_app = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x")
 44
 45    if not os.path.isfile(cpu_app):
 46        sys.exit(f"{cpu_app} not found - please build first in ../forces_app dir")
 47    if not os.path.isfile(gpu_app):
 48        sys.exit(f"{gpu_app} not found - please build first in ../forces_app dir")
 49
 50    exctr.register_app(full_path=cpu_app, app_name="cpu_app")
 51    exctr.register_app(full_path=gpu_app, app_name="gpu_app")
 52
 53    # Parse number of workers, comms type, etc. from arguments
 54    ensemble = Ensemble(parse_args=True, executor=exctr)
 55    nsim_workers = ensemble.nworkers - 1  # One worker is for persistent generator
 56
 57    # Persistent gen does not need resources
 58    ensemble.libE_specs = LibeSpecs(
 59        num_resource_sets=nsim_workers,
 60        sim_dirs_make=True,
 61        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
 62        # resource_info = {"gpus_on_node": 4},  # for mocking GPUs
 63    )
 64
 65    ensemble.sim_specs = SimSpecs(
 66        sim_f=run_forces,
 67        inputs=["x", "app_type"],
 68        outputs=[("energy", float)],
 69    )
 70
 71    ensemble.gen_specs = GenSpecs(
 72        gen_f=gen_f,
 73        inputs=[],  # No input when starting persistent generator
 74        persis_in=["sim_id"],  # Return sim_ids of evaluated points to generator
 75        outputs=[
 76            ("x", float, (1,)),
 77            ("num_procs", int),  # num_procs auto given to sim when using MPIExecutor
 78            ("num_gpus", int),  # num_gpus auto given to sim when using MPIExecutor
 79            ("app_type", "S10"),  # select app type (cpu_app or gpu_app)
 80        ],
 81        user={
 82            "initial_batch_size": nsim_workers,
 83            "lb": np.array([5000]),  # min particles
 84            "ub": np.array([10000]),  # max particles
 85            "max_procs": (nsim_workers) // 2,  # Any sim created can req. 1 worker up to max
 86        },
 87    )
 88
 89    # Starts one persistent generator. Simulated values are returned in batch.
 90    ensemble.alloc_specs = AllocSpecs(
 91        alloc_f=alloc_f,
 92        user={
 93            "async_return": False,  # False causes batch returns
 94        },
 95    )
 96
 97    # Instruct libEnsemble to exit after this many simulations.
 98    ensemble.exit_criteria = ExitCriteria(sim_max=nsim_workers * 2)
 99
100    # Seed random streams for each worker, particularly for gen_f.
101    ensemble.add_random_streams()
102
103    # Run ensemble
104    ensemble.run()
105
106    if ensemble.is_manager:
107        # Note, this will change if changing sim_max, nworkers, lb, ub, etc.
108        chksum = np.sum(ensemble.H["energy"])
109        print(f"Final energy checksum: {chksum}")
110
111        exp_chksums = {16: -21935405.696289998, 32: -26563930.6356}
112        exp_chksum = exp_chksums.get(ensemble.exit_criteria.sim_max)
113
114        if exp_chksum is not None:
115            assert np.isclose(chksum, exp_chksum), f"energy check sum is {chksum}"
116            print("Checksum passed")
117        else:
118            print("Run complete. An expected checksum has not been provided for the given sim_max")

More information is available in the Forces GPU tutorial and the video demonstration on Frontier.