Template with dynamic resources (CPU app and GPU app)
Launches either the CPU-only or GPU version of the forces MPI app and auto-assigns ranks and GPU resources as requested by the generator.
This makes efficient use of each node as the expensive GPU simulations will use the GPUs on the node(s), while the rest of the CPU cores are assigned to the simple CPU-only simulations.
See this publication for a real-world demonstration of these capabilities.
- forces_multi_app.forces_simf.run_forces(H, persis_info, sim_specs, libE_info)
Launches either the CPU-only or GPU version of the forces MPI app and auto-assigns ranks and GPU resources as requested by the generator.
forces_simf.py
1import numpy as np
2
3# Optional status codes to display in libE_stats.txt for each gen or sim
4from libensemble.message_numbers import TASK_FAILED, WORKER_DONE
5
6# Optional - to print GPU settings
7from libensemble.tools.test_support import check_gpu_setting
8
9
def run_forces(H, persis_info, sim_specs, libE_info):
    """Launch either the CPU-only or GPU version of the forces MPI app.

    Ranks and GPU resources are auto-assigned as requested by the
    generator (via the special "num_procs"/"num_gpus" history fields).
    """
    # Particle count comes from the generator; it is also reused as the seed.
    num_particles = str(int(H["x"][0][0]))
    timesteps = "10"

    # App arguments: num particles, timesteps, seed.
    app_args = " ".join([num_particles, timesteps, num_particles])

    # The worker's MPI Executor, supplied by libEnsemble.
    executor = libE_info["executor"]

    # Which registered application to run ("cpu_app" or "gpu_app").
    chosen_app = H["app_type"][0].decode()

    # Launch the chosen forces app and block until it finishes.
    task = executor.submit(app_name=chosen_app, app_args=app_args)
    task.wait()

    # Optional - prints GPU assignment (method and numbers).
    check_gpu_setting(task, assert_setting=False, print_setting=True, desc=chosen_app)

    # Read the final energy from the stat file; any failure to load or
    # parse it marks this simulation as failed.
    try:
        final_energy = np.loadtxt("forces.stat")[-1]
        calc_status = WORKER_DONE
    except Exception:
        final_energy = np.nan
        calc_status = TASK_FAILED

    # Populate the output array expected by the manager.
    output = np.zeros(1, dtype=sim_specs["out"])
    output["energy"] = final_energy

    # Final information for the worker, reported to the manager.
    return output, persis_info, calc_status
The generator in the example below assigns to each simulation either the CPU application or the GPU application and also randomly assigns the number of processors for each simulation. For the GPU application, one GPU is used for each MPI rank. As many nodes as necessary will be used for each application.
The special generator output fields “num_procs” and “num_gpus” are automatically picked up by each worker and these will be used when the simulation is run, unless overridden.
Example usage
1#!/usr/bin/env python
2
3"""
4This example runs different applications, one that uses only CPUs and one
5that uses GPUs. Both use a variable number of processors. The GPU application
6uses one GPU per processor. As the generator creates simulations, it randomly
7assigns between one and max_proc processors to each simulation, and also randomly
8assigns which application is to be run.
9
10The forces.c application should be compiled for the CPU to `forces_cpu.x`, and
11for the GPU (setting the GPU preprocessor condition) to `forces_gpu.x`.
12
13For compile lines, see examples in ../forces_app/build_forces.sh.
14
15It is recommended to run this test such that:
16 ((nworkers - 1) - gpus_on_node) >= gen_specs["user"]["max_procs"]
17
18E.g., if running on one node with four GPUs, then use:
19 python run_libE_forces.py --nworkers 9
20
21E.g., if running on one node with eight GPUs, then use:
22 python run_libE_forces.py --nworkers 17
23"""
24
25import os
26import sys
27
28import numpy as np
29from forces_simf import run_forces # Sim func from current dir
30
31from libensemble import Ensemble
32from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
33from libensemble.executors import MPIExecutor
34from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_diff_simulations as gen_f
35from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs
36
if __name__ == "__main__":
    # Set up the MPI Executor that will launch the forces apps.
    exctr = MPIExecutor()

    # Paths to the pre-built CPU and GPU executables.
    cpu_app = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x")
    gpu_app = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x")

    for app in (cpu_app, gpu_app):
        if not os.path.isfile(app):
            sys.exit(f"{app} not found - please build first in ../forces_app dir")

    exctr.register_app(full_path=cpu_app, app_name="cpu_app")
    exctr.register_app(full_path=gpu_app, app_name="gpu_app")

    # Parse number of workers, comms type, etc. from the command line.
    ensemble = Ensemble(parse_args=True, executor=exctr)

    # One worker hosts the persistent generator; the rest run simulations.
    nsim_workers = ensemble.nworkers - 1

    # The persistent gen does not need resources of its own.
    ensemble.libE_specs = LibeSpecs(
        num_resource_sets=nsim_workers,
        sim_dirs_make=True,
        stats_fmt={"show_resource_sets": True},  # see resource sets in libE_stats.txt
        # resource_info = {"gpus_on_node": 4}, # for mocking GPUs
    )

    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x", "app_type"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],  # no input needed when starting the persistent generator
        persis_in=["sim_id"],  # sim_ids of evaluated points are fed back to the gen
        outputs=[
            ("x", float, (1,)),
            ("num_procs", int),  # picked up automatically by the MPIExecutor
            ("num_gpus", int),  # picked up automatically by the MPIExecutor
            ("app_type", "S10"),  # which app to run (cpu_app or gpu_app)
        ],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([5000]),  # min particles
            "ub": np.array([10000]),  # max particles
            "max_procs": nsim_workers // 2,  # each sim may request 1 up to this many workers
        },
    )

    # One persistent generator is started; simulated values return in batch.
    ensemble.alloc_specs = AllocSpecs(
        alloc_f=alloc_f,
        user={
            "async_return": False,  # False causes batch returns
        },
    )

    # Stop after this many simulations.
    ensemble.exit_criteria = ExitCriteria(sim_max=nsim_workers * 2)

    # Seed random streams for each worker, particularly for gen_f.
    ensemble.add_random_streams()

    # Run the ensemble.
    ensemble.run()

    if ensemble.is_manager:
        # Note: the checksum changes with sim_max, nworkers, lb, ub, etc.
        chksum = np.sum(ensemble.H["energy"])
        print(f"Final energy checksum: {chksum}")

        exp_chksums = {16: -21935405.696289998, 32: -26563930.6356}
        exp_chksum = exp_chksums.get(ensemble.exit_criteria.sim_max)

        if exp_chksum is None:
            print("Run complete. An expected checksum has not been provided for the given sim_max")
        else:
            assert np.isclose(chksum, exp_chksum), f"energy check sum is {chksum}"
            print("Checksum passed")
More information is available in the Forces GPU tutorial and the video demonstration on Frontier.