Examples#

Hyperparameter sweeps#

This example shows how you can run a hyperparameter sweep in Beaker using beaker-py.

Note

You can find the source code for this example on GitHub.

Setup#

Add the following files to a directory of your choosing:

Dockerfile

# This Dockerfile defines the image that we'll use for all of the sweep experiments
# that we submit to Beaker.

# Start from the python:3.9 base image so a Python interpreter is available.
FROM python:3.9

# Copy the entrypoint script from the build context into the image's working directory.
COPY entrypoint.py .

# Run the script with Python when the container starts. run.py supplies each
# experiment's input through `arguments`, and entrypoint.py reads it from
# sys.argv[1] -- presumably Beaker appends those arguments to this command.
ENTRYPOINT ["python", "entrypoint.py"]

entrypoint.py

"""
This is the script that will run on Beaker as the Docker image's "entrypoint".

All it does is write out a simple JSON file with a random number in it to
the experiment's result directory. This is just meant to simulate the results
of a training/evaluation pipeline.
"""

import json
import random
import sys

# NOTE: it's important that this file is called 'metrics.json'. That tells Beaker
# to collect metrics for the task from this file.
OUTPUT_PATH = "/output/metrics.json"


def main(x: int):
    """
    Write a pseudo-random "result" metric, seeded by ``x``, to OUTPUT_PATH.

    Seeding with the input makes the simulated result reproducible for a
    given ``x``.
    """
    random.seed(x)
    with open(OUTPUT_PATH, "w") as metrics_file:
        # The value is drawn after the file is opened, same as before.
        payload = {"result": random.random()}
        json.dump(payload, metrics_file)


if __name__ == "__main__":
    # The sweep input arrives as the first command-line argument.
    sweep_input = int(sys.argv[1])
    main(sweep_input)

run.py

"""
This script will upload an image to Beaker and then submit a bunch
of experiments with different inputs. It will wait for all experiments to finish
and then collect the results.

See the output of 'python run.py --help' for usage.
"""

import argparse
import uuid

import petname
from rich import print, progress, table, traceback

from beaker import *


def unique_name() -> str:
    """
    Build a unique name for the image, the group, and each experiment.

    The name is a generated pet name followed by a dash and the first
    8 characters of a random UUID.
    """
    pet = petname.generate()  # type: ignore
    suffix = str(uuid.uuid4())[:8]
    return "-".join((pet, suffix))


# Cluster used when none is given; this is the value that was previously hard-coded.
DEFAULT_CLUSTER = "ai2/allennlp"


def main(image: str, workspace: str, cluster: str = DEFAULT_CLUSTER):
    """
    Run a small sweep on Beaker and print the collected results.

    Uploads ``image`` to Beaker, submits five experiments that each receive a
    different integer argument, puts them in a group, waits for them to finish,
    and prints a table with the "result" metric of each experiment.

    :param image: The tag of the local Docker image to upload.
    :param workspace: The Beaker workspace to use as the client's default workspace.
    :param cluster: The value forwarded as the first positional argument of
        ``ExperimentSpec.new`` (previously hard-coded to "ai2/allennlp").
        NOTE(review): presumably the cluster to run on -- confirm against the
        installed beaker-py version.
    """
    beaker = Beaker.from_env(default_workspace=workspace)
    sweep_name = unique_name()
    print(f"Starting sweep '{sweep_name}'...\n")

    # Using the `beaker.session()` context manager is not necessary, but it does
    # speed things up since it allows the Beaker client to reuse the same TCP connection
    # for all requests made within-context.
    with beaker.session():
        # Upload image to Beaker.
        print(f"Uploading image '{image}' to Beaker...")
        beaker_image = beaker.image.create(unique_name(), image)
        print(
            f"Image uploaded as '{beaker_image.full_name}', view at {beaker.image.url(beaker_image)}\n"
        )

        # Launch experiments. Each one gets its index `x` as its only argument,
        # which the image's entrypoint receives on the command line.
        experiments = []
        for x in progress.track(range(5), description="Launching experiments..."):
            spec = ExperimentSpec.new(
                cluster,
                description=f"Run {x+1} of sweep {sweep_name}",
                beaker_image=beaker_image.full_name,
                result_path="/output",
                priority=Priority.preemptible,
                arguments=[str(x)],
            )
            experiment = beaker.experiment.create(f"{sweep_name}-{x+1}", spec)
            experiments.append(experiment)
        print()

        # Create group.
        print("Creating group for sweep...")
        group = beaker.group.create(
            # The description must be an f-string, otherwise the literal text
            # "{sweep_name}" ends up in the group description.
            sweep_name, *experiments, description=f"Group for sweep {sweep_name}"
        )
        print(f"Group '{group.full_name}' created, view at {beaker.group.url(group)}\n")

        # Wait for experiments to finish.
        print("Waiting for experiments to finalize...\n")
        experiments = beaker.experiment.wait_for(*experiments)
        print()

        # Display results as a table.
        # NOTE(review): the "Input" column uses the position in `experiments`, so it
        # assumes `wait_for` returns experiments in submission order -- confirm.
        results_table = table.Table(title="Results for sweep")
        results_table.add_column("Input")
        results_table.add_column("Output")
        for x, experiment in enumerate(
            progress.track(experiments, description="Gathering results...")
        ):
            metrics = beaker.experiment.metrics(experiment)
            assert metrics is not None
            results_table.add_row(f"x = {x}", f"{metrics['result']:.4f}")
        print()
        print(results_table)


if __name__ == "__main__":
    traceback.install()

    parser = argparse.ArgumentParser(description="Run a hyperparameter sweep in Beaker")
    parser.add_argument(
        "image", type=str, help="""The tag of the local Docker image built from the Dockerfile."""
    )
    parser.add_argument("workspace", type=str, help="""The Beaker workspace to use.""")
    # Optional so that the two-argument invocation keeps working, while the
    # three-argument form `run.py $image $workspace $cluster` is accepted too.
    parser.add_argument(
        "cluster",
        type=str,
        nargs="?",
        default=DEFAULT_CLUSTER,
        help="""The Beaker cluster to use (default: %(default)s).""",
    )
    opts = parser.parse_args()

    main(image=opts.image, workspace=opts.workspace, cluster=opts.cluster)

Running it#

To run it, first build the Docker image:

image=sweep
docker build -t $image .

Then launch the sweep with:

workspace=ai2/my-sweep  # change this to the workspace of your choosing
cluster=ai2/petew-cpu  # change this to the cluster of your choosing
python run.py $image $workspace $cluster