Skip to content

Commit 1be130f

Browse files
committed
integrated colocated docker changes in Dockerfile
1 parent 6973808 commit 1be130f

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

axlearn/cloud/gcp/bundler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def _build_and_push(self, *args, **kwargs):
140140
cfg.target=None
141141

142142
colocated_bundler_class = ColocatedArtifactRegistryBundler(cfg=cfg)
143-
colocated_image_name = colocated_bundler_class.bundle(tag="latest")
143+
colocated_image_name = colocated_bundler_class.bundle(tag=cfg.image)
144144

145145
cfg.dockerfile=actual_dockerfile
146146
cfg.image=actual_name

axlearn/cloud/gcp/pathways_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989

9090
def get_colocated_python_image(colocated_image_name, fv: flags.FlagValues = FLAGS) -> str:
9191
repo = gcp_settings("docker_repo", required=False, fv=fv)
92-
return repo+"/"+colocated_image_name+":latest"
92+
return repo+"/"+colocated_image_name+":"+colocated_image_name
9393

9494

9595
def parse_xla_flag_value(value: str) -> Union[int, bool, str]:

colocated_commands.txt

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
export NAME=axlearn-img
55
export COLOCATED_NAME=colocated-img
66
export CKPT_BUCKET_NAME=<>
7+
export CLUSTER_NAME=<>
78

89
axlearn gcp bundle --name=$NAME \
910
--bundler_spec=allow_dirty=True \
@@ -17,7 +18,7 @@ axlearn gcp bundle --name=$NAME \
1718

1819

1920

20-
axlearn gcp launch run --cluster=mlperf-v5p \
21+
axlearn gcp launch run --cluster=$CLUSTER_NAME \
2122
--runner_name gke_tpu_pathways \
2223
--name=$NAME \
2324
--instance_type=tpu-v5p-32 \
@@ -29,3 +30,41 @@ axlearn gcp launch run --cluster=mlperf-v5p \
2930
--bundler_spec=target=tpu \
3031
--colocated_image=$COLOCATED_NAME \
3132
-- TPU_PREMAPPED_BUFFER_SIZE=34359738368 python3 test_benchmark.py --ckpt_path $CKPT_BUCKET_NAME
33+
34+
35+
#### Commands to build images separately ######
36+
37+
export NAME=lk-axlearnimg13
38+
export COLOCATED_NAME=colocated-image23
39+
export CKPT_BUCKET_NAME=<>
40+
export CLUSTER_NAME=<>
41+
42+
### colocated image #####
43+
axlearn gcp bundle --name=$COLOCATED_NAME \
44+
--bundler_spec=allow_dirty=True \
45+
--bundler_type=artifactregistry \
46+
--bundler_spec=dockerfile=Dockerfile \
47+
--bundler_spec=image=$COLOCATED_NAME \
48+
--bundler_spec=target=colocated-python
49+
50+
### axlearn image #####
51+
axlearn gcp bundle --name=$NAME \
52+
--bundler_spec=allow_dirty=True \
53+
--bundler_type=artifactregistry \
54+
--bundler_spec=dockerfile=Dockerfile \
55+
--bundler_spec=image=tpu \
56+
--bundler_spec=target=tpu
57+
58+
axlearn gcp launch run --cluster=$CLUSTER_NAME \
59+
--runner_name gke_tpu_pathways \
60+
--name=$NAME \
61+
--instance_type=tpu-v5p-32 \
62+
--num_replicas=1 \
63+
--bundler_spec=allow_dirty=True \
64+
--bundler_type=artifactregistry \
65+
--bundler_spec=image=tpu \
66+
--bundler_spec=dockerfile=Dockerfile \
67+
--bundler_spec=target=tpu \
68+
--colocated_image=$COLOCATED_NAME \
69+
-- TPU_PREMAPPED_BUFFER_SIZE=34359738368 python3 test_benchmark.py --ckpt_path $CKPT_BUCKET_NAME
70+

0 commit comments

Comments
 (0)