From bbc5e48713e322416786877d61f4571c8b6ac050 Mon Sep 17 00:00:00 2001 From: Siddharth Jain Date: Sun, 14 Jan 2024 00:57:53 +0530 Subject: [PATCH 01/52] Cleaned up POSET-RL python side code (cherry picked from commit 3e77ea6f47a70330d6281dd86c5c1e9cef432798) --- model/LoopDistribution/src/inference.py | 437 +++++++++++++++++ model/POSET-RL/Environment_pipe.py | 614 ++++++++++++++++++++++++ model/POSET-RL/Filesystem.py | 38 ++ model/POSET-RL/experiment.py | 171 +++++++ model/POSET-RL/inference.py | 283 +++++++++++ model/POSET-RL/model.py | 41 ++ 6 files changed, 1584 insertions(+) create mode 100644 model/LoopDistribution/src/inference.py create mode 100755 model/POSET-RL/Environment_pipe.py create mode 100755 model/POSET-RL/Filesystem.py create mode 100755 model/POSET-RL/experiment.py create mode 100755 model/POSET-RL/inference.py create mode 100755 model/POSET-RL/model.py diff --git a/model/LoopDistribution/src/inference.py b/model/LoopDistribution/src/inference.py new file mode 100644 index 000000000000..572515b5c18a --- /dev/null +++ b/model/LoopDistribution/src/inference.py @@ -0,0 +1,437 @@ +import argparse + +# import collections +from argparse import Namespace +from atexit import register +from distutils.command.config import config +from itertools import count + +# from email import parser +import grpc +from concurrent import futures +from tqdm import tqdm +import os +import json +import glob +from ld_config import MODEL_PATH, TEST_DIR, BUILD_DIR, MODEL_DIR +import traceback +import sys + + +sys.path.extend( + [ + f"{BUILD_DIR}/MLCompilerBridge/MLModelRunner/gRPCModelRunner/Python-Utilities", + f"{MODEL_DIR}", + # f"{REPO_DIR}/llvm/lib/Transforms/models" + ] +) +import LoopDistribution_pb2, LoopDistribution_pb2_grpc +import ray +from ray import tune +from ray.rllib.agents import ppo + +from simple_q import SimpleQTrainer, DEFAULT_CONFIG +from multiagentEnv import DistributeLoopEnv +# from register_action_space import RegisterActionSpace +from 
def __init__(self, model_path, use_pipe=False, data_format=None):
    """Build the multi-agent SimpleQ trainer and restore it from a checkpoint.

    Args:
        model_path: Checkpoint path passed to ``SimpleQTrainer.restore``.
        use_pipe: Stored in the environment config under ``"use_pipe"``.
        data_format: Stored in the environment config under ``"data_format"``.
    """
    # Logging is configured once at module import time. Further
    # logging.basicConfig() calls do nothing once the root logger has a
    # handler, so none are made here.
    config = DEFAULT_CONFIG.copy()
    config["num_workers"] = 0
    config["explore"] = False

    # DEFAULT_CONFIG.copy() is shallow: copy the nested dict before editing it
    # so the shared module-level default is not mutated.
    env_config = dict(config["env_config"])
    env_config.update(
        {
            "target": "X86",
            "state_size": 300,
            "mode": "inference",
            "dump_type": "One",
            "intermediate_data": "./temp",
            "use_pipe": use_pipe,
            "data_format": data_format,
        }
    )
    config["env_config"] = env_config

    state_size = env_config["state_size"]
    # "max_number_nodes" is expected to come from DEFAULT_CONFIG's env_config.
    max_nodes = env_config["max_number_nodes"]

    ModelCatalog.register_custom_model("select_node_model", SelectNodeNetwork)
    ModelCatalog.register_custom_model("distribution_model", DistributionTask)

    # Observation of a single node.
    box_obs = Box(FLOAT_MIN, FLOAT_MAX, shape=(state_size,), dtype=np.float32)
    # Observation of every node at once (one row per node).
    box_obs_select_node = Box(
        FLOAT_MIN,
        FLOAT_MAX,
        shape=(max_nodes, state_size),
        dtype=np.float32,
    )

    obs_select_node = Dict(
        {
            "action_mask": Box(0, 1, shape=(max_nodes,)),
            "state": box_obs_select_node,
        }
    )
    obs_distribute_node = Dict(
        {
            "prev_Node": box_obs,
            "curr_Node": box_obs,
            "dist_flag": Box(0, 1, shape=(1,)),
            "action_mask": Box(0, 1, shape=(2,)),
        }
    )

    def policy_mapping_fn(agent_id, episode=None, **kwargs):
        # Agents are named "<role>_agent_<n>"; anything else maps to None.
        if agent_id.startswith("select_node_agent"):
            return "select_node_policy"
        elif agent_id.startswith("distribution_agent"):
            return "distribution_policy"

    def policy_spec(obs_space, action_space, model_name):
        # (policy class, observation space, action space, per-policy config)
        return (
            None,
            obs_space,
            action_space,
            {
                "gamma": 0.9,
                "model": {
                    "custom_model": model_name,
                    "custom_model_config": {
                        "state_size": state_size,
                        "fc1_units": 64,
                        "fc2_units": 64,
                    },
                },
            },
        )

    policies = {
        "select_node_policy": policy_spec(
            obs_select_node, Discrete(max_nodes), "select_node_model"
        ),
        "distribution_policy": policy_spec(
            obs_distribute_node, Discrete(2), "distribution_model"
        ),
    }

    config["multiagent"] = {
        "policies": policies,
        "policy_mapping_fn": function(policy_mapping_fn),
    }

    self.trained_agent = SimpleQTrainer(env=DistributeLoopEnv, config=config)
    self.trained_agent.restore(model_path)

    self.config = config

    # Communication state; run_predict copies these onto each environment.
    self.temp_rootname = "/tmp/loopdistppipe"
    self.tc = None
    self.fc = None
    self.tensor_specs = None
    self.advice_spec = None
def run_predict_multiple_loops(self, rdgs):
    """Run inference on every RDG and collect the predicted sequences.

    Args:
        rdgs: Iterable of inputs, each passed unchanged to ``run_predict``.

    Returns:
        A one-element list wrapping the list of per-RDG sequences, in input
        order (the extra nesting is what callers currently receive).
    """
    dist_seq = []
    for rdg in rdgs:
        # run_predict returns (reward, sequence); only the sequence is kept.
        _, seqs = self.run_predict(rdg)
        print("seqs: {}".format(seqs))
        dist_seq.append(seqs)

    return [dist_seq]
class service_server(LoopDistribution_pb2_grpc.LoopDistribution):
    """gRPC servicer that answers ``getAdvice`` using a loaded inference object."""

    def __init__(self, inference_obj) -> None:
        """Keep a reference to the object whose ``run_predict`` serves requests."""
        self.inference_obj = inference_obj

    def getAdvice(self, request, context):
        """Handle one advice request.

        Each RPC carries exactly one request, so it is processed once. The
        previous ``while not done`` loop never updated ``done`` and would spin
        forever if the ``"Exit"`` comparison ever matched.

        Returns:
            The sequence produced by ``run_predict``, or an empty ``Advice``
            message on "Exit" or when prediction raises.
        """
        try:
            if request == "Exit":
                # NOTE(review): nothing to predict for an exit request; an
                # empty Advice is returned here - confirm against the client.
                return LoopDistribution_pb2.Advice(action=[])
            _, seq = self.inference_obj.run_predict(request)
            return seq
        except Exception:
            # Top-level RPC boundary: report the failure and answer with an
            # empty advice instead of propagating the exception.
            print('Error')
            traceback.print_exc()
            return LoopDistribution_pb2.Advice(action=[])
rdgs.append(json.load(f)) + # rdgs.append(json.dumps(json.load(f))) + + predict_loop_distribution(rdgs, model_path) + + # for file in os.listdir(test_dir): + # reward, count = inference_obj.run_predict(file) + # # action, count = inference_obj.compute_action(file) + + select_node_agent = "select_node_agent_{}".format(count) + distribution_agent = "distribution_agent_{}".format(count) + + if use_pipe: + run_pipe_communication(args.data_format, args.pipe_name) + elif use_grpc: + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10), options = [ + ('grpc.max_send_message_length', 200*1024*1024), #50MB + ('grpc.max_receive_message_length', 200*1024*1024) #50MB + ]) + ray.init() + inference_obj = DistributionInference(MODEL_PATH) + inference_obj.use_pipe = False + LoopDistribution_pb2_grpc.add_LoopDistributionServicer_to_server(service_server(inference_obj), server) + server.add_insecure_port('localhost:' + args.server_port) + server.start() + print("Server running at port: " + args.server_port) + server.wait_for_termination() \ No newline at end of file diff --git a/model/POSET-RL/Environment_pipe.py b/model/POSET-RL/Environment_pipe.py new file mode 100755 index 000000000000..983908bbe1a1 --- /dev/null +++ b/model/POSET-RL/Environment_pipe.py @@ -0,0 +1,614 @@ +# Defines environment for the RL model + +import os +import gym +import subprocess +import sys +import numpy as np +from gym.spaces import Discrete, Box, Dict +from Filesystem import * +import tempfile +import time +from ray.rllib.utils.torch_ops import FLOAT_MIN, FLOAT_MAX +from tqdm import tqdm +import logging +from google.protobuf.json_format import MessageToJson +import json +from po_config import BUILD_DIR, CONFIG_DIR +import grpc +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/MLModelRunner/gRPCModelRunner/Python-Utilities/") +import posetRL_pb2_grpc, posetRL_pb2 +from google.protobuf.empty_pb2 import Empty +from typing import Union + 
+sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/CompilerInterface/") +from PipeCompilerInterface import PipeCompilerInterface +from GrpcCompilerInterface import GrpcCompilerInterface + +#import pipeCompilerInterface +empty_message = Empty() + + +class PhaseOrder(gym.Env): + def __init__(self, config): + self.ENV_Dir = None + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = None + self.baseBinarySize = None + self.lastMcaThroughtput = None + self.OzMcaThroughtput = None + self.doneList = [] + self.StateIndex = 0 + self.embedding = None + self.iteration_counter = 0 + self.rename_Dir = False + self.FileSys_Obj = fsystem(config["llvm_dir"], config["ir2vec_dir"]) + self.FileSys_Obj.createFolder("env") + self.temporaryDirectory = tempfile.gettempdir() + + self.clang_arch_flag = "-mcpu=cortex-a72" if config["target"] == "AArch64" else "" + self.opt_arch_flag = "--mcpu=cortex-a72" if config["target"] == "AArch64" else "" + + self.alpha = config["alpha"] + self.beta = config["beta"] + self.size_reward_thresh = config["size_reward_thresh"] + self.mca_reward_thresh = config["mca_reward_thresh"] + + # Action space size with optimization sub-sequences obtained from ODG + self.action_space_size = config["action_space_size"] + self.action_space = Discrete(self.action_space_size) + self.action_count = 0 + self.cur_action_seq = [] + self.cur_action_mask = [1] * self.action_space_size + self.mode = "train" + self.Obs = None + obs_space = Box(FLOAT_MIN, FLOAT_MAX, + shape=(config["state_size"], ), dtype=np.float32) + self.observation_space = Dict({"action_mask": Box( + 0, 1, shape=(self.action_space_size,)), "state": obs_space}) + + self.mode = config["mode"] + self.grpc_rtt = 0 + if "worker_index" in config.keys(): + self.worker_index = config.worker_index + else: + self.worker_index = 0 + + if self.mode != 'inference': + self.FileSys_Obj.createFolder("env") + self.make(os.path.abspath(config["train_dir"])) + 
def getEmbedding(self, fileName):
    """Run the IR2Vec binary on ``fileName`` and load the resulting embedding.

    The embedding is written to ``<Curr_Dir>/<StateIndex>`` and read back with
    ``numpy.loadtxt``.

    Args:
        fileName: Path of the LLVM IR file to embed.

    Returns:
        The embedding as a NumPy array with every value limited to the range
        [-100000.0, 100000.0].
    """
    EmbFile = self.Curr_Dir + "/" + str(self.StateIndex)
    # Get IR2Vec FlowAware embeddings. Passing an argument list (no shell)
    # keeps paths containing spaces or shell metacharacters intact.
    command = [
        self.FileSys_Obj.IR2VecBin,
        "-fa",
        "-vocab",
        self.FileSys_Obj.SeedEmbeddingPath,
        "-o",
        EmbFile,
        "-level",
        "p",
        fileName,
    ]
    completed = subprocess.run(command)
    if completed.returncode != 0:
        # As before, a failing tool does not abort here; loading the file
        # below fails if no embedding was produced.
        logging.error(
            "IR2Vec exited with status %s for %s", completed.returncode, fileName
        )
    emb = np.loadtxt(EmbFile)
    # Threshold for embedding values
    return np.clip(emb, -100000.0, 100000.0)
directory to point to the folder for the chosen file + self.Curr_Dir = self.ENV_Dir + "/" + os.path.splitext(fileName)[0] + logging.info("Curr_Dir {}".format(self.Curr_Dir)) + + # Creating the folder for the chosen file + self.FileSys_Obj.createFolder(self.Curr_Dir, True) + + # Copying the LL file from training folder to newly created folder + if self.mode != 'inference': + self.FileSys_Obj.copyFile(os.path.join( + self.FileSys_Obj.TrainingDataPath, fileName), self.Curr_Dir) + else: + # quiet# print("test_Benchmark {}".format(self.test_Benchmark)) + logging.info("test_Benchmark {}".format(self.test_Benchmark)) + self.FileSys_Obj.copyFile(os.path.join( + self.test_Benchmark, fileName), self.Curr_Dir) + + # Setting up different Paths and Minimum Size + self.BaseIR = os.path.join(self.Curr_Dir, fileName) + self.baseBinarySize, self.minBinarySize = self.getBinarySize( + self.BaseIR, True) + self.lastBinarySize = self.baseBinarySize + + self.CurrIR = os.path.join(self.Curr_Dir, fileName) + self.prev_action = None + + def reset(self, test_file=None, embedding=None): + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = 0 + self.baseBinarySize = None + self.embedding = None + self.stub = None + self.StateIndex = 0 + self.cur_action_mask = [1] * self.action_space_size + + if self.mode != 'inference': + logging.info("Number of files {}".format(len(self.Obs))) + if (len(self.Obs) >= 1): + + index = np.random.random_integers(0, len(self.Obs) - 1) + + self.serverId = self.startServer( + self.Obs[index], "127.0.0.1:" + str(self.server_port)) + # print("Server started at pid:", self.serverId) + + if self.use_grpc and self.compiler_interface is None: + self.compiler_interface = GrpcCompilerInterface(mode='client', stub_class=posetRL_pb2_grpc.PosetRLServiceStub, hostip='127.0.0.1', hostport= self.server_port) + + self.createEnv(self.Obs[index]) + self.doneList.append(self.Obs[index]) + self.Obs.remove(self.Obs[index]) 
def readObservation(self):
    """Read one observation from the compiler interface as a 300-value array.

    Values are taken from ``features[0]`` for the "bytes" format and from
    ``features["embedding"]`` for the "json" format. Positions that receive no
    value are zero; they used to hold uninitialised memory.

    Returns:
        A NumPy float array of shape (300,).
    """
    embedding = np.zeros(300)
    features = self.compiler_interface.evaluate()

    if self.data_format == "bytes":
        values = features[0]
    elif self.data_format == "json":
        values = features["embedding"]
    else:
        values = None
        logging.warning(
            "readObservation: unsupported data format %r; returning zeros",
            self.data_format,
        )

    if values is not None:
        # Index-based copy: only len() and item access are required of the
        # container handed back by the compiler interface.
        for i in range(len(values)):
            embedding[i] = values[i]

    return embedding
+ baseBinarySize = os.path.getsize(self.Curr_Dir + "/base_binary.o") + logging.info("base {}".format(baseBinarySize)) + + # Compute Oz Binary size + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + " -S -add-size-attr --enableMinSizeAttr --removeNoInlineAttr " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + fileName + ".ll" + f"-ml-config-path={CONFIG_DIR} " + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + " -S -Oz " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll" + f" -ml-config-path={CONFIG_DIR} " + os.system(command) + command = self.FileSys_Obj.ClangPath + " " + self.clang_arch_flag + " -c " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll -o " + \ + self.Curr_Dir + "/" + "Oz_binary.o" + f" -mllvm -ml-config-path={CONFIG_DIR} " + os.system(command) + minBinarySize = os.path.getsize(self.Curr_Dir + "/Oz_binary.o") + + # Get Oz MCA Throughput + self.OzMcaThroughtput = self.getMCACost( + self.Curr_Dir + "/" + fileName + "_Oz") + logging.info("base {}".format(self.OzMcaThroughtput)) + + return baseBinarySize, minBinarySize + + # Get next action (sub-sequence) to be applied on the LLVM IR + def step(self, action_index): + prev_embedding = self.embedding + + Reward = 0 + done = False + # Get embedding for New IR + # here we can use gRPC server to get the new embeddings + # self.embedding = self.applyActionGetEmbeddings(action=action_index) + + # make call to compiler to get the updated embedding + if self.mode == 'inference' and self.use_grpc: + pass + else: + # if self.use_pipe or self.use_grpc: + # result = self.compiler_interface.evaluate() + if self.use_pipe: + self.sendResponse(action_index) + result = self.readObservation() + elif self.use_grpc: + result = self.stable_grpc("Action", action_index) # LLVMgRPC way + else: + Reward, NextStateIR = self.getLocalReward(action_index) + result = self.getEmbedding(NextStateIR) + self.CurrIR = NextStateIR + if 
# Get llvm-mca Block RThroughput for the IR
def getMCACost(self, new_file):
    """Lower ``<new_file>.ll`` with llc and return llvm-mca's Block RThroughput.

    Args:
        new_file: Path of the IR file without its ``.ll`` extension; the
            assembly is written next to it as ``<new_file>.s``.

    Returns:
        The last "Block RThroughput" value printed by llvm-mca. In pipe mode,
        0 is returned when no such line is printed.

    Raises:
        RuntimeError: Outside pipe mode, when llvm-mca prints no
            "Block RThroughput" line (previously an UnboundLocalError).
    """
    cmd1 = self.FileSys_Obj.LlcPath + " " + self.opt_arch_flag + \
        " " + new_file + ".ll" + " -o " + new_file + ".s" + f" -ml-config-path={CONFIG_DIR}"
    os.system(cmd1)
    cmd2 = self.FileSys_Obj.MCAPath + " " + \
        self.opt_arch_flag + " " + new_file + ".s"
    # run() waits for llvm-mca and closes its pipe, so no child process or
    # file descriptor is left behind. stderr is merged into stdout.
    completed = subprocess.run(cmd2, executable='/bin/bash', shell=True,
                               stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                               encoding='utf8')

    # Pipe mode keeps its historical default of 0 when nothing is reported.
    currMcaThroughtput = 0 if self.use_pipe else None
    for line in completed.stdout.splitlines():
        pair = line.split(':')
        if pair[0] == 'Block RThroughput':
            currMcaThroughtput = float(pair[1].strip(' '))

    logging.info("LLVM-MCA command: {}".format(cmd2))

    if currMcaThroughtput is None:
        logging.critical('Error : {}'.format(completed.stdout))
        raise RuntimeError(
            "llvm-mca reported no Block RThroughput for command: " + cmd2
        )

    return currMcaThroughtput
def getReward(self, AssemblyFilePath):
    """Compute the reward for the assembly file produced by the compiler.

    The reward is ``alpha * size_term + beta * throughput_term``. The size
    term is the drop in object size since the last measurement, and the
    throughput term is the drop in llvm-mca Block RThroughput. Both are
    normalised and limited to their configured thresholds.

    Args:
        AssemblyFilePath: Path of the assembly file to assemble and analyse.

    Returns:
        The combined reward as a float.

    Raises:
        RuntimeError: When llvm-mca prints no "Block RThroughput" line
            (previously an UnboundLocalError).
    """
    # object size reward
    objectFilePath = f"{self.temporaryDirectory}/objectfile_{self.worker_index}.o"
    objectFileGenerationCommand = self.FileSys_Obj.ClangPath + " -c " + \
        self.clang_arch_flag + " " + AssemblyFilePath + " -o " + objectFilePath + f" -mllvm -ml-config-path={CONFIG_DIR}"

    os.system(objectFileGenerationCommand)

    currentBinarySize = os.path.getsize(objectFilePath)

    # Normalise by the O0-vs-Oz size gap when it is positive, otherwise by the
    # base size.
    if ((self.baseBinarySize - self.minBinarySize) > 0):
        reward_binarySize = (self.lastBinarySize - currentBinarySize) / \
            (self.baseBinarySize - self.minBinarySize)
    else:
        reward_binarySize = (self.lastBinarySize -
                             currentBinarySize) / self.baseBinarySize

    self.lastBinarySize = currentBinarySize

    llvmMcaCommand = f"{self.FileSys_Obj.MCAPath} {self.opt_arch_flag} {AssemblyFilePath}"
    # run() waits for llvm-mca and closes its pipe, so no child process or
    # file descriptor is left behind. stderr is merged into stdout.
    completed = subprocess.run(llvmMcaCommand, executable='/bin/bash', shell=True,
                               stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                               encoding='utf8')

    currMcaThroughtput = None
    for line in completed.stdout.splitlines():
        pair = line.split(':')
        if pair[0] == 'Block RThroughput':
            currMcaThroughtput = float(pair[1].strip(' '))

    if currMcaThroughtput is None:
        logging.critical('Error : {}'.format(completed.stdout))
        raise RuntimeError(
            "llvm-mca reported no Block RThroughput for command: " + llvmMcaCommand
        )

    logging.info("currMcaThroughtput: {}".format(currMcaThroughtput))
    logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput))
    logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput))

    # The first measurement is compared against the Oz throughput; later ones
    # against the previous measurement.
    if self.lastMcaThroughtput is None:
        mca_cost = (self.OzMcaThroughtput -
                    currMcaThroughtput) / self.OzMcaThroughtput
    else:
        mca_cost = (self.lastMcaThroughtput -
                    currMcaThroughtput) / self.OzMcaThroughtput

    self.lastMcaThroughtput = currMcaThroughtput

    logging.info("Thr-debug:{}".format(mca_cost))
    logging.info("Size-debug:{}".format(reward_binarySize))

    # Reward thresholds
    if mca_cost > self.mca_reward_thresh:
        mca_cost = self.mca_reward_thresh
    elif mca_cost < -self.mca_reward_thresh:
        mca_cost = -self.mca_reward_thresh

    if reward_binarySize > self.size_reward_thresh:
        reward_binarySize = self.size_reward_thresh
    elif reward_binarySize < -self.size_reward_thresh:
        reward_binarySize = -self.size_reward_thresh

    # Cumulative reward with alpha and beta hyperparameters
    reward = self.alpha*reward_binarySize + self.beta*mca_cost

    return reward
def stable_grpc(self, op, action):
    """Issue one compiler request, retrying while the server is unavailable.

    Args:
        op: ``"Exit"`` calls ``stopServer``; any other value calls
            ``applyActionGetEmbeddings`` with ``action``.
        action: Action forwarded to ``applyActionGetEmbeddings``.

    Returns:
        The result of the underlying call. ``None`` is returned after more
        than five UNAVAILABLE failures, or on any other RPC error outside
        inference mode.

    Raises:
        grpc.RpcError: For non-UNAVAILABLE errors in inference mode.
    """
    retry_limit = 5
    backoff_factor = 1.5
    delay = 0.1
    failures = 0

    while True:
        try:
            started = time.time()
            if op == "Exit":
                outcome = self.stopServer()
            else:
                outcome = self.applyActionGetEmbeddings(action=action)
            # Accumulate the round-trip time of successful calls only.
            self.grpc_rtt += time.time() - started
            return outcome
        except grpc.RpcError as err:
            if err.code() != grpc.StatusCode.UNAVAILABLE:
                if self.mode == 'inference':
                    raise
                print("Unknown error", err.code())
                return None

            failures += 1
            if failures > retry_limit:
                print("Maximum attempts completed")
                return None
            # Wait before the next attempt, lengthening the wait each time.
            time.sleep(delay)
            delay *= backoff_factor
mode 100755 index 000000000000..f29c29da694a --- /dev/null +++ b/model/POSET-RL/Filesystem.py @@ -0,0 +1,38 @@ +# Defines filesystem utilities + +import os +import shutil + +class fsystem: + # Get paths relative to LLVM and IR2Vec directories + def __init__(self, LLVMPath="", IR2Vec=""): + self.LLVMPath = os.path.abspath(LLVMPath) + self.OptPath = os.path.join(self.LLVMPath, "bin", "opt") + self.MCAPath = os.path.join(self.LLVMPath, "bin", "llvm-mca") + self.AddOptAttr = os.path.join(self.LLVMPath, "add-size-attr.so") + self.ClangPath = os.path.join(self.LLVMPath, "bin", "clang") + self.LlcPath = os.path.join(self.LLVMPath, "bin" , "llc") + self.IR2Vec = os.path.abspath(IR2Vec) + self.SeedEmbeddingPath = os.path.join(self.IR2Vec, "seedEmbeddingVocab-300-llvm10.txt") + self.IR2VecBin = os.path.join(self.IR2Vec, "ir2vec") + self.TrainingDataPath = None + self.PhaseOrderDir = os.getcwd() + + self.LLFileList = [] + + def generateTrainingData(self, path): + self.TrainingDataPath = os.path.join(self.PhaseOrderDir,path) + for file in os.listdir(self.TrainingDataPath): + self.LLFileList.append(file) + + def createFolder(self, path, new=False): + path = os.path.join(self.PhaseOrderDir,path) + if(not os.path.exists(path)): + os.mkdir(path) + elif(os.path.exists(path)): + if(new): + shutil.rmtree(path) + os.mkdir(path) + + def copyFile(self, src, dest): + shutil.copy(src, dest) diff --git a/model/POSET-RL/experiment.py b/model/POSET-RL/experiment.py new file mode 100755 index 000000000000..2aacf21a6ced --- /dev/null +++ b/model/POSET-RL/experiment.py @@ -0,0 +1,171 @@ +# Train RLLib model +# For more details on RLLib: https://docs.ray.io/en/latest/rllib/index.html +# Usage: python experiment.py --llvm_dir \ +# --ir2vec_dir \ +# --train_dir \ +# --train-iterations +# [--isAArch] +# --log-dir +# --alpha +# --beta +# --size_reward_thresh +# --mca_reward_thresh +# Example: python experiment.py --llvm_dir POSET_RL/llvm-project-10/build \ +# --ir2vec_dir POSET_IR2Vec \ +# 
--train_dir train_ll \ +# --train-iterations 100 \ +# --isAArch \ +# --log-dir 10-5-0.2-0.2-aarch \ +# --alpha 10 \ +# --beta 5 \ +# --size_reward_thresh 0.2 \ +# --mca_reward_thresh 0.2 + +import argparse +import os + +import ray +from ray import tune +from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG +#from Environment_1 import PhaseOrder +from Environment_pipe import PhaseOrder +from ray.rllib.models import ModelCatalog +from model import CustomPhaseOrderModel +from po_config import BUILD_DIR, MODEL_DIR + +from Filesystem import * + +import logging +#import utils + +parser = argparse.ArgumentParser() +parser.add_argument("-llvm", "--llvm_dir", required=True, help = "path to llvm-build directory") +parser.add_argument("-ir2vec", "--ir2vec_dir", required=True, help = "path to IR2vec directory which has seed embedding and IR2Vec binary files") +parser.add_argument("-train", "--train_dir", required=True, help = "path to directory with LLVM IR files for training") +parser.add_argument("-iter", "--train-iterations", required=False, type=int, default=300) +parser.add_argument("-a", "--isAArch", required=False, default=False, action='store_true') +parser.add_argument("-log", "--log_dir", required=False, type=str, default="0.2thresh-10alpha-5beta-x86") +parser.add_argument("-alpha", "--alpha", required=False, type=float, default=10) +parser.add_argument("-beta", "--beta", required=False, type=float, default=5) +parser.add_argument("-size_reward_thresh", "--size_reward_thresh", required=False, type=float, default=0.2) +parser.add_argument("-mca_reward_thresh", "--mca_reward_thresh", required=False, type=float, default=0.2) +parser.add_argument("--use_pipe", action='store_true', help = "Use pipe communication", required=False, default=False) +parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) +parser.add_argument("--pipe_name",type=str,help="String Pipe name", default='posetrl_pipe') 
+parser.add_argument( + "--data_format", + type=str, + choices=["json", "protobuf", "bytes"], + help="Data format to use for communication", +) +parser.add_argument("--server_port", type=str, help="Server port", default=50051) + +# Use for resuming training from checkpoint +checkpoint = None + +def experiment(config): + iterations = config.pop("train-iterations") + global checkpoint + train_results = {} + print(config) + train_agent = DQNTrainer(config=config, env=PhaseOrder) + if checkpoint is not None: + train_agent.restore(checkpoint) + + for i in range(iterations): + train_results = train_agent.train() + # train_agent.export_policy_model("/home/cs20btech11018/repos/ML-Phase-Ordering/RLLib-PhaseOrder/poset-RL-onnx-model", onnx=int(os.getenv("ONNX_OPSET", "11"))) + # break + + + checkpoint = train_agent.save(tune.get_trial_dir()) + train_agent.stop() + +if __name__ == '__main__': + args = parser.parse_args() + logger = logging.getLogger("__file__") + log_level = logging.DEBUG + + if os.path.exists("running.log"): + os.remove("running.log") + logging.basicConfig(filename='running.log', format='%(levelname)s - %(filename)s - %(message)s', level=log_level) + logging.info('Starting training') + logging.info(args) + + ray.init() + default_config = DEFAULT_CONFIG.copy() + + cfg = { + "hiddens": [], + "dueling": False, + } + + ModelCatalog.register_custom_model("My_torch_model", CustomPhaseOrderModel) + target_arch = "AArch64" if args.isAArch else "X86" + # Define model and environment config with below hyperparameters + config = dict( + { + "model": { + "custom_model": "My_torch_model", + "custom_model_config": { + "state_size": 300, + "fc1_units": 64, + "fc2_units": 64 + }, + }, + "env": PhaseOrder, + "lr": 0.0001, + "env_config": { + "target": target_arch, + "state_size": 300, + "mode": "train", + "dump_type": "One", + "intermediate_data": "./temp", + "llvm_dir": args.llvm_dir, + "ir2vec_dir": args.ir2vec_dir, + "train_dir": args.train_dir, + "alpha": args.alpha, + 
"beta": args.beta, + "size_reward_thresh": args.size_reward_thresh, + "mca_reward_thresh": args.mca_reward_thresh, + "action_space_size": 34, + "use_pipe": args.use_pipe, + "data_format": args.data_format, + "use_grpc": args.use_grpc, + "server_port": args.server_port, + "pipe_name": args.pipe_name + }, + "train_batch_size": 512, + "exploration_config": { + "type": "EpsilonGreedy", + "initial_epsilon": 1.0, + "final_epsilon": 0.01, + "epsilon_timesteps": 20000, + }, + # "horizon":10, + "framework": "torch", + "train-iterations": args.train_iterations, + "batch_mode": "truncate_episodes", + "seed": 1, + "num_gpus": 0, + "num_workers": 1, + "num_gpus_per_worker": 0 + }, + **cfg) + # config = dict(config,**default_config) + config["timesteps_per_iteration"] = 90 + + if args.use_grpc: + experiment_name = "grpc_results" + elif args.use_pipe: + experiment_name = f"pipe_{args.data_format}_results" + else: + experiment_name = "orignal_run_results" + + #Start model training with given config + tune.run( + experiment, + config=config, + resources_per_trial=DQNTrainer.default_resource_request(config), + local_dir=(MODEL_DIR + "/checkpoint_dir"), + name=experiment_name) # name=args.log_dir diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py new file mode 100755 index 000000000000..525dfb9c3a08 --- /dev/null +++ b/model/POSET-RL/inference.py @@ -0,0 +1,283 @@ +# Script to perform inference on test LLVM IR files +# Use run-inference.sh to call this script +# Usage: python inference.py --ir2vec_dir \ +# --test_dir \ +# --model \ +# [--isAArch] +# --alpha +# --beta +# --size_reward_thresh +# --mca_reward_thresh +# Example: python inference.py --ir2vec_dir POSET-RL/IR2Vec \ +# --test_dir test_ll \ +# --model POSET_RL/saved_models/model \ +# [--isAArch] +# --alpha 10 +# --beta 5 +# --size_reward_thresh 0.2 +# --mca_reward_thresh 0.2 + +import argparse +import numpy as np +import argparse +import os + +# import utils +import logging +import time + +import ray +from 
ray import tune +from ray.rllib.agents import ppo +from ray.rllib.agents import dqn +from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG +from Environment_pipe import PhaseOrder +from ray.rllib.models import ModelCatalog +from model import CustomPhaseOrderModel +from ray.tune.registry import register_env +from datetime import datetime +from po_config import BUILD_DIR + +import sys +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/MLModelRunner/gRPCModelRunner/Python-Utilities") +import posetRL_pb2_grpc, posetRL_pb2 + +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/CompilerInterface/") +from GrpcCompilerInterface import GrpcCompilerInterface + +from Filesystem import * + +logger = logging.getLogger(__file__) +logging.basicConfig( + filename="inference.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, +) + +import networkx +from networkx.readwrite import json_graph +import json +import torch +import pydot + +import grpc +from concurrent import futures +import traceback + +parser = argparse.ArgumentParser() +parser.add_argument( + "--ir2vec_dir", + required=False, + help="path to IR2vec directory which has seed embedding and IR2Vec binary files", +) +parser.add_argument( + "--test_dir", help="Path to test directory", required=False, default="./" +) +parser.add_argument("--model", help="Path to saved checkpoint") +parser.add_argument( + "-a", "--isAArch", required=False, default=False, action="store_true" +) +parser.add_argument("-alpha", "--alpha", required=False, type=float, default=10) +parser.add_argument("-beta", "--beta", required=False, type=float, default=5) +parser.add_argument( + "-size_reward_thresh", + "--size_reward_thresh", + required=False, + type=float, + default=0.2, +) +parser.add_argument( + "-mca_reward_thresh", "--mca_reward_thresh", required=False, type=float, default=0.2 +) +parser.add_argument( + "--use_pipe", + action="store_true", + help="Use pipe communication", + required=False, + default=False, +) 
+parser.add_argument("--server_port", type=str, help="Server port", default=50051) +parser.add_argument( + "--data_format", + type=str, + choices=["json", "protobuf", "bytes"], + help="Data format to use for communication", +) +parser.add_argument("--pipe_name",type=str,help="String Pipe name") +parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) +class PhaseOrderInference: + def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json"): + print("use_pipe {}".format(use_pipe)) + logdir = "/tmp" + logger = logging.getLogger(__file__) + logging.basicConfig( + filename="running.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, + ) + + config = DEFAULT_CONFIG.copy() + + cfg = { + "hiddens": [], + "dueling": False, + } + + ModelCatalog.register_custom_model("My_torch_model", CustomPhaseOrderModel) + target_arch = "AArch64" if args.isAArch else "X86" + # Define model and environment config + config = dict( + { + "model": { + "custom_model": "My_torch_model", + "custom_model_config": { + "state_size": 300, + "fc1_units": 64, + "fc2_units": 64, + }, + }, + "env_config": { + "target": target_arch, + "state_size": 300, + "mode": "inference", + "dump_type": "One", + "intermediate_data": "./temp", + "llvm_dir": BUILD_DIR, + "ir2vec_dir": args.ir2vec_dir, + "test_dir": args.test_dir, + "alpha": args.alpha, + "beta": args.beta, + "size_reward_thresh": args.size_reward_thresh, + "mca_reward_thresh": args.mca_reward_thresh, + "action_space_size": 34, + "use_pipe": use_pipe, + "data_format": data_format, + "use_grpc": use_grpc, + "server_port": args.server_port, + "pipe_name": args.pipe_name + }, + "framework": "torch", + "explore": False, + "num_workers": 0, + "train_batch_size": 1, + }, + **cfg + ) + + def env_creator(env_config): + return PhaseOrder(env_config) + + # Create environment + register_env("Environment", env_creator) + + self.train_agent = 
DQNTrainer(env="Environment", config=config) + + checkpoint = model_path + # Load saved model + self.train_agent.restore(checkpoint) + + self.config = config + + # torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), f="/Pramana/ML_LLVM_Tools/ml-llvm-project/onnx_checkpoints_posetrl/posetrl_model.onnx", verbose=True, input_names=["obs"], output_names=["output"]) + + def dot_to_json(self, dot_): + py_dot_graph = pydot.graph_from_dot_data(dot_)[0] + graph_netx = networkx.drawing.nx_pydot.from_pydot(py_dot_graph) + graph_json = json_graph.adjacency_data(graph_netx) + return graph_json + + # Predict best optimization sequence for the given LLVM IR + def run_predict(self, test_file=None): + env = PhaseOrder(self.config["env_config"]) + + print("test_file {}".format(test_file)) + state = env.reset(test_file) + score = 0 + while True: + logging.debug("-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-") + + action = self.train_agent.compute_action(state) + print("action {}".format(action)) + + next_state, reward, done, response = env.step(action) + + logging.debug("reward : {}".format(reward)) + + state = next_state + if done: + with open("actionlist.txt", "a") as actionfile: + actionfile.write(str(test_file) + "\n") + assert response is not None, "Allocation is not preset." 
+ break + + return reward, response + +class service_server(posetRL_pb2_grpc.PosetRLService): + def __init__(self, inference_obj): + self.inference_obj = inference_obj + self.new_file = True + self.state = None + self.env = None + self.action = None + + def getAdvice(self, request, context): + try: + done = False + if self.new_file: + self.env = PhaseOrder(self.inference_obj.config["env_config"]) + self.state = self.env.reset(embedding=request.embedding) + self.new_file = False + print("Episode Started") + else: + self.env.embedding = np.array(request.embedding) + self.state, reward, done, response = self.env.step(self.action) + if not done: + self.action = self.inference_obj.train_agent.compute_action(self.state) + reply=posetRL_pb2.ActionRequest(action=self.action.item()) + else: + reply=posetRL_pb2.ActionRequest(action=-1) + self.new_file = True + print("Episode Finished") + return reply + except: + print('Error') + traceback.print_exc() + reply=posetRL_pb2.ActionRequest(action=-1) + return reply + + + + +if __name__ == "__main__": + args = parser.parse_args() + logging.info("Start the inference....") + + ray.init() + + inference_obj = PhaseOrderInference( + args.model, args.use_pipe, args.use_grpc, args.data_format + ) + if args.use_pipe: + print("about to enter while loop...") + while True: + reward, response = inference_obj.run_predict() + elif args.use_grpc: + # ray.init() + compiler_interface = GrpcCompilerInterface(mode = 'server', add_server_method=posetRL_pb2_grpc.add_PosetRLServiceServicer_to_server, grpc_service_obj=service_server(inference_obj), hostport= args.server_port) + compiler_interface.start_server() + + else: + now = datetime.now() + date_time = now.strftime("%m-%d-%Y-%H-%M-%S") + file_name = "timetaken-spec06-posetrl-orignal-" + date_time + ".txt" + repeat_count = 3 + for file in os.listdir(args.test_dir): + f = open(file_name, "a") + count = 0 + while count < repeat_count: + start = time.time() + reward, response = 
inference_obj.run_predict(file) + end = time.time() + f.write("Time taken for {} is {}\n".format(file, end - start)) + count+=1 + f.close() diff --git a/model/POSET-RL/model.py b/model/POSET-RL/model.py new file mode 100755 index 000000000000..20416a2b9618 --- /dev/null +++ b/model/POSET-RL/model.py @@ -0,0 +1,41 @@ +# RL model definition + +import torch +import torch.nn as nn + +import ray +from ray.rllib.models import ModelCatalog +from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +from ray.rllib.utils.torch_ops import FLOAT_MIN + +import os +import logging +import pandas as pd + +logger = logging.getLogger(__file__) +logging.basicConfig(filename='running.log', format='%(levelname)s - %(filename)s - %(message)s', level=logging.DEBUG) + +class CustomPhaseOrderModel(TorchModelV2, nn.Module): + def __init__(self, obs_space, action_space, num_outputs, model_config, name, **customized_model_kwargs): + TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name, **customized_model_kwargs) + nn.Module.__init__(self) + custom_config = model_config["custom_model_config"] + self.model = nn.Sequential( + nn.Linear(custom_config["state_size"], custom_config["fc1_units"]), + nn.ReLU(), + nn.Linear(custom_config["fc1_units"], custom_config["fc1_units"]), + nn.ReLU(), + nn.Linear(custom_config["fc2_units"], num_outputs) + ) + + def forward(self, input_dict, state, seq_lens): + + model_out = self.model( + input_dict["obs"]["state"] + ) + + mask = input_dict["obs"]["action_mask"] + inf_mask = torch.clamp(torch.log(mask), min=FLOAT_MIN) + model_out = model_out + inf_mask + + return model_out, state From 9b80e93528fbf899b40707ab82e13a5255d7445b Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Sun, 14 Jan 2024 18:49:18 +0530 Subject: [PATCH 02/52] Updating MLCompilerBridge submodule (cherry picked from commit 257ff3f16171c6cb4caad2b6eba43857ad44f30b) --- MLCompilerBridge | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/MLCompilerBridge b/MLCompilerBridge index 3d59126519db..b3f7359866e9 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 3d59126519dbd8c56b60ad95ec2220c0b8be3957 +Subproject commit b3f7359866e9a551327a58e2358cbc14b74d3d47 From f376eaf8ecb9593e40b59873eb8e25ba0d02ab9c Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Sun, 14 Jan 2024 18:49:18 +0530 Subject: [PATCH 03/52] Updating MLCompilerBridge submodule (cherry picked from commit 257ff3f16171c6cb4caad2b6eba43857ad44f30b) From 00cbf721104a4593c20e4a196a4bfccf1e529412 Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Sun, 14 Jan 2024 19:27:06 +0530 Subject: [PATCH 04/52] MLIR HelloMLBridgePass bugfix (cherry picked from commit 610c4e3dad27a375c9bc58d4685fcc6fcd0ee812) --- mlir/lib/Transforms/HelloMLBridgePass.cpp | 491 ++++++++++++++++++++++ 1 file changed, 491 insertions(+) create mode 100644 mlir/lib/Transforms/HelloMLBridgePass.cpp diff --git a/mlir/lib/Transforms/HelloMLBridgePass.cpp b/mlir/lib/Transforms/HelloMLBridgePass.cpp new file mode 100644 index 000000000000..27da7916267e --- /dev/null +++ b/mlir/lib/Transforms/HelloMLBridgePass.cpp @@ -0,0 +1,491 @@ +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "MLModelRunner/PipeModelRunner.h" +#include "MLModelRunner/TFModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "MLModelRunner/gRPCModelRunner.h" +#include "grpc/helloMLBridge/helloMLBridge.grpc.pb.h" +#include "grpc/helloMLBridge/helloMLBridge.pb.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include 
+#include +#include +#include +#include + +#include "tf_models/LinearModel1000.h" +#include "tf_models/LinearModel10000.h" +#include "tf_models/LinearModel10500.h" +#include "tf_models/LinearModel11000.h" +#include "tf_models/LinearModel11500.h" +#include "tf_models/LinearModel12000.h" +#include "tf_models/LinearModel12500.h" +#include "tf_models/LinearModel13000.h" +#include "tf_models/LinearModel13500.h" +#include "tf_models/LinearModel14000.h" +#include "tf_models/LinearModel14500.h" +#include "tf_models/LinearModel1500.h" +#include "tf_models/LinearModel15000.h" +#include "tf_models/LinearModel15500.h" +#include "tf_models/LinearModel16000.h" +#include "tf_models/LinearModel16500.h" +#include "tf_models/LinearModel17000.h" +#include "tf_models/LinearModel17500.h" +#include "tf_models/LinearModel18000.h" +#include "tf_models/LinearModel18500.h" +#include "tf_models/LinearModel19000.h" +#include "tf_models/LinearModel19500.h" +#include "tf_models/LinearModel2000.h" +#include "tf_models/LinearModel20000.h" +#include "tf_models/LinearModel20500.h" +#include "tf_models/LinearModel21000.h" +#include "tf_models/LinearModel21500.h" +#include "tf_models/LinearModel22000.h" +#include "tf_models/LinearModel22500.h" +#include "tf_models/LinearModel23000.h" +#include "tf_models/LinearModel23500.h" +#include "tf_models/LinearModel24000.h" +#include "tf_models/LinearModel24500.h" +#include "tf_models/LinearModel2500.h" +#include "tf_models/LinearModel25000.h" +#include "tf_models/LinearModel25500.h" +#include "tf_models/LinearModel26000.h" +#include "tf_models/LinearModel26500.h" +#include "tf_models/LinearModel27000.h" +#include "tf_models/LinearModel27500.h" +#include "tf_models/LinearModel28000.h" +#include "tf_models/LinearModel28500.h" +#include "tf_models/LinearModel29000.h" +#include "tf_models/LinearModel29500.h" +#include "tf_models/LinearModel3000.h" +#include "tf_models/LinearModel30000.h" +#include "tf_models/LinearModel30500.h" +#include 
"tf_models/LinearModel31000.h" +#include "tf_models/LinearModel31500.h" +#include "tf_models/LinearModel32000.h" +#include "tf_models/LinearModel32500.h" +#include "tf_models/LinearModel33000.h" +#include "tf_models/LinearModel33500.h" +#include "tf_models/LinearModel34000.h" +#include "tf_models/LinearModel34500.h" +#include "tf_models/LinearModel3500.h" +#include "tf_models/LinearModel35000.h" +#include "tf_models/LinearModel35500.h" +#include "tf_models/LinearModel36000.h" +#include "tf_models/LinearModel36500.h" +#include "tf_models/LinearModel37000.h" +#include "tf_models/LinearModel37500.h" +#include "tf_models/LinearModel38000.h" +#include "tf_models/LinearModel38500.h" +#include "tf_models/LinearModel39000.h" +#include "tf_models/LinearModel39500.h" +#include "tf_models/LinearModel4000.h" +#include "tf_models/LinearModel40000.h" +#include "tf_models/LinearModel40500.h" +#include "tf_models/LinearModel41000.h" +#include "tf_models/LinearModel41500.h" +#include "tf_models/LinearModel42000.h" +#include "tf_models/LinearModel42500.h" +#include "tf_models/LinearModel43000.h" +#include "tf_models/LinearModel43500.h" +#include "tf_models/LinearModel44000.h" +#include "tf_models/LinearModel44500.h" +#include "tf_models/LinearModel4500.h" +#include "tf_models/LinearModel45000.h" +#include "tf_models/LinearModel45500.h" +#include "tf_models/LinearModel46000.h" +#include "tf_models/LinearModel46500.h" +#include "tf_models/LinearModel47000.h" +#include "tf_models/LinearModel47500.h" +#include "tf_models/LinearModel48000.h" +#include "tf_models/LinearModel48500.h" +#include "tf_models/LinearModel49000.h" +#include "tf_models/LinearModel49500.h" +#include "tf_models/LinearModel500.h" +#include "tf_models/LinearModel5000.h" +#include "tf_models/LinearModel50000.h" +#include "tf_models/LinearModel5500.h" +#include "tf_models/LinearModel6000.h" +#include "tf_models/LinearModel6500.h" +#include "tf_models/LinearModel7000.h" +#include "tf_models/LinearModel7500.h" +#include 
"tf_models/LinearModel8000.h" +#include "tf_models/LinearModel8500.h" +#include "tf_models/LinearModel9000.h" +#include "tf_models/LinearModel9500.h" + +#define MODELS(M) \ + M(500) \ + M(1000) \ + M(1500) \ + M(2000) \ + M(2500) \ + M(3000) \ + M(3500) \ + M(4000) \ + M(4500) \ + M(5000) \ + M(5500) \ + M(6000) \ + M(6500) \ + M(7000) \ + M(7500) \ + M(8000) \ + M(8500) \ + M(9000) \ + M(9500) \ + M(10000) \ + M(10500) \ + M(11000) \ + M(11500) \ + M(12000) \ + M(12500) \ + M(13000) \ + M(13500) \ + M(14000) \ + M(14500) \ + M(15000) \ + M(15500) \ + M(16000) \ + M(16500) \ + M(17000) \ + M(17500) \ + M(18000) \ + M(18500) \ + M(19000) \ + M(19500) \ + M(20000) \ + M(20500) \ + M(21000) \ + M(21500) \ + M(22000) \ + M(22500) \ + M(23000) \ + M(23500) \ + M(24000) \ + M(24500) \ + M(25000) \ + M(25500) \ + M(26000) \ + M(26500) \ + M(27000) \ + M(27500) \ + M(28000) \ + M(28500) \ + M(29000) \ + M(29500) \ + M(30000) \ + M(30500) \ + M(31000) \ + M(31500) \ + M(32000) \ + M(32500) \ + M(33000) \ + M(33500) \ + M(34000) \ + M(34500) \ + M(35000) \ + M(35500) \ + M(36000) \ + M(36500) \ + M(37000) \ + M(37500) \ + M(38000) \ + M(38500) \ + M(39000) \ + M(39500) \ + M(40000) \ + M(40500) \ + M(41000) \ + M(41500) \ + M(42000) \ + M(42500) \ + M(43000) \ + M(43500) \ + M(44000) \ + M(44500) \ + M(45000) \ + M(45500) \ + M(46000) \ + M(46500) \ + M(47000) \ + M(47500) \ + M(48000) \ + M(48500) \ + M(49000) \ + M(49500) \ + M(50000) + +static llvm::cl::opt + training("mlir-hello-training", llvm::cl::Hidden, + llvm::cl::desc("whether it is training or inference"), + llvm::cl::init(false)); +static llvm::cl::opt server_address( + "mlir-hello-server-address", llvm::cl::Hidden, + llvm::cl::desc( + "Starts the server in the given address, format :"), + llvm::cl::init("localhost:5050")); + +static llvm::cl::opt data_format( + "mlir-hello-data-format", llvm::cl::Hidden, llvm::cl::init("json"), + llvm::cl::desc("Data format to use for communication with python model")); + 
+static llvm::cl::opt + useONNX("mlir-hello-use-onnx", llvm::cl::Hidden, + llvm::cl::desc("Use ONNX for inferencing model"), + llvm::cl::init(false)); + +static llvm::cl::opt + usePipe("mlir-hello-use-pipe", llvm::cl::Hidden, + llvm::cl::desc("Use pipe based interation with python model"), + llvm::cl::init(false)); + +static llvm::cl::opt + pipe_name("mlir-hello-pipe-name", llvm::cl::Hidden, llvm::cl::init("dummy"), + llvm::cl::desc("Name for pipe file")); +static llvm::cl::opt n("mlir-hello-data-size", llvm::cl::Hidden, + llvm::cl::init(1000), + llvm::cl::desc("Size of input vector")); + +static llvm::cl::opt + useTF("mlir-hello-use-tf", llvm::cl::Hidden, + llvm::cl::desc("Use TF AOT for inferencing model"), + llvm::cl::init(false)); + +using namespace mlir; +using namespace grpc; +using namespace MLBridge; +using namespace helloMLBridgegRPC; + +namespace { + +std::random_device rd; +std::mt19937 gen(5); +std::uniform_real_distribution dis(0.0, 1.0); + +class HelloMLBridgeEnv : public Environment { + Observation CurrObs; + +public: + HelloMLBridgeEnv() { setNextAgent("agent"); }; + Observation &reset() override; + Observation &step(Action) override; + +protected: + std::vector FeatureVector; +}; + +Observation &HelloMLBridgeEnv::step(Action Action) { + CurrObs.clear(); + std::copy(FeatureVector.begin(), FeatureVector.end(), + std::back_inserter(CurrObs)); + setDone(); + return CurrObs; +} + +Observation &HelloMLBridgeEnv::reset() { + std::copy(FeatureVector.begin(), FeatureVector.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +struct MLIRHelloMLBridge : public OperationPass, + public HelloMLBridgeEnv { +public: + MLIRHelloMLBridge() {} + + struct HelloMLIRTraining + : public helloMLBridgegRPC::HelloMLBridgeService::Service { + private: + std::vector FeatureVector; + + public: + grpc::Status + getTensor(grpc::ServerContext *context, + const ::helloMLBridgegRPC::ActionRequest *request, + ::helloMLBridgegRPC::TensorResponse *response) override { + if 
(request->action() == -1) { + return grpc::Status::OK; + } + if (request->action()) { + populateFeatureVector(FeatureVector); + for (int I = 0, E = FeatureVector.size(); I < E; I++) { + response->add_tensor(FeatureVector[I]); + } + } + return grpc::Status::OK; + } + }; + + void setTFModelRunner(int n) { + switch (n) { +#define M(x) \ + case x: \ + MLRunner = new TFModelRunner("output"); \ + break; + MODELS(M) +#undef M + } + // MLRunner = new TFModelRunner("output"); + } + + void TFinitCommunication() { + auto StartTime = std::chrono::high_resolution_clock::now(); + + std::pair> p1("x", FeatureVector); + + setTFModelRunner(n); + MLRunner->populateFeatures(p1); + double Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("tf-inference.csv", std::ios_base::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + + void runOnOperation() override { + assert(MLConfig::mlconfig != "" && "ml-config-path required"); + // Get the current operation being operated on. 
+ Operation *op = getOperation(); + // llvm::errs() << "Hello World pass\n"; + // bool use_pipe = false; + // bool useONNX = false; + if (useTF) { + populateFeatureVector(FeatureVector); + TFinitCommunication(); + return; + } + if (usePipe) { + populateFeatureVector(FeatureVector); + initCommunication(); + } else { + if (training) { + HelloMLIRTraining *gRPCTrainer = new HelloMLIRTraining(); + MLRunner = new gRPCModelRunner< + helloMLBridgegRPC::HelloMLBridgeService::Service, + helloMLBridgegRPC::HelloMLBridgeService::Stub, + helloMLBridgegRPC::TensorResponse, + helloMLBridgegRPC::ActionRequest>(server_address, gRPCTrainer); + } else if (useONNX) { + std::ofstream outputFile; + outputFile.open("onnx-inference.csv", std::ios::app); + Agent *agent = new Agent(MLConfig::mlconfig + + "/hellopass/onnx_test_dir/dummy-torch-model-" + + std::to_string(n) + ".onnx"); + std::map agents; + agents["agent"] = agent; + auto StartTime = std::chrono::high_resolution_clock::now(); + Env = new HelloMLBridgeEnv(); + MLRunner = new ONNXModelRunner(this, agents, nullptr); + populateFeatureVector(FeatureVector); + int Out = MLRunner->evaluate(); + auto EndTime = std::chrono::high_resolution_clock::now(); + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } else { + // llvm::errs() << "Using 2nd gRPC flow...\n"; + std::ofstream outputFile; + outputFile.open("grpc-inference.csv", std::ios::app); + auto StartTime = std::chrono::high_resolution_clock::now(); + + helloMLBridgegRPC::TensorResponse request; + helloMLBridgegRPC::ActionRequest response; + MLRunner = + new gRPCModelRunner( + server_address, &request, &response); + + MLRunner->setRequest(&request); + MLRunner->setResponse(&response); + populateFeatureVector(FeatureVector); + std::pair> p1("tensor", FeatureVector); + MLRunner->populateFeatures(p1); + int Out = MLRunner->evaluate(); + + auto EndTime = 
std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + // outs() << n << " " << Duration.count() << "\n"; + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + } + } + +private: + BaseSerDes::Kind SerDesType; + HelloMLBridgeEnv *Env; + std::string basename = "/tmp/" + pipe_name; + MLModelRunner *MLRunner; + static void populateFeatureVector(std::vector &FeatureVector); + void initCommunication(); + void setModelRunner(int n); +}; + +void MLIRHelloMLBridge::initCommunication() { + if (data_format == "bytes") { + SerDesType = BaseSerDes::Kind::Bitstream; + } else if (data_format == "json") { + SerDesType = BaseSerDes::Kind::Json; + } + basename = "/tmp/" + pipe_name; + auto StartTime = std::chrono::high_resolution_clock::now(); + MLRunner = + new PipeModelRunner(basename + ".out", basename + ".in", SerDesType); + + std::pair> p1("tensor", FeatureVector); + MLRunner->populateFeatures(p1); + int Out = MLRunner->evaluate(); + + // llvm::outs() << "Returned value: " << Out << "\n"; + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("pipe-" + data_format + "-inference.csv", std::ios::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); +} + +void MLIRHelloMLBridge::populateFeatureVector( + std::vector &FeatureVector) { + FeatureVector.resize(n); + for (int i = 0; i < n; i++) { + FeatureVector[i] = dis(gen); + } +} + +void MLIRHelloMLBridge::setModelRunner(int n) { MLRunner = nullptr; } + +} // end anonymous namespace + +std::unique_ptr mlir::createMLIRHelloMLBridgePass() { + return std::make_unique(); +} + +static PassRegistration pass("mlir-hello-mlbridge", + "MLIR Hello MLBridge"); From 8a62cb93413b682e9ad0e619979989bc2e656cac Mon Sep 17 00:00:00 2001 From: Yash Agrawal Date: Mon, 8 Apr 2024 12:38:48 +0530 Subject: [PATCH 
05/52] Revert "MLIR HelloMLBridgePass bugfix" This reverts commit 00cbf721104a4593c20e4a196a4bfccf1e529412. --- mlir/lib/Transforms/HelloMLBridgePass.cpp | 491 ---------------------- 1 file changed, 491 deletions(-) delete mode 100644 mlir/lib/Transforms/HelloMLBridgePass.cpp diff --git a/mlir/lib/Transforms/HelloMLBridgePass.cpp b/mlir/lib/Transforms/HelloMLBridgePass.cpp deleted file mode 100644 index 27da7916267e..000000000000 --- a/mlir/lib/Transforms/HelloMLBridgePass.cpp +++ /dev/null @@ -1,491 +0,0 @@ -#include "MLModelRunner/MLModelRunner.h" -#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" -#include "MLModelRunner/ONNXModelRunner/environment.h" -#include "MLModelRunner/ONNXModelRunner/utils.h" -#include "MLModelRunner/PipeModelRunner.h" -#include "MLModelRunner/TFModelRunner.h" -#include "MLModelRunner/Utils/MLConfig.h" -#include "MLModelRunner/gRPCModelRunner.h" -#include "grpc/helloMLBridge/helloMLBridge.grpc.pb.h" -#include "grpc/helloMLBridge/helloMLBridge.pb.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Pass/PassRegistry.h" -#include "mlir/Transforms/Passes.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include - -#include "tf_models/LinearModel1000.h" -#include "tf_models/LinearModel10000.h" -#include "tf_models/LinearModel10500.h" -#include "tf_models/LinearModel11000.h" -#include "tf_models/LinearModel11500.h" -#include "tf_models/LinearModel12000.h" -#include "tf_models/LinearModel12500.h" -#include "tf_models/LinearModel13000.h" -#include "tf_models/LinearModel13500.h" -#include "tf_models/LinearModel14000.h" -#include "tf_models/LinearModel14500.h" -#include "tf_models/LinearModel1500.h" -#include "tf_models/LinearModel15000.h" -#include "tf_models/LinearModel15500.h" -#include 
"tf_models/LinearModel16000.h" -#include "tf_models/LinearModel16500.h" -#include "tf_models/LinearModel17000.h" -#include "tf_models/LinearModel17500.h" -#include "tf_models/LinearModel18000.h" -#include "tf_models/LinearModel18500.h" -#include "tf_models/LinearModel19000.h" -#include "tf_models/LinearModel19500.h" -#include "tf_models/LinearModel2000.h" -#include "tf_models/LinearModel20000.h" -#include "tf_models/LinearModel20500.h" -#include "tf_models/LinearModel21000.h" -#include "tf_models/LinearModel21500.h" -#include "tf_models/LinearModel22000.h" -#include "tf_models/LinearModel22500.h" -#include "tf_models/LinearModel23000.h" -#include "tf_models/LinearModel23500.h" -#include "tf_models/LinearModel24000.h" -#include "tf_models/LinearModel24500.h" -#include "tf_models/LinearModel2500.h" -#include "tf_models/LinearModel25000.h" -#include "tf_models/LinearModel25500.h" -#include "tf_models/LinearModel26000.h" -#include "tf_models/LinearModel26500.h" -#include "tf_models/LinearModel27000.h" -#include "tf_models/LinearModel27500.h" -#include "tf_models/LinearModel28000.h" -#include "tf_models/LinearModel28500.h" -#include "tf_models/LinearModel29000.h" -#include "tf_models/LinearModel29500.h" -#include "tf_models/LinearModel3000.h" -#include "tf_models/LinearModel30000.h" -#include "tf_models/LinearModel30500.h" -#include "tf_models/LinearModel31000.h" -#include "tf_models/LinearModel31500.h" -#include "tf_models/LinearModel32000.h" -#include "tf_models/LinearModel32500.h" -#include "tf_models/LinearModel33000.h" -#include "tf_models/LinearModel33500.h" -#include "tf_models/LinearModel34000.h" -#include "tf_models/LinearModel34500.h" -#include "tf_models/LinearModel3500.h" -#include "tf_models/LinearModel35000.h" -#include "tf_models/LinearModel35500.h" -#include "tf_models/LinearModel36000.h" -#include "tf_models/LinearModel36500.h" -#include "tf_models/LinearModel37000.h" -#include "tf_models/LinearModel37500.h" -#include "tf_models/LinearModel38000.h" 
-#include "tf_models/LinearModel38500.h" -#include "tf_models/LinearModel39000.h" -#include "tf_models/LinearModel39500.h" -#include "tf_models/LinearModel4000.h" -#include "tf_models/LinearModel40000.h" -#include "tf_models/LinearModel40500.h" -#include "tf_models/LinearModel41000.h" -#include "tf_models/LinearModel41500.h" -#include "tf_models/LinearModel42000.h" -#include "tf_models/LinearModel42500.h" -#include "tf_models/LinearModel43000.h" -#include "tf_models/LinearModel43500.h" -#include "tf_models/LinearModel44000.h" -#include "tf_models/LinearModel44500.h" -#include "tf_models/LinearModel4500.h" -#include "tf_models/LinearModel45000.h" -#include "tf_models/LinearModel45500.h" -#include "tf_models/LinearModel46000.h" -#include "tf_models/LinearModel46500.h" -#include "tf_models/LinearModel47000.h" -#include "tf_models/LinearModel47500.h" -#include "tf_models/LinearModel48000.h" -#include "tf_models/LinearModel48500.h" -#include "tf_models/LinearModel49000.h" -#include "tf_models/LinearModel49500.h" -#include "tf_models/LinearModel500.h" -#include "tf_models/LinearModel5000.h" -#include "tf_models/LinearModel50000.h" -#include "tf_models/LinearModel5500.h" -#include "tf_models/LinearModel6000.h" -#include "tf_models/LinearModel6500.h" -#include "tf_models/LinearModel7000.h" -#include "tf_models/LinearModel7500.h" -#include "tf_models/LinearModel8000.h" -#include "tf_models/LinearModel8500.h" -#include "tf_models/LinearModel9000.h" -#include "tf_models/LinearModel9500.h" - -#define MODELS(M) \ - M(500) \ - M(1000) \ - M(1500) \ - M(2000) \ - M(2500) \ - M(3000) \ - M(3500) \ - M(4000) \ - M(4500) \ - M(5000) \ - M(5500) \ - M(6000) \ - M(6500) \ - M(7000) \ - M(7500) \ - M(8000) \ - M(8500) \ - M(9000) \ - M(9500) \ - M(10000) \ - M(10500) \ - M(11000) \ - M(11500) \ - M(12000) \ - M(12500) \ - M(13000) \ - M(13500) \ - M(14000) \ - M(14500) \ - M(15000) \ - M(15500) \ - M(16000) \ - M(16500) \ - M(17000) \ - M(17500) \ - M(18000) \ - M(18500) \ - M(19000) \ 
- M(19500) \ - M(20000) \ - M(20500) \ - M(21000) \ - M(21500) \ - M(22000) \ - M(22500) \ - M(23000) \ - M(23500) \ - M(24000) \ - M(24500) \ - M(25000) \ - M(25500) \ - M(26000) \ - M(26500) \ - M(27000) \ - M(27500) \ - M(28000) \ - M(28500) \ - M(29000) \ - M(29500) \ - M(30000) \ - M(30500) \ - M(31000) \ - M(31500) \ - M(32000) \ - M(32500) \ - M(33000) \ - M(33500) \ - M(34000) \ - M(34500) \ - M(35000) \ - M(35500) \ - M(36000) \ - M(36500) \ - M(37000) \ - M(37500) \ - M(38000) \ - M(38500) \ - M(39000) \ - M(39500) \ - M(40000) \ - M(40500) \ - M(41000) \ - M(41500) \ - M(42000) \ - M(42500) \ - M(43000) \ - M(43500) \ - M(44000) \ - M(44500) \ - M(45000) \ - M(45500) \ - M(46000) \ - M(46500) \ - M(47000) \ - M(47500) \ - M(48000) \ - M(48500) \ - M(49000) \ - M(49500) \ - M(50000) - -static llvm::cl::opt - training("mlir-hello-training", llvm::cl::Hidden, - llvm::cl::desc("whether it is training or inference"), - llvm::cl::init(false)); -static llvm::cl::opt server_address( - "mlir-hello-server-address", llvm::cl::Hidden, - llvm::cl::desc( - "Starts the server in the given address, format :"), - llvm::cl::init("localhost:5050")); - -static llvm::cl::opt data_format( - "mlir-hello-data-format", llvm::cl::Hidden, llvm::cl::init("json"), - llvm::cl::desc("Data format to use for communication with python model")); - -static llvm::cl::opt - useONNX("mlir-hello-use-onnx", llvm::cl::Hidden, - llvm::cl::desc("Use ONNX for inferencing model"), - llvm::cl::init(false)); - -static llvm::cl::opt - usePipe("mlir-hello-use-pipe", llvm::cl::Hidden, - llvm::cl::desc("Use pipe based interation with python model"), - llvm::cl::init(false)); - -static llvm::cl::opt - pipe_name("mlir-hello-pipe-name", llvm::cl::Hidden, llvm::cl::init("dummy"), - llvm::cl::desc("Name for pipe file")); -static llvm::cl::opt n("mlir-hello-data-size", llvm::cl::Hidden, - llvm::cl::init(1000), - llvm::cl::desc("Size of input vector")); - -static llvm::cl::opt - useTF("mlir-hello-use-tf", 
llvm::cl::Hidden, - llvm::cl::desc("Use TF AOT for inferencing model"), - llvm::cl::init(false)); - -using namespace mlir; -using namespace grpc; -using namespace MLBridge; -using namespace helloMLBridgegRPC; - -namespace { - -std::random_device rd; -std::mt19937 gen(5); -std::uniform_real_distribution dis(0.0, 1.0); - -class HelloMLBridgeEnv : public Environment { - Observation CurrObs; - -public: - HelloMLBridgeEnv() { setNextAgent("agent"); }; - Observation &reset() override; - Observation &step(Action) override; - -protected: - std::vector FeatureVector; -}; - -Observation &HelloMLBridgeEnv::step(Action Action) { - CurrObs.clear(); - std::copy(FeatureVector.begin(), FeatureVector.end(), - std::back_inserter(CurrObs)); - setDone(); - return CurrObs; -} - -Observation &HelloMLBridgeEnv::reset() { - std::copy(FeatureVector.begin(), FeatureVector.end(), - std::back_inserter(CurrObs)); - return CurrObs; -} - -struct MLIRHelloMLBridge : public OperationPass, - public HelloMLBridgeEnv { -public: - MLIRHelloMLBridge() {} - - struct HelloMLIRTraining - : public helloMLBridgegRPC::HelloMLBridgeService::Service { - private: - std::vector FeatureVector; - - public: - grpc::Status - getTensor(grpc::ServerContext *context, - const ::helloMLBridgegRPC::ActionRequest *request, - ::helloMLBridgegRPC::TensorResponse *response) override { - if (request->action() == -1) { - return grpc::Status::OK; - } - if (request->action()) { - populateFeatureVector(FeatureVector); - for (int I = 0, E = FeatureVector.size(); I < E; I++) { - response->add_tensor(FeatureVector[I]); - } - } - return grpc::Status::OK; - } - }; - - void setTFModelRunner(int n) { - switch (n) { -#define M(x) \ - case x: \ - MLRunner = new TFModelRunner("output"); \ - break; - MODELS(M) -#undef M - } - // MLRunner = new TFModelRunner("output"); - } - - void TFinitCommunication() { - auto StartTime = std::chrono::high_resolution_clock::now(); - - std::pair> p1("x", FeatureVector); - - setTFModelRunner(n); - 
MLRunner->populateFeatures(p1); - double Out = MLRunner->evaluate(); - - auto EndTime = std::chrono::high_resolution_clock::now(); - - auto Duration = std::chrono::duration_cast( - EndTime - StartTime); - std::ofstream outputFile; - outputFile.open("tf-inference.csv", std::ios_base::app); - outputFile << n << "," << Duration.count() << "\n"; - outputFile.close(); - } - - void runOnOperation() override { - assert(MLConfig::mlconfig != "" && "ml-config-path required"); - // Get the current operation being operated on. - Operation *op = getOperation(); - // llvm::errs() << "Hello World pass\n"; - // bool use_pipe = false; - // bool useONNX = false; - if (useTF) { - populateFeatureVector(FeatureVector); - TFinitCommunication(); - return; - } - if (usePipe) { - populateFeatureVector(FeatureVector); - initCommunication(); - } else { - if (training) { - HelloMLIRTraining *gRPCTrainer = new HelloMLIRTraining(); - MLRunner = new gRPCModelRunner< - helloMLBridgegRPC::HelloMLBridgeService::Service, - helloMLBridgegRPC::HelloMLBridgeService::Stub, - helloMLBridgegRPC::TensorResponse, - helloMLBridgegRPC::ActionRequest>(server_address, gRPCTrainer); - } else if (useONNX) { - std::ofstream outputFile; - outputFile.open("onnx-inference.csv", std::ios::app); - Agent *agent = new Agent(MLConfig::mlconfig + - "/hellopass/onnx_test_dir/dummy-torch-model-" + - std::to_string(n) + ".onnx"); - std::map agents; - agents["agent"] = agent; - auto StartTime = std::chrono::high_resolution_clock::now(); - Env = new HelloMLBridgeEnv(); - MLRunner = new ONNXModelRunner(this, agents, nullptr); - populateFeatureVector(FeatureVector); - int Out = MLRunner->evaluate(); - auto EndTime = std::chrono::high_resolution_clock::now(); - auto Duration = std::chrono::duration_cast( - EndTime - StartTime); - outputFile << n << "," << Duration.count() << "\n"; - outputFile.close(); - } else { - // llvm::errs() << "Using 2nd gRPC flow...\n"; - std::ofstream outputFile; - outputFile.open("grpc-inference.csv", 
std::ios::app); - auto StartTime = std::chrono::high_resolution_clock::now(); - - helloMLBridgegRPC::TensorResponse request; - helloMLBridgegRPC::ActionRequest response; - MLRunner = - new gRPCModelRunner( - server_address, &request, &response); - - MLRunner->setRequest(&request); - MLRunner->setResponse(&response); - populateFeatureVector(FeatureVector); - std::pair> p1("tensor", FeatureVector); - MLRunner->populateFeatures(p1); - int Out = MLRunner->evaluate(); - - auto EndTime = std::chrono::high_resolution_clock::now(); - - auto Duration = std::chrono::duration_cast( - EndTime - StartTime); - // outs() << n << " " << Duration.count() << "\n"; - outputFile << n << "," << Duration.count() << "\n"; - outputFile.close(); - } - } - } - -private: - BaseSerDes::Kind SerDesType; - HelloMLBridgeEnv *Env; - std::string basename = "/tmp/" + pipe_name; - MLModelRunner *MLRunner; - static void populateFeatureVector(std::vector &FeatureVector); - void initCommunication(); - void setModelRunner(int n); -}; - -void MLIRHelloMLBridge::initCommunication() { - if (data_format == "bytes") { - SerDesType = BaseSerDes::Kind::Bitstream; - } else if (data_format == "json") { - SerDesType = BaseSerDes::Kind::Json; - } - basename = "/tmp/" + pipe_name; - auto StartTime = std::chrono::high_resolution_clock::now(); - MLRunner = - new PipeModelRunner(basename + ".out", basename + ".in", SerDesType); - - std::pair> p1("tensor", FeatureVector); - MLRunner->populateFeatures(p1); - int Out = MLRunner->evaluate(); - - // llvm::outs() << "Returned value: " << Out << "\n"; - - auto EndTime = std::chrono::high_resolution_clock::now(); - - auto Duration = std::chrono::duration_cast( - EndTime - StartTime); - std::ofstream outputFile; - outputFile.open("pipe-" + data_format + "-inference.csv", std::ios::app); - outputFile << n << "," << Duration.count() << "\n"; - outputFile.close(); -} - -void MLIRHelloMLBridge::populateFeatureVector( - std::vector &FeatureVector) { - FeatureVector.resize(n); - for 
(int i = 0; i < n; i++) { - FeatureVector[i] = dis(gen); - } -} - -void MLIRHelloMLBridge::setModelRunner(int n) { MLRunner = nullptr; } - -} // end anonymous namespace - -std::unique_ptr mlir::createMLIRHelloMLBridgePass() { - return std::make_unique(); -} - -static PassRegistration pass("mlir-hello-mlbridge", - "MLIR Hello MLBridge"); From bbf7d4d74e727c4dc97e8d670c3037f30ecf181a Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Sun, 14 Jan 2024 19:27:06 +0530 Subject: [PATCH 06/52] MLIR HelloMLBridgePass bugfix (cherry picked from commit 610c4e3dad27a375c9bc58d4685fcc6fcd0ee812) --- mlir/lib/Transforms/HelloMLBridgePass.cpp | 491 ++++++++++++++++++++++ 1 file changed, 491 insertions(+) create mode 100644 mlir/lib/Transforms/HelloMLBridgePass.cpp diff --git a/mlir/lib/Transforms/HelloMLBridgePass.cpp b/mlir/lib/Transforms/HelloMLBridgePass.cpp new file mode 100644 index 000000000000..27da7916267e --- /dev/null +++ b/mlir/lib/Transforms/HelloMLBridgePass.cpp @@ -0,0 +1,491 @@ +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "MLModelRunner/PipeModelRunner.h" +#include "MLModelRunner/TFModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "MLModelRunner/gRPCModelRunner.h" +#include "grpc/helloMLBridge/helloMLBridge.grpc.pb.h" +#include "grpc/helloMLBridge/helloMLBridge.pb.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +#include "tf_models/LinearModel1000.h" +#include "tf_models/LinearModel10000.h" +#include "tf_models/LinearModel10500.h" +#include 
"tf_models/LinearModel11000.h" +#include "tf_models/LinearModel11500.h" +#include "tf_models/LinearModel12000.h" +#include "tf_models/LinearModel12500.h" +#include "tf_models/LinearModel13000.h" +#include "tf_models/LinearModel13500.h" +#include "tf_models/LinearModel14000.h" +#include "tf_models/LinearModel14500.h" +#include "tf_models/LinearModel1500.h" +#include "tf_models/LinearModel15000.h" +#include "tf_models/LinearModel15500.h" +#include "tf_models/LinearModel16000.h" +#include "tf_models/LinearModel16500.h" +#include "tf_models/LinearModel17000.h" +#include "tf_models/LinearModel17500.h" +#include "tf_models/LinearModel18000.h" +#include "tf_models/LinearModel18500.h" +#include "tf_models/LinearModel19000.h" +#include "tf_models/LinearModel19500.h" +#include "tf_models/LinearModel2000.h" +#include "tf_models/LinearModel20000.h" +#include "tf_models/LinearModel20500.h" +#include "tf_models/LinearModel21000.h" +#include "tf_models/LinearModel21500.h" +#include "tf_models/LinearModel22000.h" +#include "tf_models/LinearModel22500.h" +#include "tf_models/LinearModel23000.h" +#include "tf_models/LinearModel23500.h" +#include "tf_models/LinearModel24000.h" +#include "tf_models/LinearModel24500.h" +#include "tf_models/LinearModel2500.h" +#include "tf_models/LinearModel25000.h" +#include "tf_models/LinearModel25500.h" +#include "tf_models/LinearModel26000.h" +#include "tf_models/LinearModel26500.h" +#include "tf_models/LinearModel27000.h" +#include "tf_models/LinearModel27500.h" +#include "tf_models/LinearModel28000.h" +#include "tf_models/LinearModel28500.h" +#include "tf_models/LinearModel29000.h" +#include "tf_models/LinearModel29500.h" +#include "tf_models/LinearModel3000.h" +#include "tf_models/LinearModel30000.h" +#include "tf_models/LinearModel30500.h" +#include "tf_models/LinearModel31000.h" +#include "tf_models/LinearModel31500.h" +#include "tf_models/LinearModel32000.h" +#include "tf_models/LinearModel32500.h" +#include "tf_models/LinearModel33000.h" 
+#include "tf_models/LinearModel33500.h" +#include "tf_models/LinearModel34000.h" +#include "tf_models/LinearModel34500.h" +#include "tf_models/LinearModel3500.h" +#include "tf_models/LinearModel35000.h" +#include "tf_models/LinearModel35500.h" +#include "tf_models/LinearModel36000.h" +#include "tf_models/LinearModel36500.h" +#include "tf_models/LinearModel37000.h" +#include "tf_models/LinearModel37500.h" +#include "tf_models/LinearModel38000.h" +#include "tf_models/LinearModel38500.h" +#include "tf_models/LinearModel39000.h" +#include "tf_models/LinearModel39500.h" +#include "tf_models/LinearModel4000.h" +#include "tf_models/LinearModel40000.h" +#include "tf_models/LinearModel40500.h" +#include "tf_models/LinearModel41000.h" +#include "tf_models/LinearModel41500.h" +#include "tf_models/LinearModel42000.h" +#include "tf_models/LinearModel42500.h" +#include "tf_models/LinearModel43000.h" +#include "tf_models/LinearModel43500.h" +#include "tf_models/LinearModel44000.h" +#include "tf_models/LinearModel44500.h" +#include "tf_models/LinearModel4500.h" +#include "tf_models/LinearModel45000.h" +#include "tf_models/LinearModel45500.h" +#include "tf_models/LinearModel46000.h" +#include "tf_models/LinearModel46500.h" +#include "tf_models/LinearModel47000.h" +#include "tf_models/LinearModel47500.h" +#include "tf_models/LinearModel48000.h" +#include "tf_models/LinearModel48500.h" +#include "tf_models/LinearModel49000.h" +#include "tf_models/LinearModel49500.h" +#include "tf_models/LinearModel500.h" +#include "tf_models/LinearModel5000.h" +#include "tf_models/LinearModel50000.h" +#include "tf_models/LinearModel5500.h" +#include "tf_models/LinearModel6000.h" +#include "tf_models/LinearModel6500.h" +#include "tf_models/LinearModel7000.h" +#include "tf_models/LinearModel7500.h" +#include "tf_models/LinearModel8000.h" +#include "tf_models/LinearModel8500.h" +#include "tf_models/LinearModel9000.h" +#include "tf_models/LinearModel9500.h" + +#define MODELS(M) \ + M(500) \ + M(1000) \ 
+ M(1500) \ + M(2000) \ + M(2500) \ + M(3000) \ + M(3500) \ + M(4000) \ + M(4500) \ + M(5000) \ + M(5500) \ + M(6000) \ + M(6500) \ + M(7000) \ + M(7500) \ + M(8000) \ + M(8500) \ + M(9000) \ + M(9500) \ + M(10000) \ + M(10500) \ + M(11000) \ + M(11500) \ + M(12000) \ + M(12500) \ + M(13000) \ + M(13500) \ + M(14000) \ + M(14500) \ + M(15000) \ + M(15500) \ + M(16000) \ + M(16500) \ + M(17000) \ + M(17500) \ + M(18000) \ + M(18500) \ + M(19000) \ + M(19500) \ + M(20000) \ + M(20500) \ + M(21000) \ + M(21500) \ + M(22000) \ + M(22500) \ + M(23000) \ + M(23500) \ + M(24000) \ + M(24500) \ + M(25000) \ + M(25500) \ + M(26000) \ + M(26500) \ + M(27000) \ + M(27500) \ + M(28000) \ + M(28500) \ + M(29000) \ + M(29500) \ + M(30000) \ + M(30500) \ + M(31000) \ + M(31500) \ + M(32000) \ + M(32500) \ + M(33000) \ + M(33500) \ + M(34000) \ + M(34500) \ + M(35000) \ + M(35500) \ + M(36000) \ + M(36500) \ + M(37000) \ + M(37500) \ + M(38000) \ + M(38500) \ + M(39000) \ + M(39500) \ + M(40000) \ + M(40500) \ + M(41000) \ + M(41500) \ + M(42000) \ + M(42500) \ + M(43000) \ + M(43500) \ + M(44000) \ + M(44500) \ + M(45000) \ + M(45500) \ + M(46000) \ + M(46500) \ + M(47000) \ + M(47500) \ + M(48000) \ + M(48500) \ + M(49000) \ + M(49500) \ + M(50000) + +static llvm::cl::opt + training("mlir-hello-training", llvm::cl::Hidden, + llvm::cl::desc("whether it is training or inference"), + llvm::cl::init(false)); +static llvm::cl::opt server_address( + "mlir-hello-server-address", llvm::cl::Hidden, + llvm::cl::desc( + "Starts the server in the given address, format :"), + llvm::cl::init("localhost:5050")); + +static llvm::cl::opt data_format( + "mlir-hello-data-format", llvm::cl::Hidden, llvm::cl::init("json"), + llvm::cl::desc("Data format to use for communication with python model")); + +static llvm::cl::opt + useONNX("mlir-hello-use-onnx", llvm::cl::Hidden, + llvm::cl::desc("Use ONNX for inferencing model"), + llvm::cl::init(false)); + +static llvm::cl::opt + 
usePipe("mlir-hello-use-pipe", llvm::cl::Hidden, + llvm::cl::desc("Use pipe based interation with python model"), + llvm::cl::init(false)); + +static llvm::cl::opt + pipe_name("mlir-hello-pipe-name", llvm::cl::Hidden, llvm::cl::init("dummy"), + llvm::cl::desc("Name for pipe file")); +static llvm::cl::opt n("mlir-hello-data-size", llvm::cl::Hidden, + llvm::cl::init(1000), + llvm::cl::desc("Size of input vector")); + +static llvm::cl::opt + useTF("mlir-hello-use-tf", llvm::cl::Hidden, + llvm::cl::desc("Use TF AOT for inferencing model"), + llvm::cl::init(false)); + +using namespace mlir; +using namespace grpc; +using namespace MLBridge; +using namespace helloMLBridgegRPC; + +namespace { + +std::random_device rd; +std::mt19937 gen(5); +std::uniform_real_distribution dis(0.0, 1.0); + +class HelloMLBridgeEnv : public Environment { + Observation CurrObs; + +public: + HelloMLBridgeEnv() { setNextAgent("agent"); }; + Observation &reset() override; + Observation &step(Action) override; + +protected: + std::vector FeatureVector; +}; + +Observation &HelloMLBridgeEnv::step(Action Action) { + CurrObs.clear(); + std::copy(FeatureVector.begin(), FeatureVector.end(), + std::back_inserter(CurrObs)); + setDone(); + return CurrObs; +} + +Observation &HelloMLBridgeEnv::reset() { + std::copy(FeatureVector.begin(), FeatureVector.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +struct MLIRHelloMLBridge : public OperationPass, + public HelloMLBridgeEnv { +public: + MLIRHelloMLBridge() {} + + struct HelloMLIRTraining + : public helloMLBridgegRPC::HelloMLBridgeService::Service { + private: + std::vector FeatureVector; + + public: + grpc::Status + getTensor(grpc::ServerContext *context, + const ::helloMLBridgegRPC::ActionRequest *request, + ::helloMLBridgegRPC::TensorResponse *response) override { + if (request->action() == -1) { + return grpc::Status::OK; + } + if (request->action()) { + populateFeatureVector(FeatureVector); + for (int I = 0, E = FeatureVector.size(); I < E; 
I++) { + response->add_tensor(FeatureVector[I]); + } + } + return grpc::Status::OK; + } + }; + + void setTFModelRunner(int n) { + switch (n) { +#define M(x) \ + case x: \ + MLRunner = new TFModelRunner("output"); \ + break; + MODELS(M) +#undef M + } + // MLRunner = new TFModelRunner("output"); + } + + void TFinitCommunication() { + auto StartTime = std::chrono::high_resolution_clock::now(); + + std::pair> p1("x", FeatureVector); + + setTFModelRunner(n); + MLRunner->populateFeatures(p1); + double Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("tf-inference.csv", std::ios_base::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + + void runOnOperation() override { + assert(MLConfig::mlconfig != "" && "ml-config-path required"); + // Get the current operation being operated on. + Operation *op = getOperation(); + // llvm::errs() << "Hello World pass\n"; + // bool use_pipe = false; + // bool useONNX = false; + if (useTF) { + populateFeatureVector(FeatureVector); + TFinitCommunication(); + return; + } + if (usePipe) { + populateFeatureVector(FeatureVector); + initCommunication(); + } else { + if (training) { + HelloMLIRTraining *gRPCTrainer = new HelloMLIRTraining(); + MLRunner = new gRPCModelRunner< + helloMLBridgegRPC::HelloMLBridgeService::Service, + helloMLBridgegRPC::HelloMLBridgeService::Stub, + helloMLBridgegRPC::TensorResponse, + helloMLBridgegRPC::ActionRequest>(server_address, gRPCTrainer); + } else if (useONNX) { + std::ofstream outputFile; + outputFile.open("onnx-inference.csv", std::ios::app); + Agent *agent = new Agent(MLConfig::mlconfig + + "/hellopass/onnx_test_dir/dummy-torch-model-" + + std::to_string(n) + ".onnx"); + std::map agents; + agents["agent"] = agent; + auto StartTime = std::chrono::high_resolution_clock::now(); + Env = new HelloMLBridgeEnv(); + 
MLRunner = new ONNXModelRunner(this, agents, nullptr); + populateFeatureVector(FeatureVector); + int Out = MLRunner->evaluate(); + auto EndTime = std::chrono::high_resolution_clock::now(); + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } else { + // llvm::errs() << "Using 2nd gRPC flow...\n"; + std::ofstream outputFile; + outputFile.open("grpc-inference.csv", std::ios::app); + auto StartTime = std::chrono::high_resolution_clock::now(); + + helloMLBridgegRPC::TensorResponse request; + helloMLBridgegRPC::ActionRequest response; + MLRunner = + new gRPCModelRunner( + server_address, &request, &response); + + MLRunner->setRequest(&request); + MLRunner->setResponse(&response); + populateFeatureVector(FeatureVector); + std::pair> p1("tensor", FeatureVector); + MLRunner->populateFeatures(p1); + int Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + // outs() << n << " " << Duration.count() << "\n"; + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + } + } + +private: + BaseSerDes::Kind SerDesType; + HelloMLBridgeEnv *Env; + std::string basename = "/tmp/" + pipe_name; + MLModelRunner *MLRunner; + static void populateFeatureVector(std::vector &FeatureVector); + void initCommunication(); + void setModelRunner(int n); +}; + +void MLIRHelloMLBridge::initCommunication() { + if (data_format == "bytes") { + SerDesType = BaseSerDes::Kind::Bitstream; + } else if (data_format == "json") { + SerDesType = BaseSerDes::Kind::Json; + } + basename = "/tmp/" + pipe_name; + auto StartTime = std::chrono::high_resolution_clock::now(); + MLRunner = + new PipeModelRunner(basename + ".out", basename + ".in", SerDesType); + + std::pair> p1("tensor", FeatureVector); + MLRunner->populateFeatures(p1); + int Out = MLRunner->evaluate(); + + // llvm::outs() << 
"Returned value: " << Out << "\n"; + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("pipe-" + data_format + "-inference.csv", std::ios::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); +} + +void MLIRHelloMLBridge::populateFeatureVector( + std::vector &FeatureVector) { + FeatureVector.resize(n); + for (int i = 0; i < n; i++) { + FeatureVector[i] = dis(gen); + } +} + +void MLIRHelloMLBridge::setModelRunner(int n) { MLRunner = nullptr; } + +} // end anonymous namespace + +std::unique_ptr mlir::createMLIRHelloMLBridgePass() { + return std::make_unique(); +} + +static PassRegistration pass("mlir-hello-mlbridge", + "MLIR Hello MLBridge"); From 8ffd1b96979bed2afff9e05a4c7d0f844847716c Mon Sep 17 00:00:00 2001 From: svkeerthy Date: Mon, 15 Jan 2024 22:26:27 +0530 Subject: [PATCH 07/52] Updating MLCompilerBridge submodule (cherry picked from commit d75c78f5ce8e034ea58ece913640fd5daaf27360) --- MLCompilerBridge | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MLCompilerBridge b/MLCompilerBridge index b3f7359866e9..394c8eccbb4a 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit b3f7359866e9a551327a58e2358cbc14b74d3d47 +Subproject commit 394c8eccbb4a4fab4e65983c1d86e9135172b2b2 From c247655fa39f6de424869d0b642131722a76b108 Mon Sep 17 00:00:00 2001 From: Umesh-k26 Date: Sat, 20 Jan 2024 11:38:37 +0530 Subject: [PATCH 08/52] Updating MLCompilerBridge submodule (cherry picked from commit bc678f33a3749c7231bcbea68da7327f6270ae9f) --- MLCompilerBridge | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MLCompilerBridge b/MLCompilerBridge index 394c8eccbb4a..970d7b8d671f 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 394c8eccbb4a4fab4e65983c1d86e9135172b2b2 +Subproject commit 970d7b8d671f7c4f40ab6ba7b4d85b20445e9578 From 
4dbd33727002525dae3e5ff1a6cbf1060ece7130 Mon Sep 17 00:00:00 2001 From: Umesh-k26 Date: Sat, 20 Jan 2024 11:41:55 +0530 Subject: [PATCH 09/52] DEBUG_TYPE fix in Hello-MLBridge pass (cherry picked from commit 2d00f46f0790be1568243a4739885dd984820dee) --- llvm/lib/Transforms/Hello-MLBridge/Hello.cpp | 21 ++++++++++++++++++++ llvm/lib/Transforms/PosetRL/CMakeLists.txt | 14 +++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 llvm/lib/Transforms/PosetRL/CMakeLists.txt diff --git a/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp b/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp index 4964773f4f1f..e0f4a0124851 100644 --- a/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp +++ b/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp @@ -231,6 +231,27 @@ M(49500) \ M(50000) +<<<<<<< HEAD +======= +#define DEBUG_TYPE "hello_mlbridge" + +using namespace llvm; +using namespace grpc; +using namespace helloMLBridgegRPC; + +// #define DEBUG_TYPE "hello_mlbridge" + +STATISTIC(hellomodule, "Counts number of functions greeted"); + +static cl::opt training("hello-training", cl::Hidden, + cl::desc("whether it is training or inference"), + cl::init(false)); + +static cl::opt server_address( + "hello-server-address", cl::Hidden, + cl::desc("Starts the server in the given address, format :"), + cl::init("localhost:5050")); +>>>>>>> 2d00f46f0790... 
DEBUG_TYPE fix in Hello-MLBridge pass using namespace llvm; using namespace MLBridge; diff --git a/llvm/lib/Transforms/PosetRL/CMakeLists.txt b/llvm/lib/Transforms/PosetRL/CMakeLists.txt new file mode 100644 index 000000000000..4230cb5f8252 --- /dev/null +++ b/llvm/lib/Transforms/PosetRL/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_component_library(LLVMPosetRL posetRL.cpp + +LINK_COMPONENTS +IR2Vec + +ADDITIONAL_HEADER_DIRS +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/PosetRL + +DEPENDS +intrinsics_gen +LLVMMLBridge +) +target_link_libraries(LLVMPosetRL PRIVATE LLVMMLBridge) From 61f4208d3cf7700d747caa3b079bea649ea53bb4 Mon Sep 17 00:00:00 2001 From: Siddharth Jain Date: Sat, 20 Jan 2024 12:08:01 +0530 Subject: [PATCH 10/52] Uncommented GVNPass from PassManagerBuilder file (cherry picked from commit 952e103584f64bf546ee996e287d1e5ac0498041) --- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 2186 +++++++++++++++++ 1 file changed, 2186 insertions(+) create mode 100644 llvm/lib/Transforms/IPO/PassManagerBuilder.cpp diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 000000000000..e0ebfdf44261 --- /dev/null +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -0,0 +1,2186 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/AddSizeAttr/AddSizeAttr.h" +#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/PosetRL/PosetRL.h" +#include "llvm/Transforms/CodeSizeOpt/CodeSizeOpt.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/Transforms/IR2Vec-LOF/custom_loop_distribution.h" + +using namespace llvm; + + +static cl::opt + 
OPosetRL("OPosetRL", cl::init(false), cl::Hidden, + cl::desc("poset rl pass sequence")); + +static cl::opt + OCodeSizeOpt("OCodeSizeOpt", cl::init(false), cl::Hidden, + cl::desc("codesize opt pass sequence")); +static cl::opt + RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); + +static cl::opt +UseGVNAfterVectorization("use-gvn-after-vectorization", + cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); + +static cl::opt ExtraVectorizerPasses( + "extra-vectorizer-passes", cl::init(false), cl::Hidden, + cl::desc("Run cleanup optimization passes after vectorization.")); + +static cl::opt +RunLoopRerolling("reroll-loops", cl::Hidden, + cl::desc("Run the loop rerolling pass")); + +static cl::opt RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, + cl::desc("Run the NewGVN pass")); + +static cl::opt +Runcustom_loop_distribution("cld", cl::init(false), cl::Hidden, + cl::desc("costomized loop-distribution pass")); + +static cl::opt +RunNoPreDistributionPasses("No-PreDistributionPasses", cl::init(false), cl::Hidden, + cl::desc("Apply pre-distribution passes")); + +static cl::opt +RunNoPostDistributionPasses("No-PostDistributionPasses", cl::init(false), cl::Hidden, + cl::desc("Apply post-distribution passes")); + +// Experimental option to use CFL-AA +enum class CFLAAType { None, Steensgaard, Andersen, Both }; +static cl::opt + UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis"), + cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), + clEnumValN(CFLAAType::Steensgaard, "steens", + "Enable unification-based CFL-AA"), + clEnumValN(CFLAAType::Andersen, "anders", + "Enable inclusion-based CFL-AA"), + clEnumValN(CFLAAType::Both, "both", + "Enable both variants of CFL-AA"))); + +static cl::opt EnableLoopInterchange( + "enable-loopinterchange", 
cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopInterchange Pass")); + +static cl::opt EnableUnrollAndJam("enable-unroll-and-jam", + cl::init(false), cl::Hidden, + cl::desc("Enable Unroll And Jam Pass")); + +static cl::opt + EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable preparation for ThinLTO.")); + +static cl::opt + EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable performing ThinLTO.")); + +cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, + cl::desc("Enable hot-cold splitting pass")); + +static cl::opt UseLoopVersioningLICM( + "enable-loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental Loop Versioning LICM pass")); + +static cl::opt + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); + +static cl::opt EnableGVNHoist( + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); + +static cl::opt + DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), + cl::Hidden, + cl::desc("Disable shrink-wrap library calls")); + +static cl::opt EnableSimpleLoopUnswitch( + "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, + cl::desc("Enable the simple loop unswitch pass. Also enables independent " + "cleanup passes integrated into the loop pass manager pipeline.")); + +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = off)")); + +// This option is used in simplifying testing SampleFDO optimizations for +// profile loading. 
+static cl::opt + EnableCHR("enable-chr", cl::init(true), cl::Hidden, + cl::desc("Enable control height reduction optimization (CHR)")); + +cl::opt FlattenedProfileUsed( + "flattened-profile-used", cl::init(false), cl::Hidden, + cl::desc("Indicate the sample profile being used is flattened, i.e., " + "no inline hierachy exists in the profile. ")); + +cl::opt EnableOrderFileInstrumentation( + "enable-order-file-instrumentation", cl::init(false), cl::Hidden, + cl::desc("Enable order file instrumentation (default = off)")); + +static cl::opt + EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, + cl::desc("Enable lowering of the matrix intrinsics")); + +static cl::opt + EnableFusion("enable-fusion", cl::init(false), cl::Hidden, + cl::desc("LOF:Enable LLVM fusion")); + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = nullptr; + Inliner = nullptr; + DisableUnrollLoops = false; + SLPVectorize = RunSLPVectorization; + LoopVectorize = EnableLoopVectorization; + LoopsInterleaved = EnableLoopInterleaving; + RerollLoops = RunLoopRerolling; + NewGVN = RunNewGVN; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; + DisableGVNLoadPRE = false; + ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; + VerifyInput = false; + VerifyOutput = false; + MergeFunctions = false; + PrepareForLTO = false; + EnablePGOInstrGen = false; + EnablePGOCSInstrGen = false; + EnablePGOCSInstrUse = false; + PGOInstrGen = ""; + PGOInstrUse = ""; + PGOSampleUse = ""; + PrepareForThinLTO = EnablePrepareForThinLTO; + PerformThinLTO = EnablePerformThinLTO; + DivergentTarget = false; +} + +PassManagerBuilder::~PassManagerBuilder() { + delete LibraryInfo; + delete Inliner; +} + +/// Set of global extensions, automatically added as part of the standard set. 
+static ManagedStatic< + SmallVector, + 8>> + GlobalExtensions; +static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter; + +/// Check if GlobalExtensions is constructed and not empty. +/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger +/// the construction of the object. +static bool GlobalExtensionsNotEmpty() { + return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); +} + +PassManagerBuilder::GlobalExtensionID +PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + auto ExtensionID = GlobalExtensionsCounter++; + GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID)); + return ExtensionID; +} + +void PassManagerBuilder::removeGlobalExtension( + PassManagerBuilder::GlobalExtensionID ExtensionID) { + // RegisterStandardPasses may try to call this function after GlobalExtensions + // has already been destroyed; doing so should not generate an error. 
+ if (!GlobalExtensions.isConstructed()) + return; + + auto GlobalExtension = + llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) { + return std::get<2>(elem) == ExtensionID; + }); + assert(GlobalExtension != GlobalExtensions->end() && + "The extension ID to be removed should always be valid."); + + GlobalExtensions->erase(GlobalExtension); +} + +void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { + Extensions.push_back(std::make_pair(Ty, std::move(Fn))); +} + +void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, + legacy::PassManagerBase &PM) const { + if (GlobalExtensionsNotEmpty()) { + for (auto &Ext : *GlobalExtensions) { + if (std::get<0>(Ext) == ETy) + std::get<1>(Ext)(*this, PM); + } + } + for (unsigned i = 0, e = Extensions.size(); i != e; ++i) + if (Extensions[i].first == ETy) + Extensions[i].second(*this, PM); +} + +void PassManagerBuilder::addInitialAliasAnalysisPasses( + legacy::PassManagerBase &PM) const { + switch (UseCFLAA) { + case CFLAAType::Steensgaard: + PM.add(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + PM.add(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + PM.add(createCFLSteensAAWrapperPass()); + PM.add(createCFLAndersAAWrapperPass()); + break; + default: + break; + } + + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. 
+ PM.add(createTypeBasedAAWrapperPass()); + PM.add(createScopedNoAliasAAWrapperPass()); +} + +void PassManagerBuilder::addInstructionCombiningPass( + legacy::PassManagerBase &PM) const { + bool ExpensiveCombines = OptLevel > 2; + PM.add(createInstructionCombiningPass(ExpensiveCombines)); +} + +void PassManagerBuilder::populateFunctionPassManager( + legacy::FunctionPassManager &FPM) { + addExtensionsToPM(EP_EarlyAsPossible, FPM); + FPM.add(createEntryExitInstrumenterPass()); + + // Add LibraryInfo if we have some. + if (LibraryInfo) + FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + FPM.add(createCFGSimplificationPass()); + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); +} + +void PassManagerBuilder::customPopulateFunctionPassManager( + legacy::FunctionPassManager &FPM, unsigned customSizeLevel, unsigned subSeqNum) { + //if (((customSizeLevel==15 || customSizeLevel==17) && subSeqNum == 0) || (customSizeLevel==30 && subSeqNum < 4)) { + if (customSizeLevel==15 || customSizeLevel==17 || customSizeLevel==30 || customSizeLevel==34 || customSizeLevel==40) { + FPM.add(createEntryExitInstrumenterPass()); + } + + // Add LibraryInfo if we have some. 
+ if (LibraryInfo) + FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + //if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + if ((customSizeLevel==15 || customSizeLevel==17) && subSeqNum == 0){ + FPM.add(createCFGSimplificationPass()); + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); + } + + if ((customSizeLevel==34 || customSizeLevel==40) && (subSeqNum == 29 || subSeqNum == 30 || subSeqNum == 31)){ + FPM.add(createCFGSimplificationPass()); + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); + } + + if (customSizeLevel==30 && subSeqNum == 3){ + FPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel==30 && subSeqNum == 29){ + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); + } +} + +// Do PGO instrumentation generation or use pass as the option specified. +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, + bool IsCS = false) { + if (IsCS) { + if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) + return; + } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) + return; + + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + // We will not do this inline for context sensitive PGO (when IsCS is true). + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && + PGOSampleUse.empty() && !IsCS) { + // Create preinline pass. We construct an InlineParams object and specify + // the threshold here to avoid the command line options of the regular + // inliner to influence pre-inlining. The only fields of InlineParams we + // care about are DefaultThreshold and HintThreshold. + InlineParams IP; + IP.DefaultThreshold = PreInlineThreshold; + // FIXME: The hint threshold has the same value used by the regular inliner. 
+ // This should probably be lowered after performance testing. + IP.HintThreshold = 325; + + MPM.add(createFunctionInliningPass(IP)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + } + if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { + MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); + // Add the profile lowering pass. + InstrProfOptions Options; + if (!PGOInstrGen.empty()) + Options.InstrProfileOutput = PGOInstrGen; + Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; + MPM.add(createLoopRotatePass()); + MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); + } + if (!PGOInstrUse.empty()) + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); + // Indirect call promotion that promotes intra-module targets only. + // For ThinLTO this is done earlier due to interactions with globalopt + // for imported functions. We don't run this at -O0. + if (OptLevel > 0 && !IsCS) + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); +} + +void PassManagerBuilder::addFunctionSimplificationPasses( + legacy::PassManagerBase &MPM) { + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + + if (OptLevel > 1) { + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + } + + if (OptLevel > 1) { + // Speculative execution if the target has divergent branches; otherwise nop. 
+ MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // Combine silly seq's + if (OptLevel > 2) + MPM.add(createAggressiveInstCombinerPass()); + addInstructionCombiningPass(MPM); + if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) + MPM.add(createLibCallsShrinkWrapPass()); + addExtensionsToPM(EP_Peephole, MPM); + + + // Optimize memory intrinsic calls based on the profiled size information. + if (SizeLevel == 0) + MPM.add(createPGOMemOPSizeOptLegacyPass()); + + // TODO: Investigate the cost/benefit of tail call elimination on debugging. + if (OptLevel > 1) + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + + // Begin the loop pass pipeline. + if (EnableSimpleLoopUnswitch) { + // The simple loop unswitch pass relies on separate cleanup passes. Schedule + // them first so when we re-process a loop they run before other loop + // passes. + MPM.add(createLoopInstSimplifyPass()); + MPM.add(createLoopSimplifyCFGPass()); + } + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + // FIXME: We break the loop pass pipeline here in order to do full + // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the + // need for this. 
+ MPM.add(createCFGSimplificationPass()); + addInstructionCombiningPass(MPM); + // We resume loop passes creating a second loop pipeline here. + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + addExtensionsToPM(EP_LateLoopOptimizations, MPM); + MPM.add(createLoopDeletionPass()); // Delete dead loops + + if (EnableLoopInterchange) + MPM.add(createLoopInterchangePass()); // Interchange loops + + // Unroll small loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + // This ends the loop pass pipelines. + + if (OptLevel > 1) { + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + addInstructionCombiningPass(MPM); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1) { + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + if (RerollLoops) + MPM.add(createLoopRerollPass()); + + // TODO: Investigate if this is too expensive at O1. 
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // Clean up after everything. + addInstructionCombiningPass(MPM); + addExtensionsToPM(EP_Peephole, MPM); + + if (EnableCHR && OptLevel >= 3 && + (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) + MPM.add(createControlHeightReductionLegacyPass()); +} + +void PassManagerBuilder::customPopulateModulePassManager( + legacy::PassManagerBase &MPM, unsigned customSizeLevel, unsigned subSeqNum) { + + // if (customSizeLevel == 0 && subSeqNum == 0){ + // MPM.add(createPosetRLPass()); + + // } + + if (((customSizeLevel == 15 || customSizeLevel == 17) && subSeqNum == 0) || (customSizeLevel == 30 && subSeqNum == 29)){ + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 29 || subSeqNum == 30 || subSeqNum == 31)) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + } + + // Add LibraryInfo if we have some. 
+ if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (customSizeLevel == 15 && subSeqNum == 0){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createPromoteMemoryToRegisterPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 1){ + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 2){ + MPM.add(createDeadArgEliminationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 3){ + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createBarrierNoopPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 4){ + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + + if (customSizeLevel == 15 && subSeqNum == 5){ + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if (customSizeLevel == 15 && subSeqNum == 6){ + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. 
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 15 && subSeqNum == 7){ + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if (customSizeLevel == 15 && subSeqNum == 8){ + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps. 
+ MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 15 && subSeqNum == 9){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 15 && subSeqNum == 10){ + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 11){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if (customSizeLevel == 15 && subSeqNum == 12){ + MPM.add(createLoopLoadEliminationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 15 && subSeqNum == 13){ + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + addInstructionCombiningPass(MPM); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 14){ + MPM.add(createStripDeadPrototypesPass()); + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. 
+ MPM.add(createConstantMergePass()); // Merge dup global constants + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 0){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 29 || subSeqNum == 30 || subSeqNum == 31)){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 31){ + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + // if (OptLevel > 2) + // MPM.add(createCallSiteSplittingPass()); + + if (customSizeLevel == 17 && subSeqNum == 1) { + addInstructionCombiningPass(MPM); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 5) { + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 40 && (subSeqNum > 6 && subSeqNum < 22)) { + MPM.add(createPromoteMemoryToRegisterPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 2) { + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. 
*/)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createBarrierNoopPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 23 || subSeqNum == 24 || subSeqNum == 25 || subSeqNum == 26 || subSeqNum == 27)) { + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createSROAPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 23 || subSeqNum == 24 || subSeqNum == 25)) { + MPM.add(createEarlyCSEPass()); + MPM.add(createLowerExpectIntrinsicPass()); + MPM.add(createForceFunctionAttrsLegacyPass()); + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 23) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 24) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 25) { + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 26 || subSeqNum == 27)) 
{ + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 26) { + MPM.add(createDeadStoreEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 23 || subSeqNum == 24 || subSeqNum == 25 || subSeqNum == 26 || subSeqNum == 27)) { + MPM.add(createBarrierNoopPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 32 || subSeqNum == 33)) { + MPM.add(createCFGSimplificationPass()); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. 
+ MPM.add(createCorrelatedValuePropagationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 32) { + MPM.add(createDeadStoreEliminationPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 3) { + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + } + + if (customSizeLevel == 17 && subSeqNum == 4) { + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 6) { + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + } + + if (customSizeLevel == 17 && subSeqNum == 5) { + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 28) { + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if (customSizeLevel == 17 && subSeqNum == 6) { + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 12) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 14 || subSeqNum == 15)) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 
0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 14) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 15) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 16) { + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if (customSizeLevel == 17 && subSeqNum == 7) { + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && ( subSeqNum == 7 || subSeqNum == 8)) { + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 
+ MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 8) { + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if (customSizeLevel == 17 && subSeqNum == 8) { + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 3 || subSeqNum == 4)) { + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps. 
+ MPM.add(createCorrelatedValuePropagationPass()); + } + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 3) { + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 17 && subSeqNum == 9) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 9) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 17 && subSeqNum == 10) { + addInstructionCombiningPass(MPM); + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 0 || subSeqNum == 1 || subSeqNum == 2)) { + addInstructionCombiningPass(MPM); + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 0) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 1) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 2) { + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + + 
if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 29) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 30) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 11) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 17) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if (customSizeLevel == 17 && subSeqNum == 12) { + MPM.add(createLoopLoadEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 21) { + MPM.add(createLoopLoadEliminationPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 13) { + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 22) { + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + } + + if (customSizeLevel == 17 && subSeqNum == 14) { + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if (customSizeLevel == 17 && subSeqNum == 15) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. 
+ MPM.add(createConstantMergePass()); // Merge dup global constants + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 10 || subSeqNum == 11)) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 10) { + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 11) { + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 13) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 17 && subSeqNum == 16) { + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 18) { + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 19){ + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 20){ + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? 
createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if (customSizeLevel == 30 && subSeqNum == 0){ + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if (customSizeLevel == 30 && subSeqNum == 1){ + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + } + + if (customSizeLevel == 30 && subSeqNum == 2){ + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createBarrierNoopPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 4){ + addInstructionCombiningPass(MPM); + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 5){ + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 30 && subSeqNum == 6){ + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 7){ + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 30 && subSeqNum == 8){ + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + } + + if (customSizeLevel == 30 && subSeqNum == 9){ //Check + addInstructionCombiningPass(MPM); + } + + 
if (customSizeLevel == 30 && subSeqNum == 10){ + MPM.add(createLoopLoadEliminationPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 11){ + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 12){ + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + + if (customSizeLevel == 30 && subSeqNum == 13){ + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + + if (customSizeLevel == 30 && subSeqNum == 14){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 15){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 30 && subSeqNum == 16){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if (customSizeLevel == 30 && subSeqNum == 17){ + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 
+ MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if (customSizeLevel == 30 && subSeqNum == 18){ + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if (customSizeLevel == 30 && subSeqNum == 19){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if (customSizeLevel == 30 && subSeqNum == 20){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 21){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 30 && subSeqNum == 22){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 
0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if (customSizeLevel == 30 && subSeqNum == 23){ + MPM.add(createGlobalOptimizerPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 24){ + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + } + + if (customSizeLevel == 30 && subSeqNum == 25){ + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants + } + + if (customSizeLevel == 30 && subSeqNum == 26){ + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 27){ + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + + if (customSizeLevel == 30 && subSeqNum == 28){ + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. 
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 30 && subSeqNum == 29){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + } + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + //MPM.add(createGlobalsAAWrapperPass()); + + // if (OptLevel > 2) + // MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // addFunctionSimplificationPasses(MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + // MPM.add(createBarrierNoopPass()); + + // The inliner performs some kind of dead code elimination as it goes, + // but there are cases that are not really caught by it. We might + // at some point consider teaching the inliner about them, but it + // is OK for now to run GlobalOpt + GlobalDCE in tandem as their + // benefits generally outweight the cost, making the whole pipeline + // faster. + // if (RunInliner) { + // MPM.add(createGlobalOptimizerPass()); + // MPM.add(createGlobalDCEPass()); + // } + + + // Scheduling LoopVersioningLICM when inlining is over, because after that + // we may see more accurate aliasing. Reason to run this late is that too + // early versioning may prevent further inlining due to increase of code + // size. By placing it just after inlining other optimizations which runs + // later might get benefit of no-alias assumption in clone loop. 
+ // if (UseLoopVersioningLICM) { + // MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + // MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // } + + // if (EnableMatrix) { + // MPM.add(createLowerMatrixIntrinsicsPass()); + // // CSE the pointer arithmetic of the column vectors. This allows alias + // // analysis to establish no-aliasing between loads and stores of different + // // columns of the same matrix. + // MPM.add(createEarlyCSEPass(false)); + // } + + // Re-rotate loops in all our loop nests. These may have fallout out of + // rotated form due to GVN or other transformations, and the vectorizer relies + // on the rotated form. Disable header duplication at -Oz. + // MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + + // Distribute loops to allow partial vectorization. I.e. isolate dependences + // into separate loop that would otherwise inhibit vectorization. This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + // MPM.add(createLoopDistributePass()); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + // MPM.add(createLoopLoadEliminationPass()); + + // FIXME: Because of #pragma vectorize enable, the passes below are always + // inserted in the pipeline, even when the vectorizer doesn't run (ex. when + // on -O1 and no #pragma is found). Would be good to have these two passes + // as function calls, so that we can only pass them when the vectorizer + // changed the code. + // if (OptLevel > 1 && ExtraVectorizerPasses) { + // // At higher optimization levels, try to clean up any runtime overlap and + // // alignment checks inserted by the vectorizer. 
We want to track correlated + // // runtime checks for two inner loops in the same outer loop, fold any + // // common computations, hoist loop-invariant aspects out of any outer loop, + // // and unswitch the runtime checks if possible. Once hoisted, we may have + // // dead (or speculatable) control flows or more combining opportunities. + // MPM.add(createEarlyCSEPass()); + // MPM.add(createCorrelatedValuePropagationPass()); + // addInstructionCombiningPass(MPM); + // MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + // MPM.add(createCFGSimplificationPass()); + // addInstructionCombiningPass(MPM); + // } + + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + // MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + + // if (SLPVectorize) { + // MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + // if (OptLevel > 1 && ExtraVectorizerPasses) { + // MPM.add(createEarlyCSEPass()); + // } + // } + + // if (EnableUnrollAndJam && !DisableUnrollLoops) { + // // Unroll and Jam. We do this before unroll but need to be in a separate + // // loop pass manager in order for the outer loop to be processed by + // // unroll and jam before the inner loop is unrolled. + // MPM.add(createLoopUnrollAndJamPass(OptLevel)); + // } + + // Unroll small loops + // MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + // ForgetAllSCEVInLoopUnroll)); + + // if (!DisableUnrollLoops) { + // // LoopUnroll may generate some redundancy to clean up. + // addInstructionCombiningPass(MPM); + + // // Runtime unrolling will introduce runtime check in loop prologue.
If the + // // unrolled loop is a inner loop, then the prologue will be inside the + // // outer loop. LICM pass can help to promote the runtime check out if the + // // checked value is loop invariant. + // MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // } + + // MPM.add(createWarnMissedTransformationsPass()); + + // After vectorization and unrolling, assume intrinsics may tell us more + // about pointer alignments. + // MPM.add(createAlignmentFromAssumptionsPass()); + + // FIXME: We shouldn't bother with this anymore. + // MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + // if (OptLevel > 1) { + // MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + // MPM.add(createConstantMergePass()); // Merge dup global constants + // } + + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + // if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + // MPM.add(createHotColdSplittingPass()); + + // if (MergeFunctions) + // MPM.add(createMergeFunctionsPass()); + + // LoopSink pass sinks instructions hoisted by LICM, which serves as a + // canonicalization pass that enables other optimizations. As a result, + // LoopSink pass needs to be a very late IR pass to avoid undoing LICM + // result too early. + // MPM.add(createLoopSinkPass()); + // Get rid of LCSSA nodes. + // MPM.add(createInstSimplifyLegacyPass()); + + // This hoists/decomposes div/rem ops. It should run after other sink/hoist + // passes to avoid re-sinking, but before SimplifyCFG because it can allow + // flattening of blocks. + // MPM.add(createDivRemPairsPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
+ // MPM.add(createCFGSimplificationPass()); +} + +void PassManagerBuilder::populateModulePassManager( + legacy::PassManagerBase &MPM) { + // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link + // is handled separately, so just check this is not the ThinLTO post-link. + bool DefaultOrPreLinkPipeline = !PerformThinLTO; + + if (OPosetRL){ + errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; + MPM.add(createPosetRLPass()); + return; + } + + if(OCodeSizeOpt) { + errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; + MPM.add(createCodeSizeOptPass()); + return; + } + + if(!RunNoPreDistributionPasses){ + if (!PGOSampleUse.empty()) { + MPM.add(createPruneEHPass()); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && PerformThinLTO)) + MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); + } + + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + + // If all optimizations are disabled, just run the always-inline pass and, + // if enabled, the function merging pass. + if (OptLevel == 0) { + addPGOInstrPasses(MPM); + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + + // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly + // creates a CGSCC pass manager, but we don't want to add extensions into + // that pass manager. To prevent this we insert a no-op module pass to reset + // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 + // builds. The function merging pass is + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) + MPM.add(createBarrierNoopPass()); + + if (PerformThinLTO) { + // Drop available_externally and unreferenced globals. 
This is necessary + // with ThinLTO in order to avoid leaving undefined references to dead + // globals in the object file. + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createGlobalDCEPass()); + } + + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + + if (PrepareForLTO || PrepareForThinLTO) { + MPM.add(createCanonicalizeAliasesPass()); + // Rename anon globals to be able to export them in the summary. + // This has to be done after we add the extensions to the pass manager + // as there could be passes (e.g. Adddress sanitizer) which introduce + // new unnamed globals. + MPM.add(createNameAnonGlobalPass()); + } + return; + } + + // Add LibraryInfo if we have some. + if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + // For ThinLTO there are two passes of indirect call promotion. The + // first is during the compile phase when PerformThinLTO=false and + // intra-module indirect call targets are promoted. The second is during + // the ThinLTO backend when PerformThinLTO=true, when we promote imported + // inter-module indirect calls. For that we perform indirect call promotion + // earlier in the pass pipeline, here before globalopt. Otherwise imported + // available_externally functions look unreferenced and are removed. + if (PerformThinLTO) + MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, + !PGOSampleUse.empty())); + + // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops + // as it will change the CFG too much to make the 2nd profile annotation + // in backend more difficult. + bool PrepareForThinLTOUsingPGOSampleProfile = + PrepareForThinLTO && !PGOSampleUse.empty(); + if (PrepareForThinLTOUsingPGOSampleProfile) + DisableUnrollLoops = true; + + // Infer attributes about declarations if possible. 
+ MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + + if (OptLevel > 2) + MPM.add(createCallSiteSplittingPass()); + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createCalledValuePropagationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. + MPM.add(createAttributorLegacyPass()); + + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. + MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + // For SamplePGO in ThinLTO compile phase, we do not want to do indirect + // call promotion as it will change the CFG too much to make the 2nd + // profile annotation in backend more difficult. + // PGO instrumentation is added during the compile phase for ThinLTO, do + // not run it a second time + if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) + addPGOInstrPasses(MPM); + + // Create profile COMDAT variables. Lld linker wants to see all variables + // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. + if (!PerformThinLTO && EnablePGOCSInstrGen) + MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + + // Start of CallGraph SCC passes. 
+ MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); + addFunctionSimplificationPasses(MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + MPM.add(createBarrierNoopPass()); + + if (RunPartialInlining) + MPM.add(createPartialInliningPass()); + + if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + + // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass + // for LTO and ThinLTO -- The actual pass will be called after all inlines + // are performed. + // Need to do this after COMDAT variables have been eliminated, + // (i.e. after EliminateAvailableExternallyPass). 
+ if (!(PrepareForLTO || PrepareForThinLTO)) + addPGOInstrPasses(MPM, /* IsCS */ true); + + if (EnableOrderFileInstrumentation) + MPM.add(createInstrOrderFilePass()); + + MPM.add(createReversePostOrderFunctionAttrsPass()); + + // The inliner performs some kind of dead code elimination as it goes, + // but there are cases that are not really caught by it. We might + // at some point consider teaching the inliner about them, but it + // is OK for now to run GlobalOpt + GlobalDCE in tandem as their + // benefits generally outweight the cost, making the whole pipeline + // faster. + if (RunInliner) { + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + if(EnableFusion){ + MPM.add(createLoopFusePass()); + } + } + + // If we are planning to perform ThinLTO later, let's not bloat the code with + // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes + // during ThinLTO and perform the rest of the optimizations afterward. + if (PrepareForThinLTO) { + // Ensure we perform any last passes, but do so before renaming anonymous + // globals in case the passes add any. + addExtensionsToPM(EP_OptimizerLast, MPM); + MPM.add(createCanonicalizeAliasesPass()); + // Rename anon globals to be able to export them in the summary. + MPM.add(createNameAnonGlobalPass()); + return; + } + + if (PerformThinLTO) + // Optimize globals now when performing ThinLTO, this enables more + // optimizations later. + MPM.add(createGlobalOptimizerPass()); + + // Scheduling LoopVersioningLICM when inlining is over, because after that + // we may see more accurate aliasing. Reason to run this late is that too + // early versioning may prevent further inlining due to increase of code + // size. By placing it just after inlining other optimizations which runs + // later might get benefit of no-alias assumption in clone loop. 
+ if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsAAWrapperPass()); + + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + + if (EnableMatrix) { + MPM.add(createLowerMatrixIntrinsicsPass()); + // CSE the pointer arithmetic of the column vectors. This allows alias + // analysis to establish no-aliasing between loads and stores of different + // columns of the same matrix. + MPM.add(createEarlyCSEPass(false)); + } + + addExtensionsToPM(EP_VectorizerStart, MPM); + + // Re-rotate loops in all our loop nests. These may have fallout out of + // rotated form due to GVN or other transformations, and the vectorizer relies + // on the rotated form. Disable header duplication at -Oz. + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + } + + // Distribute loops to allow partial vectorization. I.e. 
isolate dependences + // into separate loop that would otherwise inhibit vectorization. This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + if(!Runcustom_loop_distribution && !RunNoPreDistributionPasses && !RunNoPostDistributionPasses) { + MPM.add(createLoopDistributePass()); + } + + // if(Runcustom_loop_distribution) { + // MPM.add(createcustom_loop_distributionPass()); + // } + + if(!RunNoPostDistributionPasses) { + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + MPM.add(createLoopLoadEliminationPass()); + + // FIXME: Because of #pragma vectorize enable, the passes below are always + // inserted in the pipeline, even when the vectorizer doesn't run (ex. when + // on -O1 and no #pragma is found). Would be good to have these two passes + // as function calls, so that we can only pass them when the vectorizer + // changed the code. + addInstructionCombiningPass(MPM); + if (OptLevel > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. We want to track correllated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities. 
+ MPM.add(createEarlyCSEPass()); + MPM.add(createCorrelatedValuePropagationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createCFGSimplificationPass()); + addInstructionCombiningPass(MPM); + } + + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + + if (SLPVectorize) { + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (OptLevel > 1 && ExtraVectorizerPasses) { + MPM.add(createEarlyCSEPass()); + } + } + + addExtensionsToPM(EP_Peephole, MPM); + addInstructionCombiningPass(MPM); + + if (EnableUnrollAndJam && !DisableUnrollLoops) { + // Unroll and Jam. We do this before unroll but need to be in a separate + // loop pass manager in order for the outer loop to be processed by + // unroll and jam before the inner loop is unrolled. + MPM.add(createLoopUnrollAndJamPass(OptLevel)); + } + + // Unroll small loops + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + + if (!DisableUnrollLoops) { + // LoopUnroll may generate some redundency to cleanup. + addInstructionCombiningPass(MPM); + + // Runtime unrolling will introduce runtime check in loop prologue. If the + // unrolled loop is a inner loop, then the prologue will be inside the + // outer loop. LICM pass can help to promote the runtime check out if the + // checked value is loop invariant. 
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + MPM.add(createWarnMissedTransformationsPass()); + + // After vectorization and unrolling, assume intrinsics may tell us more + // about pointer alignments. + MPM.add(createAlignmentFromAssumptionsPass()); + + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 1) { + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants + } + + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + MPM.add(createHotColdSplittingPass()); + + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + + // LoopSink pass sinks instructions hoisted by LICM, which serves as a + // canonicalization pass that enables other optimizations. As a result, + // LoopSink pass needs to be a very late IR pass to avoid undoing LICM + // result too early. + MPM.add(createLoopSinkPass()); + // Get rid of LCSSA nodes. + MPM.add(createInstSimplifyLegacyPass()); + + // This hoists/decomposes div/rem ops. It should run after other sink/hoist + // passes to avoid re-sinking, but before SimplifyCFG because it can allow + // flattening of blocks. + MPM.add(createDivRemPairsPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
+ MPM.add(createCFGSimplificationPass()); + + addExtensionsToPM(EP_OptimizerLast, MPM); + + if (PrepareForLTO) { + MPM.add(createCanonicalizeAliasesPass()); + // Rename anon globals to be able to handle them in the summary + MPM.add(createNameAnonGlobalPass()); + } + } +} + +void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { + // Load sample profile before running the LTO optimization pipeline. + if (!PGOSampleUse.empty()) { + PM.add(createPruneEHPass()); + PM.add(createSampleProfileLoaderPass(PGOSampleUse)); + } + + // Remove unused virtual tables to improve the quality of code generated by + // whole-program devirtualization and bitset lowering. + PM.add(createGlobalDCEPass()); + + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + + if (OptLevel > 1) { + // Split call-site with more constrained arguments. + PM.add(createCallSiteSplittingPass()); + + // Indirect call promotion. This should promote all the targets that are + // left by the earlier promotion pass that promotes intra-module targets. + // This two-step promotion is to save the compile time. For LTO, it should + // produce the same result as if we only do promotion here. + PM.add( + createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. 
+ PM.add(createCalledValuePropagationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. + PM.add(createAttributorLegacyPass()); + } + + // Infer attributes about definitions. The readnone attribute in particular is + // required for virtual constant propagation. + PM.add(createPostOrderFunctionAttrsLegacyPass()); + PM.add(createReversePostOrderFunctionAttrsPass()); + + // Split globals using inrange annotations on GEP indices. This can help + // improve the quality of generated code when virtual constant propagation or + // control flow integrity are enabled. + PM.add(createGlobalSplitPass()); + + // Apply whole-program devirtualization and virtual constant propagation. + PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + + // That's all we need at opt level 1. + if (OptLevel == 1) + return; + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + if (OptLevel > 2) + PM.add(createAggressiveInstCombinerPass()); + addInstructionCombiningPass(PM); + addExtensionsToPM(EP_Peephole, PM); + + // Inline small functions + bool RunInliner = Inliner; + if (RunInliner) { + PM.add(Inliner); + Inliner = nullptr; + } + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // CSFDO instrumentation and use pass. 
+ addPGOInstrPasses(PM, /* IsCS */ true); + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + addInstructionCombiningPass(PM); + addExtensionsToPM(EP_Peephole, PM); + PM.add(createJumpThreadingPass()); + + // Break up allocas + PM.add(createSROAPass()); + + // LTO provides additional opportunities for tailcall elimination due to + // link-time inlining, and visibility of nocapture attribute. + if (OptLevel > 1) + PM.add(createTailCallEliminationPass()); + + if (RunInliner) + PM.add(createLoopFusePass()); + + // Infer attributes on declarations, call sites, arguments, etc. + PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. + + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. + PM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + + // Nuke dead stores. + PM.add(createDeadStoreEliminationPass()); + + // More loops are countable; try to optimize them. + PM.add(createIndVarSimplifyPass()); + PM.add(createLoopDeletionPass()); + if (EnableLoopInterchange) + PM.add(createLoopInterchangePass()); + + // Unroll small loops + PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + PM.add(createLoopVectorizePass(true, !LoopVectorize)); + // The vectorizer may have significantly shortened a loop body; unroll again. 
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + + PM.add(createWarnMissedTransformationsPass()); + + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + addInstructionCombiningPass(PM); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + addInstructionCombiningPass(PM); // Clean up again + PM.add(createBitTrackingDCEPass()); + + // More scalar chains could be vectorized due to more alias information + if (SLPVectorize) + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + // After vectorization, assume intrinsics may tell us more about pointer + // alignments. + PM.add(createAlignmentFromAssumptionsPass()); + + // Cleanup and simplify the code after the scalar optimizations. + addInstructionCombiningPass(PM); + addExtensionsToPM(EP_Peephole, PM); + + PM.add(createJumpThreadingPass()); +} + +void PassManagerBuilder::addLateLTOOptimizationPasses( + legacy::PassManagerBase &PM) { + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildLTODefaultPipeline). + if (EnableHotColdSplit) + PM.add(createHotColdSplittingPass()); + + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + + // Now that we have optimized the program, discard unreachable functions. + PM.add(createGlobalDCEPass()); + + // FIXME: this is profitable (for compiler time) to do at -O0 too, but + // currently it damages debug info. 
+ if (MergeFunctions) + PM.add(createMergeFunctionsPass()); +} + +void PassManagerBuilder::populateThinLTOPassManager( + legacy::PassManagerBase &PM) { + PerformThinLTO = true; + if (LibraryInfo) + PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (VerifyInput) + PM.add(createVerifierPass()); + + if (ImportSummary) { + // These passes import type identifier resolutions for whole-program + // devirtualization and CFI. They must run early because other passes may + // disturb the specific instruction patterns that these passes look for, + // creating dependencies on resolutions that may not appear in the summary. + // + // For example, GVN may transform the pattern assume(type.test) appearing in + // two basic blocks into assume(phi(type.test, type.test)), which would + // transform a dependency on a WPD resolution into a dependency on a type + // identifier resolution for CFI. + // + // Also, WPD has access to more precise information than ICP and can + // devirtualize more effectively, so it should operate on the IR first. + PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary)); + PM.add(createLowerTypeTestsPass(nullptr, ImportSummary)); + } + + populateModulePassManager(PM); + + if (VerifyOutput) + PM.add(createVerifierPass()); + PerformThinLTO = false; +} + +void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { + if (LibraryInfo) + PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (VerifyInput) + PM.add(createVerifierPass()); + + addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM); + + if (OptLevel != 0) + addLTOOptimizationPasses(PM); + else { + // The whole-program-devirt pass needs to run at -O0 because only it knows + // about the llvm.type.checked.load intrinsic: it needs to both lower the + // intrinsic itself and handle it in the summary. 
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + } + + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + + // Lower type metadata and the type.test intrinsic. This pass supports Clang's + // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at + // link time if CFI is enabled. The pass does nothing if CFI is disabled. + PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + + if (OptLevel != 0) + addLateLTOOptimizationPasses(PM); + + addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM); + + if (VerifyOutput) + PM.add(createVerifierPass()); +} + +inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { + return reinterpret_cast(P); +} + +inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { + return reinterpret_cast(P); +} + +LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { + PassManagerBuilder *PMB = new PassManagerBuilder(); + return wrap(PMB); +} + +void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { + PassManagerBuilder *Builder = unwrap(PMB); + delete Builder; +} + +void +LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->OptLevel = OptLevel; +} + +void +LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->SizeLevel = SizeLevel; +} + +void +LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + // NOTE: The DisableUnitAtATime switch has been removed. 
+} + +void +LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnrollLoops = Value; +} + +void +LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + // NOTE: The simplify-libcalls pass has been removed. +} + +void +LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, + unsigned Threshold) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->Inliner = createFunctionInliningPass(Threshold); +} + +void +LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + legacy::FunctionPassManager *FPM = unwrap(PM); + Builder->populateFunctionPassManager(*FPM); +} + +void +LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + legacy::PassManagerBase *MPM = unwrap(PM); + Builder->populateModulePassManager(*MPM); +} + +void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM, + LLVMBool Internalize, + LLVMBool RunInliner) { + PassManagerBuilder *Builder = unwrap(PMB); + legacy::PassManagerBase *LPM = unwrap(PM); + + // A small backwards compatibility hack. populateLTOPassManager used to take + // an RunInliner option. 
+ if (RunInliner && !Builder->Inliner) + Builder->Inliner = createFunctionInliningPass(); + + Builder->populateLTOPassManager(*LPM); +} From e00e9b1d2c0c67444fb72efaa758b92669bff05a Mon Sep 17 00:00:00 2001 From: Umesh-k26 Date: Sat, 20 Jan 2024 15:33:50 +0530 Subject: [PATCH 11/52] Moved CodeSizeOpt to IPO (cherry picked from commit fb9f173c59b70a632443b7f511a26f1ae3744af0) --- MLCompilerBridge | 2 +- clang/tools/driver/CMakeLists.txt | 6 + llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt | 23 ++++ llvm/lib/Transforms/CMakeLists.txt | 6 + llvm/lib/Transforms/IPO/CMakeLists.txt | 11 ++ .../Transforms/IPO/CodeSizeOpt/CMakeLists.txt | 14 +++ .../IPO/CodeSizeOpt/CodeSizeOpt.cpp | 115 ++++++++++++++++++ .../CodeSizeOpt/inference/CodeSizeOptEnv.h | 68 +++++++++++ llvm/tools/bugpoint/CMakeLists.txt | 4 + llvm/tools/llc/CMakeLists.txt | 6 + llvm/tools/llvm-mca/CMakeLists.txt | 4 + llvm/tools/llvm-opt-fuzzer/CMakeLists.txt | 6 + llvm/tools/opt/CMakeLists.txt | 7 ++ 13 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt create mode 100644 llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt create mode 100644 llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp create mode 100644 llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h diff --git a/MLCompilerBridge b/MLCompilerBridge index 970d7b8d671f..15cac951a09b 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 970d7b8d671f7c4f40ab6ba7b4d85b20445e9578 +Subproject commit 15cac951a09b1ce568f3e3c29dd1d6ef5fb46243 diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index 2182486f93a5..c12899092427 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -16,6 +16,12 @@ set( LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD +======= + AddSizeAttr + IR2Vec + CollectMachineIR +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) # Support plugins. 
diff --git a/llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt b/llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt new file mode 100644 index 000000000000..a15d289a30bd --- /dev/null +++ b/llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt @@ -0,0 +1,23 @@ +# message("From MLREGALOC", ${LLVM_INCLUDE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + +find_package(Protobuf CONFIG REQUIRED) +message(STATUS "Using protobuf ${Protobuf_VERSION}") +message(STATUS "Using protobuf Include Dirs ${Protobuf_INCLUDE_DIRS}") + +add_llvm_component_library(LLVMMLRegAlloc +MLRegAlloc.cpp + +DEPENDS +intrinsics_gen + +LINK_LIBS +LLVMMLRegAllocInference +LLVMSymbolic +) + +add_subdirectory(inference) +target_link_libraries(LLVMMLRegAlloc PRIVATE gRPCModelRunnerLib ONNXModelRunnerLib) +configure_file (./Config.h.cmake Config.h @ONLY) \ No newline at end of file diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index 83d5d24c10d6..8b2468080e60 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -12,4 +12,10 @@ add_subdirectory(Hello-IR2Vec) add_subdirectory(ObjCARC) add_subdirectory(Coroutines) add_subdirectory(CFGuard) +<<<<<<< HEAD add_subdirectory(Hello-MLBridge) +======= +add_subdirectory(AddSizeAttr) +add_subdirectory(MCAInstrumentation) +add_subdirectory(PipeIR) +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index e03aff0f65d7..57b378dd9f23 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,3 +1,9 @@ +<<<<<<< HEAD +======= +add_subdirectory(PosetRL) +add_subdirectory(CodeSizeOpt) + +>>>>>>> fb9f173c59b7... 
Moved CodeSizeOpt to IPO add_llvm_component_library(LLVMipo AlwaysInliner.cpp Annotation2Metadata.cpp @@ -50,6 +56,7 @@ add_llvm_component_library(LLVMipo intrinsics_gen omp_gen +<<<<<<< HEAD COMPONENT_NAME IPO @@ -72,3 +79,7 @@ add_llvm_component_library(LLVMipo Vectorize Instrumentation ) +======= +target_link_libraries(LLVMipo PUBLIC LLVMPosetRL LLVMCodeSizeOpt) +target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt b/llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt new file mode 100644 index 000000000000..1e57171c6d53 --- /dev/null +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_component_library(LLVMCodeSizeOpt CodeSizeOpt.cpp + +LINK_COMPONENTS +IR2Vec + +ADDITIONAL_HEADER_DIRS +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/CodeSizeOpt + +DEPENDS +intrinsics_gen +LLVMMLBridge +) +target_link_libraries(LLVMCodeSizeOpt PUBLIC LLVMMLBridge) diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp b/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp new file mode 100644 index 000000000000..fc3492abd33e --- /dev/null +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp @@ -0,0 +1,115 @@ +#include "llvm/Transforms/CodeSizeOpt/CodeSizeOpt.h" +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "inference/CodeSizeOptEnv.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR2Vec.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/ScopedPrinter.h" +#include 
"llvm/Support/raw_ostream.h" +#include "llvm/Transforms/CodeSizeOpt/ActionSpace/ActionEnum.h" +#include "llvm/Transforms/CodeSizeOpt/ActionSpace/ActionHeaders.h" +#include "llvm/Transforms/CodeSizeOpt/ActionSpace/ActionSwitch.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include +#include +#include + +using namespace llvm; + +namespace { +struct CodeSizeOpt : public ModulePass, public CodeSizeOptEnv { + static char ID; + CodeSizeOpt() : ModulePass(ID) {} + bool runOnModule(Module &M) override { + assert(MLConfig::mlconfig != "" && "ml-config-path required" ); + this->M = &M; + llvm::Triple triple(M.getTargetTriple()); + tlii_ = llvm::TargetLibraryInfoImpl(triple); + Agent agent(MLConfig::mlconfig + "/codesizeopt/compiler_gym_ir2vec.onnx"); + std::map agents; + agents["agent"] = &agent; + MLRunner = std::make_unique(this, agents, &M.getContext()); + MLRunner->evaluate(); + errs() << "Sequence: "; + for (auto a : Sequence) + errs() << a << " "; + errs() << "\n"; + + return true; + } + + inline const llvm::TargetLibraryInfoImpl& tlii() const { return tlii_; } + + void addPassToPM(llvm::legacy::FunctionPassManager* PM, Pass* P) { + errs() << "Adding Pass: Profilesummaryinfo" << "\n"; + PM->add(new ProfileSummaryInfoWrapperPass()); + errs() << "Adding Pass: TargetLibraryInfo" << "\n"; + PM->add(new TargetLibraryInfoWrapperPass(tlii())); + errs() << "Adding Pass: TargetTransformInfo" << "\n"; + PM->add(createTargetTransformInfoWrapperPass(TargetIRAnalysis())); + errs() << "Adding Pass: " << P->getPassName() << "\n"; + PM->add(P); + } + + Embedding getEmbeddings() override { + auto Ir2vec = + IR2Vec::Embeddings(*M, IR2Vec::IR2VecMode::FlowAware, + MLConfig::mlconfig + "/ir2vec/seedEmbeddingVocab-300-llvm10.txt"); + auto ProgVector = Ir2vec.getProgramVector(); + Embedding Vector(ProgVector.begin(), ProgVector.end()); + // errs() << "Embedding: "; + // for(auto v : Vector) + // errs() << v << " "; + // errs() << "\n"; + return Vector; + } + + void 
applySeq(Action Action) override { + PassManagerBuilder Builder; + Builder.OptLevel = 2; + Builder.SizeLevel = 2; + + legacy::FunctionPassManager FPM(M); + legacy::PassManager MPM; + errs() << "Handle Pass: " << Action << "\n"; + +#define HANDLE_PASS(pass) addPassToPM(&FPM, pass); + HANDLE_ACTION(Action, HANDLE_PASS) +#undef HANDLE_PASS + + // Builder.customPopulateFunctionPassManager(FPM, 34, Action); + // Builder.customPopulateModulePassManager(MPM, 34, Action); + // run the passes + errs() << "Running Module Passes\n"; + MPM.run(*M); + errs() << "Running Function Passes\n"; + for (auto &F : *M) { + FPM.run(F); + } + } + +private: + Module *M; + std::unique_ptr MLRunner; + llvm::TargetLibraryInfoImpl tlii_; +}; +} // namespace +char CodeSizeOpt::ID = 0; +INITIALIZE_PASS_BEGIN(CodeSizeOpt, "codesizeopt-rl", "poset sequence pass", + false, false) +INITIALIZE_PASS_END(CodeSizeOpt, "codesizeopt-rl", "poset sequence pass", false, + false) + +ModulePass *llvm::createCodeSizeOptPass() { return new CodeSizeOpt(); } diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h b/llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h new file mode 100644 index 000000000000..f894511cd8f0 --- /dev/null +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h @@ -0,0 +1,68 @@ +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +#define ActionMaskSize 34 +#define EmbeddingSize 300 + +using namespace llvm; +using namespace MLBridge; + +typedef std::vector Embedding; + +class CodeSizeOptEnv : public Environment { + unsigned 
Actioncount = 0; + Embedding CurrEmbedding; + Observation CurrObs; +public: + std::vector Sequence; + +public: + CodeSizeOptEnv(); + Observation& reset() override; + Observation& step(Action) override; + virtual Embedding getEmbeddings() = 0; + virtual void applySeq(Action) = 0; + +}; + + +inline Observation& CodeSizeOptEnv::step(Action Action) { + Sequence.push_back(Action); + applySeq(Action); + + Actioncount += 1; + CurrEmbedding = getEmbeddings(); + if (Actioncount >= 30) + setDone(); + + CurrObs.clear(); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +inline Observation& CodeSizeOptEnv::reset() { + CurrEmbedding = getEmbeddings(); + + CurrObs.clear(); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +inline CodeSizeOptEnv::CodeSizeOptEnv() { + CurrEmbedding.assign(EmbeddingSize, 0); + setNextAgent("agent"); +} diff --git a/llvm/tools/bugpoint/CMakeLists.txt b/llvm/tools/bugpoint/CMakeLists.txt index b0e71910c7cc..9485682025bd 100644 --- a/llvm/tools/bugpoint/CMakeLists.txt +++ b/llvm/tools/bugpoint/CMakeLists.txt @@ -21,6 +21,10 @@ set(LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD +======= + IR2Vec +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) add_llvm_tool(bugpoint diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt index 257d5b519f04..f1f37cccaa48 100644 --- a/llvm/tools/llc/CMakeLists.txt +++ b/llvm/tools/llc/CMakeLists.txt @@ -19,6 +19,12 @@ set(LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD +======= + IPO + IR2Vec + CollectMachineIR +>>>>>>> fb9f173c59b7... 
Moved CodeSizeOpt to IPO ) add_llvm_tool(llc diff --git a/llvm/tools/llvm-mca/CMakeLists.txt b/llvm/tools/llvm-mca/CMakeLists.txt index 878a05c51cfb..db23b161e9a6 100644 --- a/llvm/tools/llvm-mca/CMakeLists.txt +++ b/llvm/tools/llvm-mca/CMakeLists.txt @@ -10,7 +10,11 @@ set(LLVM_LINK_COMPONENTS MC MCParser Support +<<<<<<< HEAD TargetParser +======= + IR2Vec +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) add_llvm_tool(llvm-mca diff --git a/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt b/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt index b3c1c85681d2..b2107617702c 100644 --- a/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt +++ b/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt @@ -24,7 +24,13 @@ set(LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD ) +======= + Passes + IR2Vec +) +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO add_llvm_fuzzer(llvm-opt-fuzzer llvm-opt-fuzzer.cpp diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index b23e70839f67..1805d68d1205 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -29,6 +29,13 @@ set(LLVM_LINK_COMPONENTS TransformUtils Vectorize Passes +<<<<<<< HEAD +======= + HelloMLBridge + AddSizeAttr + IR2Vec + CollectMachineIR +>>>>>>> fb9f173c59b7... 
Moved CodeSizeOpt to IPO ) add_llvm_tool(opt From d795bf7bc6e33f569d596551b6d75865b4180fc5 Mon Sep 17 00:00:00 2001 From: Umesh-k26 Date: Sun, 21 Jan 2024 13:21:28 +0530 Subject: [PATCH 12/52] Added MLConfig in Support/CommandLine.cpp (cherry picked from commit e4011a594dc34423453b37bdf0aea8e2c8d51981) --- MLCompilerBridge | 2 +- llvm/include/llvm/InitializePasses.h | 18 + llvm/include/llvm/LinkAllPasses.h | 9 + .../IPO/CodeSizeOpt/ActionSpace/ActionEnum.h | 139 +++++++ .../CodeSizeOpt/ActionSpace/ActionHeaders.h | 109 +++++ .../CodeSizeOpt/ActionSpace/ActionSwitch.h | 388 ++++++++++++++++++ .../Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h | 10 + llvm/lib/Support/CMakeLists.txt | 2 + llvm/lib/Support/CommandLine.cpp | 5 + llvm/lib/Transforms/CMakeLists.txt | 6 + llvm/lib/Transforms/Hello-MLBridge/Hello.cpp | 56 ++- llvm/lib/Transforms/Hello/Hello.cpp | 3 + llvm/lib/Transforms/IPO/CMakeLists.txt | 6 +- .../IPO/CodeSizeOpt/CodeSizeOpt.cpp | 33 +- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 13 +- .../Transforms/demoGrpcPass/CMakeLists.txt | 6 + llvm/tools/opt/CMakeLists.txt | 3 + llvm/tools/opt/opt.cpp | 5 + 18 files changed, 784 insertions(+), 29 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h create mode 100644 llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h create mode 100644 llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h create mode 100644 llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h diff --git a/MLCompilerBridge b/MLCompilerBridge index 15cac951a09b..62439c9bc20c 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 15cac951a09b1ce568f3e3c29dd1d6ef5fb46243 +Subproject commit 62439c9bc20ce2b29e459575afcdd1a9c3e57a1a diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 070688178d69..3cf1ec9023ce 100644 --- a/llvm/include/llvm/InitializePasses.h +++ 
b/llvm/include/llvm/InitializePasses.h @@ -351,6 +351,24 @@ void initializeWasmEHPreparePass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); void initializeXRayInstrumentationPass(PassRegistry&); +<<<<<<< HEAD +======= +void initializePosetRLPass(PassRegistry &); +void initializeHelloMLBridgePass(PassRegistry &); +void initializeCollectMachineIRPass(PassRegistry &); +void initializeConfigGenPass(PassRegistry &); + + +void initializeRDGWrapperPassPass(PassRegistry&); + +void initializeLoopDistributionWrapperPassPass(PassRegistry&); + +void initializecustom_loop_distributionPass(PassRegistry&); + +void initializeInnerMostLoopPassPass(PassRegistry &); + +void initializeLoopDistributionServerPassPass(PassRegistry &); +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp } // end namespace llvm diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 7dd41b86700d..1673ceb26185 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -45,6 +45,11 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/ObjCARC.h" +<<<<<<< HEAD +======= +#include "llvm/Transforms/IPO/PosetRL/PosetRL.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h" +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" @@ -141,7 +146,11 @@ namespace { (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandLargeDivRemPass(); (void) llvm::createExpandMemCmpPass(); +<<<<<<< HEAD (void) llvm::createExpandVectorPredicationPass(); +======= + (void)llvm::createPosetRLPass(); +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h new file mode 100644 index 000000000000..f3b550290470 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h @@ -0,0 +1,139 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the LICENSE file +// in the root directory of this source tree. +// +// This file was generated automatically the script +// build_tools/llvm/legacy_pass_manager/make_action_space_genfiles.py. + +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONENUM_H +#define LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONENUM_H + +enum LlvmAction { + ADD_DISCRIMINATORS=1, + ADCE, + AGGRESSIVE_INSTCOMBINE, + ALIGNMENT_FROM_ASSUMPTIONS, + ALWAYS_INLINE, + ARGPROMOTION, + ATTRIBUTOR, + BARRIER, + BDCE, + BREAK_CRIT_EDGES, + SIMPLIFYCFG, + CALLSITE_SPLITTING, + CALLED_VALUE_PROPAGATION, + CANONICALIZE_ALIASES, + CONSTHOIST, + CONSTMERGE, + CONSTPROP, + CORO_CLEANUP, + CORO_EARLY, + CORO_ELIDE, + CORO_SPLIT, + CORRELATED_PROPAGATION, + CROSS_DSO_CFI, + DEADARGELIM, + DCE, + DIE, + DSE, + REG2MEM, + DIV_REM_PAIRS, + EARLY_CSE_MEMSSA, + EARLY_CSE, + ELIM_AVAIL_EXTERN, + EE_INSTRUMENT, + FLATTENCFG, + FLOAT2INT, + FORCEATTRS, + INLINE, + INSERT_GCOV_PROFILING, + GVN_HOIST, + GVN, + GLOBALDCE, + GLOBALOPT, + GLOBALSPLIT, + GUARD_WIDENING, + HOTCOLDSPLIT, + IPCONSTPROP, + IPSCCP, + INDVARS, + IRCE, + INFER_ADDRESS_SPACES, + INFERATTRS, + INJECT_TLI_MAPPINGS, + INSTSIMPLIFY, + INSTCOMBINE, + INSTNAMER, + JUMP_THREADING, + LCSSA, + LICM, + LIBCALLS_SHRINKWRAP, + LOAD_STORE_VECTORIZER, + LOOP_DATA_PREFETCH, + LOOP_DELETION, + LOOP_DISTRIBUTE, + LOOP_FUSION, + LOOP_GUARD_WIDENING, + LOOP_IDIOM, + LOOP_INSTSIMPLIFY, + 
LOOP_INTERCHANGE, + LOOP_LOAD_ELIM, + LOOP_PREDICATION, + LOOP_REROLL, + LOOP_ROTATE, + LOOP_SIMPLIFYCFG, + LOOP_SIMPLIFY, + LOOP_SINK, + LOOP_REDUCE, + LOOP_UNROLL_AND_JAM, + LOOP_UNROLL, + LOOP_UNSWITCH, + LOOP_VECTORIZE, + LOOP_VERSIONING_LICM, + LOOP_VERSIONING, + LOWERATOMIC, + LOWER_CONSTANT_INTRINSICS, + LOWER_EXPECT, + LOWER_GUARD_INTRINSIC, + LOWERINVOKE, + LOWER_MATRIX_INTRINSICS, + LOWERSWITCH, + LOWER_WIDENABLE_CONDITION, + MEMCPYOPT, + MERGEFUNC, + MERGEICMPS, + MLDST_MOTION, + SANCOV, + NAME_ANON_GLOBALS, + NARY_REASSOCIATE, + NEWGVN, + PGO_MEMOP_OPT, + PARTIAL_INLINER, + PARTIALLY_INLINE_LIBCALLS, + POST_INLINE_EE_INSTRUMENT, + FUNCTIONATTRS, + MEM2REG, + PRUNE_EH, + REASSOCIATE, + REDUNDANT_DBG_INST_ELIM, + RPO_FUNCTIONATTRS, + REWRITE_STATEPOINTS_FOR_GC, + SCCP, + SLP_VECTORIZER, + SROA, + SCALARIZER, + SEPARATE_CONST_OFFSET_FROM_GEP, + SIMPLE_LOOP_UNSWITCH, + SINK, + SPECULATIVE_EXECUTION, + SLSR, + STRIP_DEAD_PROTOTYPES, + STRIP_DEBUG_DECLARE, + STRIP_NONDEBUG, + STRIP, + TAILCALLELIM, + MERGERETURN, +}; + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h new file mode 100644 index 000000000000..1187ff3aa487 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h @@ -0,0 +1,109 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the LICENSE file +// in the root directory of this source tree. +// +// This file was generated automatically the script +// build_tools/llvm/legacy_pass_manager/make_action_space_genfiles.py. 
+ +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONHEADER_H +#define LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONHEADER_H + +#include "llvm/LinkAllPasses.h" +#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/Coroutines.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/CalledValuePropagation.h" +#include "llvm/Transforms/IPO/ConstantMerge.h" +#include "llvm/Transforms/IPO/CrossDSOCFI.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" +#include "llvm/Transforms/IPO/ElimAvailExtern.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/IPO/GlobalDCE.h" +#include "llvm/Transforms/IPO/GlobalOpt.h" +#include "llvm/Transforms/IPO/GlobalSplit.h" +#include "llvm/Transforms/IPO/HotColdSplitting.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/IPO/MergeFunctions.h" +#include "llvm/Transforms/IPO/PartialInlining.h" +#include "llvm/Transforms/IPO/SCCP.h" +#include "llvm/Transforms/IPO/StripDeadPrototypes.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" +#include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" +#include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" +#include "llvm/Transforms/Scalar/DCE.h" +#include "llvm/Transforms/Scalar/DeadStoreElimination.h" +#include "llvm/Transforms/Scalar/DivRemPairs.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/Float2Int.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GuardWidening.h" +#include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include 
"llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopDataPrefetch.h" +#include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Scalar/LoopDistribute.h" +#include "llvm/Transforms/Scalar/LoopFuse.h" +#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/Transforms/Scalar/LoopLoadElimination.h" +#include "llvm/Transforms/Scalar/LoopPredication.h" +#include "llvm/Transforms/Scalar/LoopRotation.h" +#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" +#include "llvm/Transforms/Scalar/LoopSink.h" +#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" +#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" +#include "llvm/Transforms/Scalar/LowerWidenableCondition.h" +#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" +#include "llvm/Transforms/Scalar/NaryReassociate.h" +#include "llvm/Transforms/Scalar/NewGVN.h" +#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" +#include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include 
"llvm/Transforms/Scalar/SpeculativeExecution.h" +#include "llvm/Transforms/Scalar/TailRecursionElimination.h" +#include "llvm/Transforms/Utils/AddDiscriminators.h" +#include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/InjectTLIMappings.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/LowerInvoke.h" +#include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" + +namespace llvm { +FunctionPass* createEarlyCSEMemSSAPass() { return createEarlyCSEPass(/*UseMemorySSA=*/true); } +} // namespace llvm + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h new file mode 100644 index 000000000000..08f75e67805c --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h @@ -0,0 +1,388 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the LICENSE file +// in the root directory of this source tree. +// +// This file was generated automatically the script +// build_tools/llvm/legacy_pass_manager/make_action_space_genfiles.py. 
+ +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONSWITCH_H +#define LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONSWITCH_H + +#define HANDLE_ACTION(action, handlePass) \ + switch (action) { \ + case LlvmAction::ADD_DISCRIMINATORS: \ + handlePass(llvm::createAddDiscriminatorsPass()); \ + break; \ + case LlvmAction::ADCE: \ + handlePass(llvm::createAggressiveDCEPass()); \ + break; \ + case LlvmAction::AGGRESSIVE_INSTCOMBINE: \ + handlePass(llvm::createAggressiveInstCombinerPass()); \ + break; \ + case LlvmAction::ALIGNMENT_FROM_ASSUMPTIONS: \ + handlePass(llvm::createAlignmentFromAssumptionsPass()); \ + break; \ + case LlvmAction::ALWAYS_INLINE: \ + handlePass(llvm::createAlwaysInlinerLegacyPass()); \ + break; \ + case LlvmAction::ARGPROMOTION: \ + handlePass(llvm::createArgumentPromotionPass()); \ + break; \ + case LlvmAction::ATTRIBUTOR: \ + handlePass(llvm::createAttributorLegacyPass()); \ + break; \ + case LlvmAction::BARRIER: \ + handlePass(llvm::createBarrierNoopPass()); \ + break; \ + case LlvmAction::BDCE: \ + handlePass(llvm::createBitTrackingDCEPass()); \ + break; \ + case LlvmAction::BREAK_CRIT_EDGES: \ + handlePass(llvm::createBreakCriticalEdgesPass()); \ + break; \ + case LlvmAction::SIMPLIFYCFG: \ + handlePass(llvm::createCFGSimplificationPass()); \ + break; \ + case LlvmAction::CALLSITE_SPLITTING: \ + handlePass(llvm::createCallSiteSplittingPass()); \ + break; \ + case LlvmAction::CALLED_VALUE_PROPAGATION: \ + handlePass(llvm::createCalledValuePropagationPass()); \ + break; \ + case LlvmAction::CANONICALIZE_ALIASES: \ + handlePass(llvm::createCanonicalizeAliasesPass()); \ + break; \ + case LlvmAction::CONSTHOIST: \ + handlePass(llvm::createConstantHoistingPass()); \ + break; \ + case LlvmAction::CONSTMERGE: \ + handlePass(llvm::createConstantMergePass()); \ + break; \ + case LlvmAction::CONSTPROP: \ + handlePass(llvm::createConstantPropagationPass()); \ + break; \ + case LlvmAction::CORO_CLEANUP: \ + 
handlePass(llvm::createCoroCleanupLegacyPass()); \ + break; \ + case LlvmAction::CORO_EARLY: \ + handlePass(llvm::createCoroEarlyLegacyPass()); \ + break; \ + case LlvmAction::CORO_ELIDE: \ + handlePass(llvm::createCoroElideLegacyPass()); \ + break; \ + case LlvmAction::CORO_SPLIT: \ + handlePass(llvm::createCoroSplitLegacyPass()); \ + break; \ + case LlvmAction::CORRELATED_PROPAGATION: \ + handlePass(llvm::createCorrelatedValuePropagationPass()); \ + break; \ + case LlvmAction::CROSS_DSO_CFI: \ + handlePass(llvm::createCrossDSOCFIPass()); \ + break; \ + case LlvmAction::DEADARGELIM: \ + handlePass(llvm::createDeadArgEliminationPass()); \ + break; \ + case LlvmAction::DCE: \ + handlePass(llvm::createDeadCodeEliminationPass()); \ + break; \ + case LlvmAction::DIE: \ + handlePass(llvm::createDeadInstEliminationPass()); \ + break; \ + case LlvmAction::DSE: \ + handlePass(llvm::createDeadStoreEliminationPass()); \ + break; \ + case LlvmAction::REG2MEM: \ + handlePass(llvm::createDemoteRegisterToMemoryPass()); \ + break; \ + case LlvmAction::DIV_REM_PAIRS: \ + handlePass(llvm::createDivRemPairsPass()); \ + break; \ + case LlvmAction::EARLY_CSE_MEMSSA: \ + handlePass(llvm::createEarlyCSEMemSSAPass()); \ + break; \ + case LlvmAction::EARLY_CSE: \ + handlePass(llvm::createEarlyCSEPass()); \ + break; \ + case LlvmAction::ELIM_AVAIL_EXTERN: \ + handlePass(llvm::createEliminateAvailableExternallyPass()); \ + break; \ + case LlvmAction::EE_INSTRUMENT: \ + handlePass(llvm::createEntryExitInstrumenterPass()); \ + break; \ + case LlvmAction::FLATTENCFG: \ + handlePass(llvm::createFlattenCFGPass()); \ + break; \ + case LlvmAction::FLOAT2INT: \ + handlePass(llvm::createFloat2IntPass()); \ + break; \ + case LlvmAction::FORCEATTRS: \ + handlePass(llvm::createForceFunctionAttrsLegacyPass()); \ + break; \ + case LlvmAction::INLINE: \ + handlePass(llvm::createFunctionInliningPass()); \ + break; \ + case LlvmAction::INSERT_GCOV_PROFILING: \ + handlePass(llvm::createGCOVProfilerPass()); \ 
+ break; \ + case LlvmAction::GVN_HOIST: \ + handlePass(llvm::createGVNHoistPass()); \ + break; \ + case LlvmAction::GVN: \ + handlePass(llvm::createGVNPass()); \ + break; \ + case LlvmAction::GLOBALDCE: \ + handlePass(llvm::createGlobalDCEPass()); \ + break; \ + case LlvmAction::GLOBALOPT: \ + handlePass(llvm::createGlobalOptimizerPass()); \ + break; \ + case LlvmAction::GLOBALSPLIT: \ + handlePass(llvm::createGlobalSplitPass()); \ + break; \ + case LlvmAction::GUARD_WIDENING: \ + handlePass(llvm::createGuardWideningPass()); \ + break; \ + case LlvmAction::HOTCOLDSPLIT: \ + handlePass(llvm::createHotColdSplittingPass()); \ + break; \ + case LlvmAction::IPCONSTPROP: \ + handlePass(llvm::createIPConstantPropagationPass()); \ + break; \ + case LlvmAction::IPSCCP: \ + handlePass(llvm::createIPSCCPPass()); \ + break; \ + case LlvmAction::INDVARS: \ + handlePass(llvm::createIndVarSimplifyPass()); \ + break; \ + case LlvmAction::IRCE: \ + handlePass(llvm::createInductiveRangeCheckEliminationPass()); \ + break; \ + case LlvmAction::INFER_ADDRESS_SPACES: \ + handlePass(llvm::createInferAddressSpacesPass()); \ + break; \ + case LlvmAction::INFERATTRS: \ + handlePass(llvm::createInferFunctionAttrsLegacyPass()); \ + break; \ + case LlvmAction::INJECT_TLI_MAPPINGS: \ + handlePass(llvm::createInjectTLIMappingsLegacyPass()); \ + break; \ + case LlvmAction::INSTSIMPLIFY: \ + handlePass(llvm::createInstSimplifyLegacyPass()); \ + break; \ + case LlvmAction::INSTCOMBINE: \ + handlePass(llvm::createInstructionCombiningPass()); \ + break; \ + case LlvmAction::INSTNAMER: \ + handlePass(llvm::createInstructionNamerPass()); \ + break; \ + case LlvmAction::JUMP_THREADING: \ + handlePass(llvm::createJumpThreadingPass()); \ + break; \ + case LlvmAction::LCSSA: \ + handlePass(llvm::createLCSSAPass()); \ + break; \ + case LlvmAction::LICM: \ + handlePass(llvm::createLICMPass()); \ + break; \ + case LlvmAction::LIBCALLS_SHRINKWRAP: \ + handlePass(llvm::createLibCallsShrinkWrapPass()); \ + 
break; \ + case LlvmAction::LOAD_STORE_VECTORIZER: \ + handlePass(llvm::createLoadStoreVectorizerPass()); \ + break; \ + case LlvmAction::LOOP_DATA_PREFETCH: \ + handlePass(llvm::createLoopDataPrefetchPass()); \ + break; \ + case LlvmAction::LOOP_DELETION: \ + handlePass(llvm::createLoopDeletionPass()); \ + break; \ + case LlvmAction::LOOP_DISTRIBUTE: \ + handlePass(llvm::createLoopDistributePass()); \ + break; \ + case LlvmAction::LOOP_FUSION: \ + handlePass(llvm::createLoopFusePass()); \ + break; \ + case LlvmAction::LOOP_GUARD_WIDENING: \ + handlePass(llvm::createLoopGuardWideningPass()); \ + break; \ + case LlvmAction::LOOP_IDIOM: \ + handlePass(llvm::createLoopIdiomPass()); \ + break; \ + case LlvmAction::LOOP_INSTSIMPLIFY: \ + handlePass(llvm::createLoopInstSimplifyPass()); \ + break; \ + case LlvmAction::LOOP_INTERCHANGE: \ + handlePass(llvm::createLoopInterchangePass()); \ + break; \ + case LlvmAction::LOOP_LOAD_ELIM: \ + handlePass(llvm::createLoopLoadEliminationPass()); \ + break; \ + case LlvmAction::LOOP_PREDICATION: \ + handlePass(llvm::createLoopPredicationPass()); \ + break; \ + case LlvmAction::LOOP_REROLL: \ + handlePass(llvm::createLoopRerollPass()); \ + break; \ + case LlvmAction::LOOP_ROTATE: \ + handlePass(llvm::createLoopRotatePass()); \ + break; \ + case LlvmAction::LOOP_SIMPLIFYCFG: \ + handlePass(llvm::createLoopSimplifyCFGPass()); \ + break; \ + case LlvmAction::LOOP_SIMPLIFY: \ + handlePass(llvm::createLoopSimplifyPass()); \ + break; \ + case LlvmAction::LOOP_SINK: \ + handlePass(llvm::createLoopSinkPass()); \ + break; \ + case LlvmAction::LOOP_REDUCE: \ + handlePass(llvm::createLoopStrengthReducePass()); \ + break; \ + case LlvmAction::LOOP_UNROLL_AND_JAM: \ + handlePass(llvm::createLoopUnrollAndJamPass()); \ + break; \ + case LlvmAction::LOOP_UNROLL: \ + handlePass(llvm::createLoopUnrollPass()); \ + break; \ + case LlvmAction::LOOP_UNSWITCH: \ + handlePass(llvm::createLoopUnswitchPass()); \ + break; \ + case LlvmAction::LOOP_VECTORIZE: 
\ + handlePass(llvm::createLoopVectorizePass()); \ + break; \ + case LlvmAction::LOOP_VERSIONING_LICM: \ + handlePass(llvm::createLoopVersioningLICMPass()); \ + break; \ + case LlvmAction::LOOP_VERSIONING: \ + handlePass(llvm::createLoopVersioningPass()); \ + break; \ + case LlvmAction::LOWERATOMIC: \ + handlePass(llvm::createLowerAtomicPass()); \ + break; \ + case LlvmAction::LOWER_CONSTANT_INTRINSICS: \ + handlePass(llvm::createLowerConstantIntrinsicsPass()); \ + break; \ + case LlvmAction::LOWER_EXPECT: \ + handlePass(llvm::createLowerExpectIntrinsicPass()); \ + break; \ + case LlvmAction::LOWER_GUARD_INTRINSIC: \ + handlePass(llvm::createLowerGuardIntrinsicPass()); \ + break; \ + case LlvmAction::LOWERINVOKE: \ + handlePass(llvm::createLowerInvokePass()); \ + break; \ + case LlvmAction::LOWER_MATRIX_INTRINSICS: \ + handlePass(llvm::createLowerMatrixIntrinsicsPass()); \ + break; \ + case LlvmAction::LOWERSWITCH: \ + handlePass(llvm::createLowerSwitchPass()); \ + break; \ + case LlvmAction::LOWER_WIDENABLE_CONDITION: \ + handlePass(llvm::createLowerWidenableConditionPass()); \ + break; \ + case LlvmAction::MEMCPYOPT: \ + handlePass(llvm::createMemCpyOptPass()); \ + break; \ + case LlvmAction::MERGEFUNC: \ + handlePass(llvm::createMergeFunctionsPass()); \ + break; \ + case LlvmAction::MERGEICMPS: \ + handlePass(llvm::createMergeICmpsLegacyPass()); \ + break; \ + case LlvmAction::MLDST_MOTION: \ + handlePass(llvm::createMergedLoadStoreMotionPass()); \ + break; \ + case LlvmAction::SANCOV: \ + handlePass(llvm::createModuleSanitizerCoverageLegacyPassPass()); \ + break; \ + case LlvmAction::NAME_ANON_GLOBALS: \ + handlePass(llvm::createNameAnonGlobalPass()); \ + break; \ + case LlvmAction::NARY_REASSOCIATE: \ + handlePass(llvm::createNaryReassociatePass()); \ + break; \ + case LlvmAction::NEWGVN: \ + handlePass(llvm::createNewGVNPass()); \ + break; \ + case LlvmAction::PGO_MEMOP_OPT: \ + handlePass(llvm::createPGOMemOPSizeOptLegacyPass()); \ + break; \ + case 
LlvmAction::PARTIAL_INLINER: \ + handlePass(llvm::createPartialInliningPass()); \ + break; \ + case LlvmAction::PARTIALLY_INLINE_LIBCALLS: \ + handlePass(llvm::createPartiallyInlineLibCallsPass()); \ + break; \ + case LlvmAction::POST_INLINE_EE_INSTRUMENT: \ + handlePass(llvm::createPostInlineEntryExitInstrumenterPass()); \ + break; \ + case LlvmAction::FUNCTIONATTRS: \ + handlePass(llvm::createPostOrderFunctionAttrsLegacyPass()); \ + break; \ + case LlvmAction::MEM2REG: \ + handlePass(llvm::createPromoteMemoryToRegisterPass()); \ + break; \ + case LlvmAction::PRUNE_EH: \ + handlePass(llvm::createPruneEHPass()); \ + break; \ + case LlvmAction::REASSOCIATE: \ + handlePass(llvm::createReassociatePass()); \ + break; \ + case LlvmAction::REDUNDANT_DBG_INST_ELIM: \ + handlePass(llvm::createRedundantDbgInstEliminationPass()); \ + break; \ + case LlvmAction::RPO_FUNCTIONATTRS: \ + handlePass(llvm::createReversePostOrderFunctionAttrsPass()); \ + break; \ + case LlvmAction::REWRITE_STATEPOINTS_FOR_GC: \ + handlePass(llvm::createRewriteStatepointsForGCLegacyPass()); \ + break; \ + case LlvmAction::SCCP: \ + handlePass(llvm::createSCCPPass()); \ + break; \ + case LlvmAction::SLP_VECTORIZER: \ + handlePass(llvm::createSLPVectorizerPass()); \ + break; \ + case LlvmAction::SROA: \ + handlePass(llvm::createSROAPass()); \ + break; \ + case LlvmAction::SCALARIZER: \ + handlePass(llvm::createScalarizerPass()); \ + break; \ + case LlvmAction::SEPARATE_CONST_OFFSET_FROM_GEP: \ + handlePass(llvm::createSeparateConstOffsetFromGEPPass()); \ + break; \ + case LlvmAction::SIMPLE_LOOP_UNSWITCH: \ + handlePass(llvm::createSimpleLoopUnswitchLegacyPass()); \ + break; \ + case LlvmAction::SINK: \ + handlePass(llvm::createSinkingPass()); \ + break; \ + case LlvmAction::SPECULATIVE_EXECUTION: \ + handlePass(llvm::createSpeculativeExecutionPass()); \ + break; \ + case LlvmAction::SLSR: \ + handlePass(llvm::createStraightLineStrengthReducePass()); \ + break; \ + case 
LlvmAction::STRIP_DEAD_PROTOTYPES: \ + handlePass(llvm::createStripDeadPrototypesPass()); \ + break; \ + case LlvmAction::STRIP_DEBUG_DECLARE: \ + handlePass(llvm::createStripDebugDeclarePass()); \ + break; \ + case LlvmAction::STRIP_NONDEBUG: \ + handlePass(llvm::createStripNonDebugSymbolsPass()); \ + break; \ + case LlvmAction::STRIP: \ + handlePass(llvm::createStripSymbolsPass()); \ + break; \ + case LlvmAction::TAILCALLELIM: \ + handlePass(llvm::createTailCallEliminationPass()); \ + break; \ + case LlvmAction::MERGERETURN: \ + handlePass(llvm::createUnifyFunctionExitNodesPass()); \ + break; \ + } + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h new file mode 100644 index 000000000000..c4414d684f7d --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h @@ -0,0 +1,10 @@ +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_RL_H +#define LLVM_TRANSFORMS_CODESIZEOPT_RL_H + +#include "llvm/Pass.h" + +namespace llvm { + + ModulePass *createCodeSizeOptPass(); +} // namespace llvm +#endif \ No newline at end of file diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index e6bff119b094..f753714e4a9a 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -337,3 +337,5 @@ if(LLVM_WITH_Z3) ${Z3_INCLUDE_DIR} ) endif() + +target_link_libraries(LLVMSupport PRIVATE LLVMMLBridge) \ No newline at end of file diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index d3efb8b67be5..190c0fb62a6a 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/StringSaver.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" +#include "MLModelRunner/Utils/MLConfig.h" #include #include #include @@ -75,6 +76,10 @@ template class opt; } // namespace cl } // namespace llvm 
+llvm::cl::opt MLBridge::MLConfig::mlconfig( + "ml-config-path", llvm::cl::Hidden, llvm::cl::Optional, + llvm::cl::desc("Path to ML config files"), llvm::cl::init("")); + // Pin the vtables to this file. void GenericOptionValue::anchor() {} void OptionValue::anchor() {} diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index 8b2468080e60..f297328ca443 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -6,8 +6,11 @@ add_subdirectory(Scalar) add_subdirectory(IPO) add_subdirectory(Vectorize) add_subdirectory(Hello) +<<<<<<< HEAD add_subdirectory(demoGrpcPass) add_subdirectory(demoInferenceEnginePass) +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp add_subdirectory(Hello-IR2Vec) add_subdirectory(ObjCARC) add_subdirectory(Coroutines) @@ -17,5 +20,8 @@ add_subdirectory(Hello-MLBridge) ======= add_subdirectory(AddSizeAttr) add_subdirectory(MCAInstrumentation) +<<<<<<< HEAD add_subdirectory(PipeIR) >>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO +======= +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp diff --git a/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp b/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp index e0f4a0124851..afbd6dffb2a5 100644 --- a/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp +++ b/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp @@ -239,8 +239,6 @@ using namespace llvm; using namespace grpc; using namespace helloMLBridgegRPC; -// #define DEBUG_TYPE "hello_mlbridge" - STATISTIC(hellomodule, "Counts number of functions greeted"); static cl::opt training("hello-training", cl::Hidden, @@ -283,7 +281,42 @@ void HelloMLBridge::setModelRunner(int n) { "output", M->getContext()); \ break; MODELS(M) +<<<<<<< HEAD #undef M +======= +#undef M + } + // MLRunner = std::make_unique>("output"); + } + + void TFinitCommunication() { + auto StartTime = std::chrono::high_resolution_clock::now(); + + std::pair> p1("x", FeatureVector); + + setTFModelRunner(n); + MLRunner->populateFeatures(p1); + double Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("tf-inference.csv", std::ios_base::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + + bool runOnModule(Module &M) override { + // unregister MLConfig::mlconfig + MLConfig::mlconfig.removeArgument(); + this->M = &M; + if (useTF) { + populateFeatureVector(); + TFinitCommunication(); + return false; +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp } } @@ -308,6 +341,7 @@ void HelloMLBridge::initFeatureVector() { for (int i = 0; i < n; i++) { FeatureVector[i] = dis(gen); } +<<<<<<< HEAD } // New PM Registration @@ -331,4 +365,20 @@ llvm::PassPluginLibraryInfo getHelloMLBridgePluginInfo() { extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { return getHelloMLBridgePluginInfo(); -} \ No newline at end of file +} +======= + +private: + std::unique_ptr MLRunner; + std::string basename; + BaseSerDes::Kind SerDesType; + Module *M; +}; + +} // namespace + +char HelloMLBridge::ID = 0; +static RegisterPass Z("hello-MLBridge", + "Hello World Pass (with MLBridge)"); + +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp diff --git a/llvm/lib/Transforms/Hello/Hello.cpp b/llvm/lib/Transforms/Hello/Hello.cpp index 46527e3933ee..6b6fe5477a64 100644 --- a/llvm/lib/Transforms/Hello/Hello.cpp +++ b/llvm/lib/Transforms/Hello/Hello.cpp @@ -20,6 +20,7 @@ using namespace llvm; #define DEBUG_TYPE "hello" STATISTIC(HelloCounter, "Counts number of functions greeted"); +<<<<<<< HEAD namespace { // Hello - The first implementation, without getAnalysisUsage. @@ -39,6 +40,8 @@ struct Hello : public FunctionPass { char Hello::ID = 0; static RegisterPass X("hello", "Hello World Pass"); +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp namespace { // Hello2 - The second implementation with getAnalysisUsage implemented. struct Hello2 : public FunctionPass { diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 57b378dd9f23..06580c7bb8ea 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,7 +1,6 @@ <<<<<<< HEAD ======= add_subdirectory(PosetRL) -add_subdirectory(CodeSizeOpt) >>>>>>> fb9f173c59b7... 
Moved CodeSizeOpt to IPO add_llvm_component_library(LLVMipo @@ -60,6 +59,7 @@ add_llvm_component_library(LLVMipo COMPONENT_NAME IPO +<<<<<<< HEAD LINK_COMPONENTS AggressiveInstCombine Analysis @@ -83,3 +83,7 @@ add_llvm_component_library(LLVMipo target_link_libraries(LLVMipo PUBLIC LLVMPosetRL LLVMCodeSizeOpt) target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) >>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO +======= +target_link_libraries(LLVMipo PUBLIC LLVMPosetRL) +target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp b/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp index fc3492abd33e..d3fb6e6c2935 100644 --- a/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp @@ -1,10 +1,8 @@ -#include "llvm/Transforms/CodeSizeOpt/CodeSizeOpt.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h" #include "MLModelRunner/MLModelRunner.h" #include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" #include "MLModelRunner/Utils/MLConfig.h" #include "inference/CodeSizeOptEnv.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR2Vec.h" @@ -17,15 +15,17 @@ #include "llvm/Support/JSON.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/CodeSizeOpt/ActionSpace/ActionEnum.h" -#include "llvm/Transforms/CodeSizeOpt/ActionSpace/ActionHeaders.h" -#include "llvm/Transforms/CodeSizeOpt/ActionSpace/ActionSwitch.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include #include #include #include +#define DEBUG_TYPE 
"codesizeopt" + using namespace llvm; namespace { @@ -33,7 +33,7 @@ struct CodeSizeOpt : public ModulePass, public CodeSizeOptEnv { static char ID; CodeSizeOpt() : ModulePass(ID) {} bool runOnModule(Module &M) override { - assert(MLConfig::mlconfig != "" && "ml-config-path required" ); + assert(MLConfig::mlconfig != "" && "ml-config-path required"); this->M = &M; llvm::Triple triple(M.getTargetTriple()); tlii_ = llvm::TargetLibraryInfoImpl(triple); @@ -50,23 +50,26 @@ struct CodeSizeOpt : public ModulePass, public CodeSizeOptEnv { return true; } - inline const llvm::TargetLibraryInfoImpl& tlii() const { return tlii_; } + inline const llvm::TargetLibraryInfoImpl &tlii() const { return tlii_; } - void addPassToPM(llvm::legacy::FunctionPassManager* PM, Pass* P) { - errs() << "Adding Pass: Profilesummaryinfo" << "\n"; + void addPassToPM(llvm::legacy::FunctionPassManager *PM, Pass *P) { + errs() << "Adding Pass: Profilesummaryinfo" + << "\n"; PM->add(new ProfileSummaryInfoWrapperPass()); - errs() << "Adding Pass: TargetLibraryInfo" << "\n"; + errs() << "Adding Pass: TargetLibraryInfo" + << "\n"; PM->add(new TargetLibraryInfoWrapperPass(tlii())); - errs() << "Adding Pass: TargetTransformInfo" << "\n"; + errs() << "Adding Pass: TargetTransformInfo" + << "\n"; PM->add(createTargetTransformInfoWrapperPass(TargetIRAnalysis())); errs() << "Adding Pass: " << P->getPassName() << "\n"; PM->add(P); } Embedding getEmbeddings() override { - auto Ir2vec = - IR2Vec::Embeddings(*M, IR2Vec::IR2VecMode::FlowAware, - MLConfig::mlconfig + "/ir2vec/seedEmbeddingVocab-300-llvm10.txt"); + auto Ir2vec = IR2Vec::Embeddings( + *M, IR2Vec::IR2VecMode::FlowAware, + MLConfig::mlconfig + "/ir2vec/seedEmbeddingVocab-300-llvm10.txt"); auto ProgVector = Ir2vec.getProgramVector(); Embedding Vector(ProgVector.begin(), ProgVector.end()); // errs() << "Embedding: "; diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index e0ebfdf44261..9b082d2a7ade 
100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,8 +38,12 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" +<<<<<<< HEAD #include "llvm/Transforms/PosetRL/PosetRL.h" #include "llvm/Transforms/CodeSizeOpt/CodeSizeOpt.h" +======= +#include "llvm/Transforms/IPO/PosetRL/PosetRL.h" +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" @@ -59,9 +63,6 @@ static cl::opt OPosetRL("OPosetRL", cl::init(false), cl::Hidden, cl::desc("poset rl pass sequence")); -static cl::opt - OCodeSizeOpt("OCodeSizeOpt", cl::init(false), cl::Hidden, - cl::desc("codesize opt pass sequence")); static cl::opt RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); @@ -1448,12 +1449,6 @@ void PassManagerBuilder::populateModulePassManager( return; } - if(OCodeSizeOpt) { - errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; - MPM.add(createCodeSizeOptPass()); - return; - } - if(!RunNoPreDistributionPasses){ if (!PGOSampleUse.empty()) { MPM.add(createPruneEHPass()); diff --git a/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt b/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt index 10084ba343b8..5d17af12224e 100644 --- a/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt +++ b/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt @@ -18,8 +18,14 @@ add_llvm_library(HelloWorld MODULE BUILDTREE_ONLY DEPENDS intrinsics_gen + LLVMMLBridge + PLUGIN_TOOL opt ) +<<<<<<< HEAD # add_llvm_library(HelloWorld MODULE Hello.cpp) +======= +target_link_libraries(LLVMHelloGRPC PRIVATE LLVMMLBridge) +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index 1805d68d1205..97626fb73e48 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -30,8 +30,11 @@ set(LLVM_LINK_COMPONENTS Vectorize Passes <<<<<<< HEAD +<<<<<<< HEAD ======= HelloMLBridge +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp AddSizeAttr IR2Vec CollectMachineIR diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index a9248efa189f..a4aff0b82745 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -424,6 +424,11 @@ int main(int argc, char **argv) { initializeTransformUtils(Registry); initializeInstCombine(Registry); initializeTarget(Registry); +<<<<<<< HEAD +======= + initializePosetRLPass(Registry); + initializeAddSizeAttrPassPass(Registry); +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp // For codegen passes, only passes that do IR to IR transformation are // supported. 
initializeExpandLargeDivRemLegacyPassPass(Registry); From 2a0d24b187280204d96af7ac9fb7adaf49dbe2e1 Mon Sep 17 00:00:00 2001 From: Soumyaworks Date: Mon, 22 Jan 2024 01:34:15 +0530 Subject: [PATCH 13/52] Changing the number of optimization sequences (cherry picked from commit 4c458f18f5e4f3c964ac06e93dbe0aa59e645ef9) --- .../IPO/PosetRL/inference/poset_rl_env.h | 71 +++++++++++++++++++ model/POSET-RL/Environment_pipe.py | 2 +- 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h diff --git a/llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h b/llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h new file mode 100644 index 000000000000..f155339b6a24 --- /dev/null +++ b/llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h @@ -0,0 +1,71 @@ +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +#define ActionMaskSize 34 +#define EmbeddingSize 300 + +using namespace llvm; + +typedef std::vector Embedding; +typedef std::vector ActionMask; + +using namespace MLBridge; + +class PosetRLEnv : public Environment { + unsigned Actioncount = 0; + Embedding CurrEmbedding; + ActionMask CurrActionMask; + Observation CurrObs; +public: + std::vector Sequence; +public: + PosetRLEnv(); + Observation& reset() override; + Observation& step(Action) override; + virtual Embedding getEmbeddings() = 0; + virtual void applySeq(Action) = 0; +}; + +inline Observation& PosetRLEnv::step(Action Action) { + Sequence.push_back(Action); + applySeq(Action); + + Actioncount += 1; + CurrActionMask[Action % ActionMaskSize] = 0; + CurrEmbedding = getEmbeddings(); + + if (Actioncount >= 34) + setDone(); + + CurrObs.clear(); + std::copy(CurrActionMask.begin(), CurrActionMask.end(), + std::back_inserter(CurrObs)); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + 
std::back_inserter(CurrObs)); + + return CurrObs; +} + +inline Observation& PosetRLEnv::reset() { + CurrEmbedding = getEmbeddings(); + CurrActionMask.assign(ActionMaskSize, 1); + + CurrObs.clear(); + std::copy(CurrActionMask.begin(), CurrActionMask.end(), + std::back_inserter(CurrObs)); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + + return CurrObs; +} + +inline PosetRLEnv::PosetRLEnv() { + CurrEmbedding.assign(EmbeddingSize, 0); + CurrActionMask.assign(ActionMaskSize, 1); + setNextAgent("agent"); +} diff --git a/model/POSET-RL/Environment_pipe.py b/model/POSET-RL/Environment_pipe.py index 983908bbe1a1..93ab59c99727 100755 --- a/model/POSET-RL/Environment_pipe.py +++ b/model/POSET-RL/Environment_pipe.py @@ -327,7 +327,7 @@ def step(self, action_index): self.cur_obs = next_observation # Max number of actions (optimaztions sub-sequences) to be applied - if self.action_count >= 15: + if self.action_count >= 34: done = True logging.info(self.cur_action_seq) if self.mode == 'inference': From 4cab513d45edd52eca1f6424dc55237beb7dd30b Mon Sep 17 00:00:00 2001 From: Soumyaworks Date: Mon, 22 Jan 2024 01:41:33 +0530 Subject: [PATCH 14/52] Checkpoints interval related change (cherry picked from commit 9208efddbf427331aac96a028ff193460ad3c4b9) --- model/POSET-RL/experiment.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/model/POSET-RL/experiment.py b/model/POSET-RL/experiment.py index 2aacf21a6ced..ae57a1440d86 100755 --- a/model/POSET-RL/experiment.py +++ b/model/POSET-RL/experiment.py @@ -74,11 +74,12 @@ def experiment(config): for i in range(iterations): train_results = train_agent.train() + if i == iterations - 1 or i%10 == 0: + tune.report(**train_results) + checkpoint = train_agent.save(tune.get_trial_dir()) # train_agent.export_policy_model("/home/cs20btech11018/repos/ML-Phase-Ordering/RLLib-PhaseOrder/poset-RL-onnx-model", onnx=int(os.getenv("ONNX_OPSET", "11"))) # break - - checkpoint = 
train_agent.save(tune.get_trial_dir()) train_agent.stop() if __name__ == '__main__': From 0253a80bcf766892a44baf9f62516a23dd9745e3 Mon Sep 17 00:00:00 2001 From: Soumyaworks Date: Mon, 22 Jan 2024 01:43:59 +0530 Subject: [PATCH 15/52] Fixing the onnx model dump command (cherry picked from commit f774564df3ebd6c2e0b426a279ce5ec70e4a83c4) --- model/POSET-RL/inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py index 525dfb9c3a08..dc9e9e2466bb 100755 --- a/model/POSET-RL/inference.py +++ b/model/POSET-RL/inference.py @@ -177,8 +177,9 @@ def env_creator(env_config): self.train_agent.restore(checkpoint) self.config = config - - # torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), f="/Pramana/ML_LLVM_Tools/ml-llvm-project/onnx_checkpoints_posetrl/posetrl_model.onnx", verbose=True, input_names=["obs"], output_names=["output"]) + # DO NOT DELETE THE BELOW LINE , uncomment to dump the onnx model from checkpoint + # torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), export_params=True, f="/home/cs22mtech12011/Hackathon/ml-llvm-project/model/POSET-RL/onnx-model/posetrl_model-test.onnx", verbose=True, input_names=["obs"], output_names=["output"]) + def dot_to_json(self, dot_): py_dot_graph = pydot.graph_from_dot_data(dot_)[0] From 24e13855d808f704b36b3bea0263fdcb2a282067 Mon Sep 17 00:00:00 2001 From: Soumyaworks Date: Mon, 22 Jan 2024 19:33:23 +0530 Subject: [PATCH 16/52] Removing ir2vec related dependencies and recursive code cleanup (cherry picked from commit 9f81943cf9d3144fd007ef51f81e68bad2c5c294) --- model/POSET-RL/Environment_pipe.py | 109 ++++++++++++++++++++++++----- model/POSET-RL/inference.py | 12 ++-- 2 files changed, 97 insertions(+), 24 deletions(-) diff --git a/model/POSET-RL/Environment_pipe.py b/model/POSET-RL/Environment_pipe.py index 93ab59c99727..1634958e81a7 100755 --- 
a/model/POSET-RL/Environment_pipe.py +++ b/model/POSET-RL/Environment_pipe.py @@ -45,7 +45,7 @@ def __init__(self, config): self.embedding = None self.iteration_counter = 0 self.rename_Dir = False - self.FileSys_Obj = fsystem(config["llvm_dir"], config["ir2vec_dir"]) + self.FileSys_Obj = fsystem(config["llvm_dir"], f"{CONFIG_DIR}/ir2vec") self.FileSys_Obj.createFolder("env") self.temporaryDirectory = tempfile.gettempdir() @@ -119,17 +119,17 @@ def make(self, TrainingPath): self.FileSys_Obj.generateTrainingData(TrainingPath) self.Obs = self.FileSys_Obj.LLFileList - def getEmbedding(self, fileName) : - EmbFile = self.Curr_Dir + "/" + str(self.StateIndex) - # Get IR2Vec FlowAware embeddings - command = self.FileSys_Obj.IR2VecBin + " -fa -vocab " + \ - self.FileSys_Obj.SeedEmbeddingPath + " -o " + EmbFile + " -level p " + fileName - os.system(command) - emb = np.loadtxt(EmbFile) - # Threshold for embedding values - emb[emb > 100000.0] = 100000.0 - emb[emb < -100000.0] = -100000.0 - return emb + # def getEmbedding(self, fileName) : + # EmbFile = self.Curr_Dir + "/" + str(self.StateIndex) + # # Get IR2Vec FlowAware embeddings + # command = self.FileSys_Obj.IR2VecBin + " -fa -vocab " + \ + # self.FileSys_Obj.SeedEmbeddingPath + " -o " + EmbFile + " -level p " + fileName + # os.system(command) + # emb = np.loadtxt(EmbFile) + # # Threshold for embedding values + # emb[emb > 100000.0] = 100000.0 + # emb[emb < -100000.0] = -100000.0 + # return emb def createEnv(self, fileName): # env folder will contain folders for separate files with ll and executables @@ -225,8 +225,8 @@ def reset(self, test_file=None, embedding=None): self.embedding = np.array(embedding) else: self.embedding = self.stable_grpc("Action", 0) # LLVMgRPC way - else: - self.embedding = self.getEmbedding(self.BaseIR) + # else: + # self.embedding = self.getEmbedding(self.BaseIR) action_mask = [1] * self.action_space_size next_observation = {'action_mask': np.array( @@ -310,10 +310,10 @@ def step(self, 
action_index): result = self.readObservation() elif self.use_grpc: result = self.stable_grpc("Action", action_index) # LLVMgRPC way - else: - Reward, NextStateIR = self.getLocalReward(action_index) - result = self.getEmbedding(NextStateIR) - self.CurrIR = NextStateIR + # else: + # Reward, NextStateIR = self.getLocalReward(action_index) + # result = self.getEmbedding(NextStateIR) + # self.CurrIR = NextStateIR if result is None: raise Exception("result is None") else: @@ -397,6 +397,7 @@ def getMCACost(self, new_file): return currMcaThroughtput # Get reward for an action +<<<<<<< HEAD def getLocalReward(self, action): self.StateIndex += 1 fileName = os.path.splitext(os.path.basename(self.BaseIR))[0] @@ -467,6 +468,78 @@ def getLocalReward(self, action): reward = self.alpha*reward_binarySize + self.beta*mca_cost return reward, new_IR +======= + # def getLocalReward(self, action): + # self.StateIndex += 1 + # fileName = os.path.splitext(os.path.basename(self.BaseIR))[0] + + # logging.info("fileName {}".format(fileName)) + # logging.info("StateIndex {}".format(self.StateIndex)) + # logging.info("BaseIR {}".format(self.CurrIR)) + + # # Modified IR path + # new_IR = self.Curr_Dir + "/" + fileName + \ + # "_" + str(self.StateIndex) + ".ll" + # new_file = self.Curr_Dir + "/" + fileName + "_" + str(self.StateIndex) + + # # Applying the action and saving the IR file as _ + # # Here we can use gRPC server to apply the action + # command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + \ + # " -S -O34 -SubNum=" + str(action) + " " + \ + # self.CurrIR + " -o " + new_IR + # os.system(command) + # command = self.FileSys_Obj.ClangPath + " " + \ + # self.clang_arch_flag + " -c " + new_IR + " -o " + new_file + ".o" + # os.system(command) + # # Size reward + # currBinarySize = os.path.getsize(new_file + ".o") + + # logging.info("lastBinarySize {}".format(self.lastBinarySize)) + # logging.info("currBinarySize {}".format(currBinarySize)) + + # if ((self.baseBinarySize - 
self.minBinarySize) > 0): + # reward_binarySize = (self.lastBinarySize - currBinarySize) / \ + # (self.baseBinarySize - self.minBinarySize) + # else: + # reward_binarySize = (self.lastBinarySize - + # currBinarySize) / self.baseBinarySize + + # self.lastBinarySize = currBinarySize + + # # Throughput reward + # currMcaThroughtput = self.getMCACost(new_file) + # logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + # logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + # logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + # if self.lastMcaThroughtput is None: + # mca_cost = (self.OzMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + # else: + # mca_cost = (self.lastMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + + # self.lastMcaThroughtput = currMcaThroughtput + + # logging.info("Thr-debug:{}".format(mca_cost)) + # logging.info("Size-debug:{}".format(reward_binarySize)) + + # # Reward thresholds + # if mca_cost > self.mca_reward_thresh: + # mca_cost = self.mca_reward_thresh + # elif mca_cost < -self.mca_reward_thresh: + # mca_cost = -self.mca_reward_thresh + + # if reward_binarySize > self.size_reward_thresh: + # reward_binarySize = self.size_reward_thresh + # elif reward_binarySize < -self.size_reward_thresh: + # reward_binarySize = -self.size_reward_thresh + + # # Cumulative reward with alpha and beta hyperparameters + # reward = self.alpha*reward_binarySize + self.beta*mca_cost + + # return reward, new_IR +>>>>>>> 9f81943cf9d3... 
Removing ir2vec related dependencies and recursive code cleanup def getReward(self, AssemblyFilePath): # object size reward diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py index dc9e9e2466bb..29200d256078 100755 --- a/model/POSET-RL/inference.py +++ b/model/POSET-RL/inference.py @@ -65,11 +65,11 @@ import traceback parser = argparse.ArgumentParser() -parser.add_argument( - "--ir2vec_dir", - required=False, - help="path to IR2vec directory which has seed embedding and IR2Vec binary files", -) +# parser.add_argument( +# "--ir2vec_dir", +# required=False, +# help="path to IR2vec directory which has seed embedding and IR2Vec binary files", +# ) parser.add_argument( "--test_dir", help="Path to test directory", required=False, default="./" ) @@ -143,7 +143,7 @@ def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json "dump_type": "One", "intermediate_data": "./temp", "llvm_dir": BUILD_DIR, - "ir2vec_dir": args.ir2vec_dir, + # "ir2vec_dir": args.ir2vec_dir, "test_dir": args.test_dir, "alpha": args.alpha, "beta": args.beta, From 0a12ef2ac7b54c2f76e15793393325a60c190de2 Mon Sep 17 00:00:00 2001 From: Soumyaworks Date: Mon, 22 Jan 2024 20:22:08 +0530 Subject: [PATCH 17/52] Setting up flag for dumping onnx model and related changes (cherry picked from commit b94235bb2f56c8ab7e2a2ff18136536f628fe7d4) --- model/POSET-RL/inference.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py index 29200d256078..e725dadb91ed 100755 --- a/model/POSET-RL/inference.py +++ b/model/POSET-RL/inference.py @@ -105,8 +105,10 @@ ) parser.add_argument("--pipe_name",type=str,help="String Pipe name") parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) +parser.add_argument("--export_onnx", action="store_true", help="Export the model to ONNX") + class PhaseOrderInference: - def __init__(self, 
model_path, use_pipe=False, use_grpc=False, data_format="json"): + def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json", export_onnx=False): print("use_pipe {}".format(use_pipe)) logdir = "/tmp" logger = logging.getLogger(__file__) @@ -154,7 +156,8 @@ def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json "data_format": data_format, "use_grpc": use_grpc, "server_port": args.server_port, - "pipe_name": args.pipe_name + "pipe_name": args.pipe_name, + "export_onnx": export_onnx }, "framework": "torch", "explore": False, @@ -177,8 +180,10 @@ def env_creator(env_config): self.train_agent.restore(checkpoint) self.config = config - # DO NOT DELETE THE BELOW LINE , uncomment to dump the onnx model from checkpoint - # torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), export_params=True, f="/home/cs22mtech12011/Hackathon/ml-llvm-project/model/POSET-RL/onnx-model/posetrl_model-test.onnx", verbose=True, input_names=["obs"], output_names=["output"]) + + # Dump the onnx model from the checkpoint + if args.export_onnx: + torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), export_params=True, f="/path/to/ml-llvm-project/model/POSET-RL/onnx-model/posetrl_model.onnx", verbose=True, input_names=["obs"], output_names=["output"]) def dot_to_json(self, dot_): @@ -256,7 +261,7 @@ def getAdvice(self, request, context): ray.init() inference_obj = PhaseOrderInference( - args.model, args.use_pipe, args.use_grpc, args.data_format + args.model, args.use_pipe, args.use_grpc, args.data_format, args.export_onnx ) if args.use_pipe: print("about to enter while loop...") From ecdb42fb9bfc0b08cbb7556adc271e85f734e83a Mon Sep 17 00:00:00 2001 From: anik314159 Date: Tue, 23 Jan 2024 19:59:25 +0530 Subject: [PATCH 18/52] Added default pipe name to posetrl_pipe (cherry picked from commit bbc24bb2020fd9c7e61a06424c5110537e27be3e) --- model/POSET-RL/inference.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py index e725dadb91ed..1e036d322992 100755 --- a/model/POSET-RL/inference.py +++ b/model/POSET-RL/inference.py @@ -103,7 +103,7 @@ choices=["json", "protobuf", "bytes"], help="Data format to use for communication", ) -parser.add_argument("--pipe_name",type=str,help="String Pipe name") +parser.add_argument("--pipe_name",type=str,help="String Pipe name",default="posetrl_pipe") parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) parser.add_argument("--export_onnx", action="store_true", help="Export the model to ONNX") From 958a6ca51fb2d784599a8ffd1792982e476b755d Mon Sep 17 00:00:00 2001 From: anik314159 Date: Tue, 23 Jan 2024 20:12:36 +0530 Subject: [PATCH 19/52] Static flag fix for repeat pass (cherry picked from commit c73fc570893c3a630043a9e3e3ab802f17ede9c5) --- .../llvm/Transforms/IPO/PassManagerBuilder.h | 262 ++++++++++++++++++ .../lib/Transforms/IPO/PassManagerBuilder.cpp | 12 +- 2 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h new file mode 100644 index 000000000000..5d5e77fe3444 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -0,0 +1,262 @@ +// llvm/Transforms/IPO/PassManagerBuilder.h - Build Standard Pass -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H +#define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H + +#include +#include +#include +#include + +namespace llvm { +class ModuleSummaryIndex; +class Pass; +class TargetLibraryInfoImpl; +class TargetMachine; + +// The old pass manager infrastructure is hidden in a legacy namespace now. +namespace legacy { +class FunctionPassManager; +class PassManagerBase; +} + +/// PassManagerBuilder - This class is used to set up a standard optimization +/// sequence for languages like C and C++, allowing some APIs to customize the +/// pass sequence in various ways. A simple example of using it would be: +/// +/// PassManagerBuilder Builder; +/// Builder.OptLevel = 2; +/// Builder.populateFunctionPassManager(FPM); +/// Builder.populateModulePassManager(MPM); +/// +/// In addition to setting up the basic passes, PassManagerBuilder allows +/// frontends to vend a plugin API, where plugins are allowed to add extensions +/// to the default pass manager. They do this by specifying where in the pass +/// pipeline they want to be added, along with a callback function that adds +/// the pass(es). For example, a plugin that wanted to add a loop optimization +/// could do something like this: +/// +/// static void addMyLoopPass(const PMBuilder &Builder, PassManagerBase &PM) { +/// if (Builder.getOptLevel() > 2 && Builder.getOptSizeLevel() == 0) +/// PM.add(createMyAwesomePass()); +/// } +/// ... +/// Builder.addExtension(PassManagerBuilder::EP_LoopOptimizerEnd, +/// addMyLoopPass); +/// ... 
+class PassManagerBuilder { +public: + /// Extensions are passed to the builder itself (so they can see how it is + /// configured) as well as the pass manager to add stuff to. + + + typedef std::function + ExtensionFn; + typedef int GlobalExtensionID; + + enum ExtensionPointTy { + /// EP_EarlyAsPossible - This extension point allows adding passes before + /// any other transformations, allowing them to see the code as it is coming + /// out of the frontend. + EP_EarlyAsPossible, + + /// EP_ModuleOptimizerEarly - This extension point allows adding passes + /// just before the main module-level optimization passes. + EP_ModuleOptimizerEarly, + + /// EP_LoopOptimizerEnd - This extension point allows adding loop passes to + /// the end of the loop optimizer. + EP_LoopOptimizerEnd, + + /// EP_ScalarOptimizerLate - This extension point allows adding optimization + /// passes after most of the main optimizations, but before the last + /// cleanup-ish optimizations. + EP_ScalarOptimizerLate, + + /// EP_OptimizerLast -- This extension point allows adding passes that + /// run after everything else. + EP_OptimizerLast, + + /// EP_VectorizerStart - This extension point allows adding optimization + /// passes before the vectorizer and other highly target specific + /// optimization passes are executed. + EP_VectorizerStart, + + /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that + /// should not be disabled by O0 optimization level. The passes will be + /// inserted after the inlining pass. + EP_EnabledOnOptLevel0, + + /// EP_Peephole - This extension point allows adding passes that perform + /// peephole optimizations similar to the instruction combiner. These passes + /// will be inserted after each instance of the instruction combiner pass. + EP_Peephole, + + /// EP_LateLoopOptimizations - This extension point allows adding late loop + /// canonicalization and simplification passes. 
This is the last point in + /// the loop optimization pipeline before loop deletion. Each pass added + /// here must be an instance of LoopPass. + /// This is the place to add passes that can remove loops, such as target- + /// specific loop idiom recognition. + EP_LateLoopOptimizations, + + /// EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC + /// passes at the end of the main CallGraphSCC passes and before any + /// function simplification passes run by CGPassManager. + EP_CGSCCOptimizerLate, + + /// EP_FullLinkTimeOptimizationEarly - This extensions point allow adding + /// passes that + /// run at Link Time, before Full Link Time Optimization. + EP_FullLinkTimeOptimizationEarly, + + /// EP_FullLinkTimeOptimizationLast - This extensions point allow adding + /// passes that + /// run at Link Time, after Full Link Time Optimization. + EP_FullLinkTimeOptimizationLast, + }; + + /// The Optimization Level - Specify the basic optimization level. + /// 0 = -O0, 1 = -O1, 2 = -O2, 3 = -O3 + unsigned OptLevel; + + /// SizeLevel - How much we're optimizing for size. + /// 0 = none, 1 = -Os, 2 = -Oz + unsigned SizeLevel; + + /// LibraryInfo - Specifies information about the runtime library for the + /// optimizer. If this is non-null, it is added to both the function and + /// per-module pass pipeline. + TargetLibraryInfoImpl *LibraryInfo; + + /// Inliner - Specifies the inliner to use. If this is non-null, it is + /// added to the per-module passes. + Pass *Inliner; + + /// The module summary index to use for exporting information from the + /// regular LTO phase, for example for the CFI and devirtualization type + /// tests. + ModuleSummaryIndex *ExportSummary = nullptr; + + /// The module summary index to use for importing information to the + /// thin LTO backends, for example for the CFI and devirtualization type + /// tests. 
+ const ModuleSummaryIndex *ImportSummary = nullptr; + + bool DisableTailCalls; + bool DisableUnrollLoops; + bool SLPVectorize; + bool LoopVectorize; + bool LoopsInterleaved; + bool RerollLoops; + bool NewGVN; + bool DisableGVNLoadPRE; + bool ForgetAllSCEVInLoopUnroll; + bool VerifyInput; + bool VerifyOutput; + bool MergeFunctions; + bool PrepareForLTO; + bool PrepareForThinLTO; + bool PerformThinLTO; + bool DivergentTarget; + unsigned LicmMssaOptCap; + unsigned LicmMssaNoAccForPromotionCap; + + /// Enable profile instrumentation pass. + bool EnablePGOInstrGen; + /// Enable profile context sensitive instrumentation pass. + bool EnablePGOCSInstrGen; + /// Enable profile context sensitive profile use pass. + bool EnablePGOCSInstrUse; + /// Profile data file name that the instrumentation will be written to. + std::string PGOInstrGen; + /// Path of the profile data file. + std::string PGOInstrUse; + /// Path of the sample Profile data file. + std::string PGOSampleUse; + static std::string str_check; + + +private: + /// ExtensionList - This is list of all of the extensions that are registered. + std::vector> Extensions; + +public: + PassManagerBuilder(); + ~PassManagerBuilder(); + /// Adds an extension that will be used by all PassManagerBuilder instances. + /// This is intended to be used by plugins, to register a set of + /// optimisations to run automatically. + /// + /// \returns A global extension identifier that can be used to remove the + /// extension. + static GlobalExtensionID addGlobalExtension(ExtensionPointTy Ty, + ExtensionFn Fn); + /// Removes an extension that was previously added using addGlobalExtension. + /// This is also intended to be used by plugins, to remove any extension that + /// was previously registered before being unloaded. + /// + /// \param ExtensionID Identifier of the extension to be removed. 
+ static void removeGlobalExtension(GlobalExtensionID ExtensionID); + void addExtension(ExtensionPointTy Ty, ExtensionFn Fn); + + +private: + void addExtensionsToPM(ExtensionPointTy ETy, + legacy::PassManagerBase &PM) const; + void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const; + void addLTOOptimizationPasses(legacy::PassManagerBase &PM); + void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); + void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); + void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); + void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const; + +public: + /// populateFunctionPassManager - This fills in the function pass manager, + /// which is expected to be run on each function immediately as it is + /// generated. The idea is to reduce the size of the IR in memory. + void populateFunctionPassManager(legacy::FunctionPassManager &FPM); + void customPopulateFunctionPassManager(legacy::FunctionPassManager &FPM, unsigned customSizeLevel, unsigned subSeqNum); + + /// populateModulePassManager - This sets up the primary pass manager. + void populateModulePassManager(legacy::PassManagerBase &MPM); + void customPopulateModulePassManager(legacy::PassManagerBase &MPM, unsigned customSizeLevel, unsigned subSeqNum); + void populateLTOPassManager(legacy::PassManagerBase &PM); + void populateThinLTOPassManager(legacy::PassManagerBase &PM); +}; + +/// Registers a function for adding a standard set of passes. This should be +/// used by optimizer plugins to allow all front ends to transparently use +/// them. Create a static instance of this class in your plugin, providing a +/// private function that the PassManagerBuilder can use to add your passes. 
+class RegisterStandardPasses { + PassManagerBuilder::GlobalExtensionID ExtensionID; + +public: + RegisterStandardPasses(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + ExtensionID = PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn)); + } + + ~RegisterStandardPasses() { + // If the collection holding the global extensions is destroyed after the + // plugin is unloaded, the extension has to be removed here. Indeed, the + // destructor of the ExtensionFn may reference code in the plugin. + PassManagerBuilder::removeGlobalExtension(ExtensionID); + } +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9b082d2a7ade..1c6ed8b804b5 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -58,7 +58,7 @@ using namespace llvm; - +std::string PassManagerBuilder::str_check = "unSet"; static cl::opt OPosetRL("OPosetRL", cl::init(false), cl::Hidden, cl::desc("poset rl pass sequence")); @@ -209,6 +209,8 @@ PassManagerBuilder::PassManagerBuilder() { PrepareForThinLTO = EnablePrepareForThinLTO; PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; + + } PassManagerBuilder::~PassManagerBuilder() { @@ -1444,8 +1446,11 @@ void PassManagerBuilder::populateModulePassManager( bool DefaultOrPreLinkPipeline = !PerformThinLTO; if (OPosetRL){ - errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; - MPM.add(createPosetRLPass()); + if (str_check == "unSet"){ + errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; + MPM.add(createPosetRLPass()); + } + str_check = "Set"; return; } @@ -2179,3 +2184,4 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, Builder->populateLTOPassManager(*LPM); } + From 1123aec90d252f591bd216e785e24b1171cb1032 Mon Sep 17 00:00:00 2001 From: anik314159 Date: Tue, 23 Jan 2024 22:26:24 +0530 
Subject: [PATCH 20/52] Bool flag edited and spaces deleted (cherry picked from commit a6fde05cc3f442152138e04ed6a0612071b61f76) --- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h | 2 +- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index 5d5e77fe3444..e4e97c0f3ad0 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -186,7 +186,7 @@ class PassManagerBuilder { std::string PGOInstrUse; /// Path of the sample Profile data file. std::string PGOSampleUse; - static std::string str_check; + static bool check_flag; private: diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 1c6ed8b804b5..a9bbd16dca5b 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -58,7 +58,7 @@ using namespace llvm; -std::string PassManagerBuilder::str_check = "unSet"; +bool PassManagerBuilder::check_flag = false; static cl::opt OPosetRL("OPosetRL", cl::init(false), cl::Hidden, cl::desc("poset rl pass sequence")); @@ -209,8 +209,6 @@ PassManagerBuilder::PassManagerBuilder() { PrepareForThinLTO = EnablePrepareForThinLTO; PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; - - } PassManagerBuilder::~PassManagerBuilder() { @@ -1446,11 +1444,11 @@ void PassManagerBuilder::populateModulePassManager( bool DefaultOrPreLinkPipeline = !PerformThinLTO; if (OPosetRL){ - if (str_check == "unSet"){ + if (check_flag == false){ errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; MPM.add(createPosetRLPass()); } - str_check = "Set"; + check_flag = true; return; } @@ -2184,4 +2182,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, 
Builder->populateLTOPassManager(*LPM); } - From 4704aaf7f8cf8314eaced6c47bb9a7965abfa394 Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Wed, 24 Jan 2024 15:53:58 +0530 Subject: [PATCH 21/52] Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project (cherry picked from commit beae0d60d8feee0a9dcd60b0b084c7d3f395e08f) --- README.md | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/README.md b/README.md index dbea0a16be14..386c7ed5d9d2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,146 @@ +<<<<<<< HEAD # ML-LLVM-Project +======= +# ML LLVM Project + +## Contents +- About +- Setup + - Requirements + - Build +- All implemented Passes + +## About + +enter about + +## Setup + +### Requirements + +* cmake (>= 3.10) +* GNU Make (4.2.1) +* LLVM (10.X) - [src](https://github.com/llvm/llvm-project/tree/release/10.x), [release](https://releases.llvm.org/download.html#10.0.1) ## ask isn't it included with the repo +* Python (3.10), C++17 +* gRPC v1.34 and protobuf v3.13 - for gRPC Model Runner + * Building GRPC from Source: Please follow [`Build GRPC with cmake`](https://grpc.io/docs/languages/cpp/quickstart/) **v1.34 (protobuf v3.13)** to build GRPC from source. + * In the above tutorial setting `DCMAKE_INSTALL_PREFIX` is necessary as it would give you an easy way to uninstall GRPC later. 
+* [ONNXRuntime](https://github.com/microsoft/onnxruntime/releases) v1.16.3 +* TensorFlow - for TF Model Runner (AOT flow) # this should be in the yml only don't need to set it up separately + * Tested with TensorFlow 2.13.0 +* Other python requirements are available in [mlbridge.yml] # needs to be updated with sangamesh's.yml + * Conda/Anaconda based virtual environment is assumed + +(Experiments are done on an Ubuntu 20.04 machine) + +Commands to install the conda environment and set up onnx + +```bash +#TODO: change this to whatever will be the location of the envs +cp -r /Pramana/ML_LLVM_Tools/AE/envs/ ~/ + +# install the env using the following commands +conda env create -f ~/env/LOF_original_env.yml +conda env create -f ~/env/mlgo-new + +wget https://github.com/microsoft/onnxruntime/releases/download/v1.16.3/onnxruntime-linux-x64-1.16.3.tgz +tar -xvf onnxruntime-linux-x64-1.16.3.tgz + +# get GRPC working +# check GRPC version +# check again it should be exactly 1.34.0 not 1.34.x + +``` + + +### Build + +Following are the required steps to build the project; if you would like, you could run them in a script too after changing the required parameters. + +```bash +# switch to mlgo-new env as you will need it to build the setup +conda activate mlgo-new + +# rename files in your conda environment +mv ~/anaconda3/envs/mlgo-new/lib/python3.10/site-packages/tensorflow/include/google/ ~/anaconda3/envs/mlgo-new/lib/python3.10/site-packages/tensorflow/include/google_new/ + +mv ~/anaconda3/envs/mlgo-new/include/google/ ~/anaconda3/envs/mlgo-new/include/google_new/ + +git clone git@github.com:IITH-Compilers/ml-llvm-project.git +cd ml-llvm-project +git checkout mlbridge-lib +git pull +git submodule update --init --recursive +mkdir build +cd build + +# build command + cmake -G "Unix Makefiles" -S ../llvm -B .
\ + -DCMAKE_BUILD_TYPE="Release" \ + -DLLVM_ENABLE_PROJECTS="clang;IR2Vec;ml-llvm-tools;mlir;MLCompilerBridge" \ + -DLLVM_TARGETS_TO_BUILD="X86" \ + -DLLVM_ENABLE_ASSERTIONS=on \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ + -DLLVM_CCACHE_BUILD=ON \ + -DONNXRUNTIME_ROOTDIR= # change to your path where you wget the onnxruntime + -DLLVM_TF_AOT_RUNTIME= # change to your path + -DTENSORFLOW_AOT_PATH= # change to your path + -DLLVM_INLINER_MODEL_PATH=download \ + -DLLVM_INLINER_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v1.1/inlining-Oz-99f0063-v1.1.tar.gz \ + -DLLVM_RAEVICT_MODEL_PATH=download \ + -DLLVM_RAEVICT_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/regalloc-evict-v1.0/regalloc-evict-e67430c-v1.0.tar.gz + + +# don't make all +make clang opt -j50 +``` +## List of optimizations supported + +### Reinforcement Learning assisted Loop Distribution for Locality and Vectorization + +We propose a Reinforcement Learning (RL) approach for loop distribution, optimizing for both vectorization and locality. Using SCC Dependence Graphs (SDGs), our RL model learns loop distribution order through topological walks. The reward is based on instruction cost and cache misses. We introduce a strategy to expand the training set by generating new loops. This method aims to enhance loop parallelization and improve overall code performance. + +#### Try it out !!! + +> We assume you have already done the setup and built the project. + +```bash +# ONNX command for inference: +# this script will generate the optimized llfile +./build/bin/opt -S \ + -custom_loop_distribution \ + -cld-use-onnx \ + -ml-config-path=/home/intern24007/ml-llvm-project/config \ + +``` +To learn more, head to the pass-specific README [here].
+ +### RL4Real + +<\write info here\> + +#### Try it out +```bash +# write your bash commands here +``` + +### POSET-RL + +<\write info here\> + +#### Try it out +```bash +# write your bash commands here +``` + + +--- +Everything after this is old. It is kept just for reference. +--- + +# ML-Register-Allocation +> Support - LLVM 10.0.1 release on **X86** architecture +>>>>>>> beae0d60d8fe... Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project This is a fork of LLVM repository with IR2Vec and other "tools" to facilitate training and inferencing Machine Learning models for compiler optimizations. @@ -37,7 +179,69 @@ Other components include: the [libc++ C++ standard library](https://libcxx.llvm.org), the [LLD linker](https://lld.llvm.org), and more. +<<<<<<< HEAD ## Getting the Source Code and Building LLVM +======= +`conda env create -f poset-rl-odg.yml` + +`conda activate poset-rl-odg` + +Generate sub-sequences from the Oz pass sequence + +`python gen-odg.py -Oz` + +The graph and sub-sequences can be generated for other LLVM optimization levels. The required optimization flag needs to be provided as an argument when calling the above script.
+ +## Experiments +Install and activate the conda environment + +`conda env create -f rllib_env.yml` + +`conda activate rllib_env` + +Use `-mcpu=cortex-a72` for AArch64 architecture when calling `clang` or `opt` in (RLLib-PhaseOrder/Environment.py)[RLLib-PhaseOrder/Environment.py] + +### Training + +Add path to directory containing LLVM IR files to be used for training in [RLLib-PhaseOrder/Environment.py](RLLib-PhaseOrder/Environment.py) + +`python experiment.py --llvm_dir --ir2vec_dir ` + +### Inference + +Add paths to `llvm_dir`, `ir2vec_dir` and saved RLLib model to run-inference.sh + +`bash run-inference.sh` + +Print size, throughput and sub-sequences chosen by the model to a csv + +`bash results-binsize-reuse` + +Clean temporary files generated + + * ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for *type* are Debug, + Release, RelWithDebInfo, and MinSizeRel. Default is Debug. + + * ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled + (default is Yes for Debug builds, No for all other build types). + + * Run your build tool of choice! + + * The default target (i.e. ``ninja`` or ``make``) will build all of LLVM. + + * The ``check-all`` target (i.e. ``ninja check-all``) will run the + regression tests to ensure everything is in working order. + + * CMake will generate build targets for each tool and library, and most + LLVM sub-projects generate their own ``check-`` target. + + * Running a serial build will be *slow*. To improve speed, try running a + parallel build. That's done by default in Ninja; for ``make``, use + ``make -j NNN`` (NNN is the number of parallel jobs, use e.g. number of + CPUs you have.) + + * For more information see [CMake](https://llvm.org/docs/CMake.html) +>>>>>>> beae0d60d8fe... 
Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project Consult the [Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm) @@ -52,5 +256,9 @@ Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord chat](https://discord.gg/xS7Z362), or #llvm IRC channel on [OFTC](https://oftc.net/). +<<<<<<< HEAD The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for participants to all modes of communication within the project. +======= +`make clean` +>>>>>>> beae0d60d8fe... Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project From 8718d30bd47c1185d174a079b8d7237209b42ffb Mon Sep 17 00:00:00 2001 From: anik314159 Date: Thu, 25 Jan 2024 21:15:58 +0530 Subject: [PATCH 22/52] edited spaces (cherry picked from commit 52d390eaada7a032335b08b186dc9a5b799c82c4) --- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index e4e97c0f3ad0..b1d94a15ace9 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -59,8 +59,6 @@ class PassManagerBuilder { public: /// Extensions are passed to the builder itself (so they can see how it is /// configured) as well as the pass manager to add stuff to. - - typedef std::function ExtensionFn; @@ -188,7 +186,6 @@ class PassManagerBuilder { std::string PGOSampleUse; static bool check_flag; - private: /// ExtensionList - This is list of all of the extensions that are registered. 
std::vector> Extensions; @@ -211,7 +208,6 @@ class PassManagerBuilder { /// \param ExtensionID Identifier of the extension to be removed. static void removeGlobalExtension(GlobalExtensionID ExtensionID); void addExtension(ExtensionPointTy Ty, ExtensionFn Fn); - private: void addExtensionsToPM(ExtensionPointTy ETy, From 6599de6505def82fceb37c071826e6e46ec15f7f Mon Sep 17 00:00:00 2001 From: anik314159 Date: Sun, 28 Jan 2024 15:40:39 +0530 Subject: [PATCH 23/52] Adding Readme for Pass side (cherry picked from commit e525344f1e86e14b159e46fe32ee5186766ef57d) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 llvm/lib/Transforms/IPO/PosetRL/README.md diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md new file mode 100644 index 000000000000..e69de29bb2d1 From 4f01740ea7544fc48ce188d832b9fc4fc54ddb42 Mon Sep 17 00:00:00 2001 From: Soumyaworks Date: Mon, 29 Jan 2024 19:50:41 +0530 Subject: [PATCH 24/52] Changes related to README.md (cherry picked from commit 385d7882490b827f9e6f744bf10aecd9a8d8ebce) --- model/POSET-RL/README.md | 52 +++++ model/POSET-RL/posetrl_env.yml | 206 +++++++++++++++++++ model/POSET-RL/src/.env.example | 3 + model/POSET-RL/{ => src}/Environment_pipe.py | 0 model/POSET-RL/{ => src}/Filesystem.py | 0 model/POSET-RL/src/collect-results.sh | 48 +++++ model/POSET-RL/{ => src}/experiment.py | 0 model/POSET-RL/{ => src}/inference.py | 0 model/POSET-RL/src/log_reader.py | 149 ++++++++++++++ model/POSET-RL/{ => src}/model.py | 0 model/POSET-RL/src/onnx.py | 4 + model/POSET-RL/src/po_config.py | 5 + 12 files changed, 467 insertions(+) create mode 100644 model/POSET-RL/README.md create mode 100644 model/POSET-RL/posetrl_env.yml create mode 100644 model/POSET-RL/src/.env.example rename model/POSET-RL/{ => src}/Environment_pipe.py (100%) rename model/POSET-RL/{ => src}/Filesystem.py (100%) create mode 100755 model/POSET-RL/src/collect-results.sh 
rename model/POSET-RL/{ => src}/experiment.py (100%) rename model/POSET-RL/{ => src}/inference.py (100%) create mode 100644 model/POSET-RL/src/log_reader.py rename model/POSET-RL/{ => src}/model.py (100%) create mode 100644 model/POSET-RL/src/onnx.py create mode 100644 model/POSET-RL/src/po_config.py diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md new file mode 100644 index 000000000000..86b497ba534b --- /dev/null +++ b/model/POSET-RL/README.md @@ -0,0 +1,52 @@ +# POSET-RL + +POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially $m^n$ combinations, allowing repetitions. The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. The action space contains the subsequences created using the Oz dependence graph (ODG). Sequences are constructed from this graph by finding walks that start and end at critical nodes (with degree greater than a value k). [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta + +## Environment Setup + +- Copy the environment `.yml` file from `/path/to/ml-llvm-project/model/POSET-RL/posetrl_env.yml` to the home directory +- Set up the environment from the `.yml` file using the following command + + ```bash + conda env create -f posetrl_env.yml + ``` +## Setup Environment Variables + +- Create a `.env` file in the path `model/POSET-RL/src`. +- The `.env` file contains the necessary environment variables. +- Refer to the `.env.example` file present in `model/POSET-RL/src` for setting the required variables.
+ +- `MODEL_DIR`= +- `BUILD_DIR`= +- `CONFIG_DIR`= + + +## Training +### gRPC + +```bash +cd ml-llvm-project/model/POSET_RL/src + +python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_grpc +``` +### Pipes +```bash +cd ml-llvm-project/model/POSET_RL/src + +python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_pipe --data_format= + +#Model will be generated as a pytorch checkpoint in ml-llvm-project/model/checkpoint_dir after every 10 epochs +#The output of the above generates the training logs +``` +### ONNX + The -export_onnx option in inference.py is responsible for dumping the onnx model + +```bash +cd ml-llvm-project/model/POSET_RL/src + +python inference.py --test-dir= --use_grpc --server_address= --model= --export_onnx + +# The model will be dumped inside the onnx-model directory residing inside /path/to/ml-llvm-project/model/POSET-RL/ +# Copy the generated onnx model from the above mentioned directory into /path/to/ml-llvm-project/config/posetrl +``` \ No newline at end of file diff --git a/model/POSET-RL/posetrl_env.yml b/model/POSET-RL/posetrl_env.yml new file mode 100644 index 000000000000..a1d2f73b3f32 --- /dev/null +++ b/model/POSET-RL/posetrl_env.yml @@ -0,0 +1,206 @@ +name: rllib_env_posetrl +channels: + - pytorch + - conda-forge + - anaconda + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _pytorch_select=0.2=gpu_0 + - absl-py=0.10.0=py37_0 + - aiohttp=3.6.2=py37h7b6447c_0 + - argon2-cffi=20.1.0=py37h4abf009_2 + - async-timeout=3.0.1=py37_0 + - async_generator=1.10=py_0 + - attrs=20.2.0=py_0 + - backcall=0.2.0=py_0 + - blas=1.0=mkl + - bleach=3.2.1=pyh9f0ad1d_0 + - blinker=1.4=py37_0 + - brotlipy=0.7.0=py37h7b6447c_1000 + - c-ares=1.16.1=h7b6447c_0 + - ca-certificates=2020.11.8=ha878542_0 + - cachetools=4.1.1=py_0 + - certifi=2020.11.8=py37h89c1867_0 + - cffi=1.14.2=py37he30daa8_0 + - chardet=3.0.4=py37_1003 + - click=7.1.2=py_0 + - cryptography=3.1.1=py37h1ba5d50_0 + - cudatoolkit=11.0.221=h6bb024c_0 + - 
cycler=0.10.0=py37_0 + - dbus=1.13.16=hb2f20db_0 + - decorator=4.4.2=py_0 + - defusedxml=0.6.0=py_0 + - entrypoints=0.3=pyhd8ed1ab_1003 + - expat=2.2.9=he6710b0_2 + - fontconfig=2.13.0=h9420a91_0 + - freetype=2.10.2=h5ab3b9f_0 + - glib=2.65.0=h3eb4bd4_0 + - google-auth-oauthlib=0.4.1=py_2 + - grpcio=1.31.0=py37hf8bcb03_0 + - gst-plugins-base=1.14.0=hbbd80ab_1 + - gstreamer=1.14.0=hb31296c_0 + - icu=58.2=he6710b0_3 + - idna=2.10=py_0 + - importlib-metadata=1.7.0=py37_0 + - importlib_metadata=1.7.0=0 + - intel-openmp=2020.2=254 + - ipykernel=5.3.4=py37h5ca1d4c_0 + - ipython=7.19.0=py37hb070fc8_0 + - ipython_genutils=0.2.0=py37_0 + - ipywidgets=7.5.1=pyh9f0ad1d_1 + - jedi=0.17.2=py37_0 + - jinja2=2.11.2=pyh9f0ad1d_0 + - jpeg=9b=h024ee3a_2 + - jsonschema=3.2.0=py_2 + - jupyter_client=6.1.7=py_0 + - jupyter_core=4.6.3=py37_0 + - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 + - kiwisolver=1.2.0=py37hfd86e86_0 + - lcms2=2.11=h396b838_0 + - ld_impl_linux-64=2.33.1=h53a641e_7 + - libedit=3.1.20191231=h14c3975_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=9.1.0=hdf63c60_0 + - libpng=1.6.37=hbc83047_0 + - libprotobuf=3.13.0=hd408876_0 + - libsodium=1.0.18=h7b6447c_0 + - libstdcxx-ng=9.1.0=hdf63c60_0 + - libtiff=4.1.0=h2733197_1 + - libuuid=1.0.3=h1bed415_2 + - libuv=1.40.0=h7b6447c_0 + - libxcb=1.14=h7b6447c_0 + - libxml2=2.9.10=he19cac6_1 + - lz4-c=1.9.2=he6710b0_1 + - markdown=3.2.2=py37_0 + - markupsafe=1.1.1=py37hb5d75c8_2 + - matplotlib=3.3.1=0 + - matplotlib-base=3.3.1=py37h817c723_0 + - mistune=0.8.4=py37h4abf009_1002 + - mkl=2020.2=256 + - mkl-service=2.3.0=py37he904b0f_0 + - mkl_fft=1.1.0=py37h23d657b_0 + - mkl_random=1.1.1=py37h0573a6f_0 + - multidict=4.7.6=py37h7b6447c_1 + - nbclient=0.5.1=py_0 + - nbconvert=6.0.7=py37h89c1867_3 + - nbformat=5.0.8=py_0 + - ncurses=6.2=he6710b0_1 + - nest-asyncio=1.4.3=pyhd8ed1ab_0 + - networkx=2.5=py_0 + - ninja=1.10.1=py37hfd86e86_0 + - notebook=6.1.5=py37h89c1867_0 + - numpy=1.19.1=py37hbc911f0_0 + - numpy-base=1.19.1=py37hfa32c7d_0 + - 
oauthlib=3.1.0=py_0 + - olefile=0.46=py37_0 + - openssl=1.1.1h=h516909a_0 + - packaging=20.4=pyh9f0ad1d_0 + - pandas=1.1.1=py37he6710b0_0 + - pandoc=2.11.2=h36c2ea0_0 + - pcre=8.44=he6710b0_0 + - pexpect=4.8.0=pyhd3eb1b0_3 + - pickleshare=0.7.5=py37_1001 + - pillow=7.2.0=py37hb39fc2d_0 + - pip=20.2.3=py37_0 + - prometheus_client=0.9.0=pyhd3deb0d_0 + - prompt-toolkit=3.0.8=py_0 + - ptyprocess=0.6.0=pyhd3eb1b0_2 + - pyasn1=0.4.8=py_0 + - pyasn1-modules=0.2.8=py_0 + - pycparser=2.20=py_2 + - pydot=1.3.0=py37_0 + - pygments=2.7.2=pyhd3eb1b0_0 + - pyjwt=1.7.1=py37_0 + - pyopenssl=19.1.0=py_1 + - pyparsing=2.4.7=py_0 + - pyqt=5.9.2=py37h22d08a2_1 + - pyrsistent=0.17.3=py37h4abf009_1 + - pysocks=1.7.1=py37_1 + - python=3.7.9=h7579374_0 + - python-dateutil=2.8.1=py_0 + - python_abi=3.7=1_cp37m + - pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0 + - pytz=2020.1=py_0 + - qt=5.9.7=h5867ecd_1 + - readline=8.0=h7b6447c_0 + - requests=2.24.0=py_0 + - requests-oauthlib=1.3.0=py_0 + - rsa=4.6=py_0 + - send2trash=1.5.0=py_0 + - setuptools=49.6.0=py37_0 + - sip=4.19.24=py37he6710b0_0 + - six=1.15.0=py_0 + - sqlite=3.33.0=h62c20be_0 + - tensorboard=2.2.1=pyh532a8cf_0 + - tensorboard-plugin-wit=1.6.0=py_0 + - terminado=0.9.1=py37h89c1867_1 + - testpath=0.4.4=py_0 + - tk=8.6.10=hbc83047_0 + - torchaudio=0.7.0=py37 + - torchvision=0.8.1=py37_cu110 + - tornado=6.0.4=py37h7b6447c_1 + - tqdm=4.51.0=pyhd3eb1b0_0 + - traitlets=5.0.5=py_0 + - typing_extensions=3.7.4.3=py_0 + - urllib3=1.25.10=py_0 + - wcwidth=0.2.5=py_0 + - webencodings=0.5.1=py37_1 + - werkzeug=1.0.1=py_0 + - wheel=0.35.1=py_0 + - widgetsnbextension=3.5.1=py37h89c1867_4 + - xz=5.2.5=h7b6447c_0 + - yarl=1.5.1=py37h7b6447c_0 + - zeromq=4.3.3=he6710b0_3 + - zipp=3.1.0=py_0 + - zlib=1.2.11=h7b6447c_3 + - zstd=1.4.5=h9ceee32_0 + - pip: + - aiohttp-cors==0.7.0 + - aioredis==1.3.1 + - blessings==1.7 + - cached-property==1.5.2 + - cloudpickle==1.6.0 + - colorama==0.4.4 + - dataclasses==0.6 + - dm-tree==0.1.6 + - filelock==3.0.12 + - 
future==0.18.2 + - google-api-core==1.30.0 + - google-auth==1.32.0 + - googleapis-common-protos==1.53.0 + - gpustat==0.6.0 + - gym==0.18.3 + - h5py==3.1.0 + - hiredis==2.0.0 + - joblib==0.17.0 + - json5==0.9.5 + - jupyterlab==2.2.9 + - jupyterlab-server==1.2.0 + - keras==2.4.3 + - lz4==3.1.3 + - msgpack==1.0.2 + - nvidia-ml-py3==7.352.0 + - online-triplet-loss==0.0.4 + - opencensus==0.7.13 + - opencensus-context==0.1.2 + - opencv-python-headless==4.3.0.36 + - pandocfilters==1.4.3 + - parso==0.7.1 + - protobuf==3.17.3 + - psutil==5.8.0 + - py-spy==0.3.7 + - pydantic==1.8.2 + - pyglet==1.5.15 + - pyyaml==5.3.1 + - pyzmq==20.0.0 + - ray==1.4.0 + - redis==3.5.3 + - scikit-learn==0.23.2 + - scipy==1.5.4 + - sklearn==0.0 + - tabulate==0.8.9 + - tensorboardx==2.3 + - threadpoolctl==2.1.0 + - torchsummary==1.5.1 + - python-decouple diff --git a/model/POSET-RL/src/.env.example b/model/POSET-RL/src/.env.example new file mode 100644 index 000000000000..cb300b3de00d --- /dev/null +++ b/model/POSET-RL/src/.env.example @@ -0,0 +1,3 @@ +CONFIG_DIR = +BUILD_DIR = +MODEL_DIR = \ No newline at end of file diff --git a/model/POSET-RL/Environment_pipe.py b/model/POSET-RL/src/Environment_pipe.py similarity index 100% rename from model/POSET-RL/Environment_pipe.py rename to model/POSET-RL/src/Environment_pipe.py diff --git a/model/POSET-RL/Filesystem.py b/model/POSET-RL/src/Filesystem.py similarity index 100% rename from model/POSET-RL/Filesystem.py rename to model/POSET-RL/src/Filesystem.py diff --git a/model/POSET-RL/src/collect-results.sh b/model/POSET-RL/src/collect-results.sh new file mode 100755 index 000000000000..10fca844ed8a --- /dev/null +++ b/model/POSET-RL/src/collect-results.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Usage: bash collect-results.sh +# Prints size, throughput and pass sequence statistics to a csv file +INFERENCE_DIR=$1 +LLVM_BUILD_DIR=$2 +OUTPUT=$3 +echo "Filename, Oz .o size, Oz RThr, .o < Oz, Num for min size, Pass sequence, Model .o size, Model RThr, Complete .o, 
Complete RThr" > $OUTPUT +for dir in $INFERENCE_DIR/*; do + if [ -d $dir ] + then + echo -n "${dir##*/}, " >> $OUTPUT + if [ ! -f "${dir}/Oz_binary.o" ] + then + echo -n "No *_Oz.ll, " >> $OUTPUT + else + echo -n "$(du -b ${dir}/Oz_binary.o | awk '{print $1}'), " >> $OUTPUT + $LLVM_BUILD_DIR/bin/llc "${dir}/${dir##*/}_Oz.ll" -o "${dir##*/}_Oz.s" + echo -n "$($LLVM_BUILD_DIR/bin/llvm-mca ${dir##*/}_Oz.s | head -15 | grep 'Block RThroughput' | cut -f 2 -d':' | awk '{print $1}'), " >> $OUTPUT + rm "${dir##*/}_Oz.s" + fi + num="" + minsize=-1 + for i in $(seq 1 15); do + if [ -f "${dir}/${dir##*/}_${i}.o" ] + then + size=$(du -b ${dir}/${dir##*/}_${i}.o | awk '{print $1}') + if [[ $minsize -lt '0' ]] || [[ $minsize -ge $size ]] + then + num=$i + minsize=$size + fi + else + echo "No *_.ll" + fi + done + actionseq=$(cat actionlist.txt | grep -w "${dir##*/}.ll" | cut -f 1 -d ' ') + echo -n ", ${num}, ${actionseq}, ${minsize}, " >> $OUTPUT + echo -n "$($LLVM_BUILD_DIR/bin/llvm-mca ${dir}/${dir##*/}_${num}.s | head -15 | grep 'Block RThroughput' | cut -f 2 -d':' | awk '{print $1}')" >> $OUTPUT + if [ -f "${dir}/${dir##*/}_15.o" ] + then + size=$(du -b ${dir}/${dir##*/}_15.o | awk '{print $1}') + else + echo "No *_.ll" + fi + echo -n ", ${size}, " >> $OUTPUT + echo "$($LLVM_BUILD_DIR/bin/llvm-mca ${dir}/${dir##*/}_15.s | head -15 | grep 'Block RThroughput' | cut -f 2 -d':' | awk '{print $1}')" >> $OUTPUT + fi +done diff --git a/model/POSET-RL/experiment.py b/model/POSET-RL/src/experiment.py similarity index 100% rename from model/POSET-RL/experiment.py rename to model/POSET-RL/src/experiment.py diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/src/inference.py similarity index 100% rename from model/POSET-RL/inference.py rename to model/POSET-RL/src/inference.py diff --git a/model/POSET-RL/src/log_reader.py b/model/POSET-RL/src/log_reader.py new file mode 100644 index 000000000000..147aca9d4ee6 --- /dev/null +++ b/model/POSET-RL/src/log_reader.py @@ -0,0 +1,149 @@ 
+"""Reader for training log. + +See lib/Analysis/TrainingLogger.cpp for a description of the format. +""" +import ctypes +import dataclasses +import io +import json +import math +import sys +from typing import List, Optional +from functools import reduce +import operator +import numpy + +_element_types = { + "float": ctypes.c_float, + "double": ctypes.c_double, + "int8_t": ctypes.c_int8, + "uint8_t": ctypes.c_uint8, + "int16_t": ctypes.c_int16, + "uint16_t": ctypes.c_uint16, + "int32_t": ctypes.c_int32, + "uint32_t": ctypes.c_uint32, + "int64_t": ctypes.c_int64, + "uint64_t": ctypes.c_uint64, +} + + +@dataclasses.dataclass(frozen=True) +class TensorSpec: + name: str + port: int + shape: List[int] + element_type: type + + @staticmethod + def from_dict(d: dict): + name = d["name"] + port = d["port"] + shape = [int(e) for e in d["shape"]] + element_type_str = d["type"] + if element_type_str not in _element_types: + raise ValueError(f"uknown type: {element_type_str}") + return TensorSpec( + name=name, + port=port, + shape=shape, + element_type=_element_types[element_type_str], + ) + + +class TensorValue: + def __init__(self, spec: TensorSpec, buffer: bytes): + self._spec = spec + self._buffer = buffer + self._view = ctypes.cast(self._buffer, ctypes.POINTER(self._spec.element_type)) + # self._len = math.prod(self._spec.shape) + self._len = reduce(operator.mul, self._spec.shape, 1) + # self._view = numpy.frombuffer(self._buffer, float) + # print("Value of", self._spec.name, "is:", self._view) + + def spec(self) -> TensorSpec: + return self._spec + + def __len__(self) -> int: + return self._len + + def __getitem__(self, index): + if index < 0 or index >= self._len: + raise IndexError(f"Index {index} out of range [0..{self._len})") + return self._view[index] + + +def read_tensor(fs: io.BufferedReader, ts: TensorSpec) -> TensorValue: + size = reduce(operator.mul, ts.shape, 1) * ctypes.sizeof(ts.element_type) + # size = math.prod(ts.shape) * ctypes.sizeof(ts.element_type) + 
data = fs.read(size) + return TensorValue(ts, data) + + +def pretty_print_tensor_value(tv: TensorValue): + print(f'{tv.spec().name}: {",".join([str(v) for v in tv])}') + + +def read_header(f: io.BufferedReader): + line = f.readline() + header = json.loads(line) + tensor_specs = [TensorSpec.from_dict(ts) for ts in header["features"]] + score_spec = TensorSpec.from_dict(header["score"]) if "score" in header else None + advice_spec = TensorSpec.from_dict(header["advice"]) if "advice" in header else None + return tensor_specs, score_spec, advice_spec + + +def read_one_observation( + context: Optional[str], + event_str: str, + f: io.BufferedReader, + tensor_specs: List[TensorSpec], + score_spec: Optional[TensorSpec], +): + features = [] + for ts in tensor_specs: + features.append(read_tensor(f, ts)) + f.readline() + return context, None, features, None + + +def read_stream(fname: str): + with io.BufferedReader(io.FileIO(fname, "rb")) as f: + tensor_specs, score_spec, _ = read_header(f) + context = None + while True: + event_str = f.readline() + if not event_str: + break + context, observation_id, features, score = read_one_observation( + context, event_str, f, tensor_specs, score_spec + ) + yield context, observation_id, features, score + +def read_stream2(f: io.BufferedReader): + context = None + while True: + tensor_specs, score_spec, _ = read_header(f) + # event_str = f.readline() + # print("Event: ", event_str) + # if not event_str: + # break + context, observation_id, features, score = read_one_observation( + context, '', f, tensor_specs, score_spec + ) + yield context, observation_id, features, score + +def main(args): + last_context = None + for ctx, obs_id, features, score in read_stream(args[1]): + if last_context != ctx: + print(f"context: {ctx}") + last_context = ctx + print(f"observation: {obs_id}") + for fv in features: + pretty_print_tensor_value(fv) + if score: + pretty_print_tensor_value(score) + + +if __name__ == "__main__": + main(sys.argv) diff --git 
a/model/POSET-RL/model.py b/model/POSET-RL/src/model.py similarity index 100% rename from model/POSET-RL/model.py rename to model/POSET-RL/src/model.py diff --git a/model/POSET-RL/src/onnx.py b/model/POSET-RL/src/onnx.py new file mode 100644 index 000000000000..2575b6d9dfd4 --- /dev/null +++ b/model/POSET-RL/src/onnx.py @@ -0,0 +1,4 @@ +import onnx +model_path = "/home/cs20mtech12003/ML-Phase-Ordering/RLLib-PhaseOrder/poset-RL-onnx-model/model.onnx" +model = onnx.load(model_path) +onnx.checker.check_model(model) diff --git a/model/POSET-RL/src/po_config.py b/model/POSET-RL/src/po_config.py new file mode 100644 index 000000000000..7ae280db3f0b --- /dev/null +++ b/model/POSET-RL/src/po_config.py @@ -0,0 +1,5 @@ +from decouple import config + +CONFIG_DIR = config('CONFIG_DIR') +BUILD_DIR = config('BUILD_DIR') +MODEL_DIR = config('MODEL_DIR') From fa1bfa77ec0658d57de078490d5399e829a92a35 Mon Sep 17 00:00:00 2001 From: anik314159 Date: Mon, 29 Jan 2024 19:59:57 +0530 Subject: [PATCH 25/52] Adding README (cherry picked from commit b670173dc9a5910ddc78fc1999567ad3446b0860) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 46 +++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index e69de29bb2d1..0ffb3475f056 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -0,0 +1,46 @@ +# POSET-RL +POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially $m^n$ combinations, allowing repetitions. The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. The action space contains the subsequences created using the Oz dependence graph (ODG).
Sequences are constructed from this graph by finding walks that start and end at critical nodes (with degree greater than a value k). [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta + +# Environment setup +```bash + conda env create -f {LLVM_DIR}/model/POSET-RL/rllib_posetrl_env.yml +``` +# Inference on Pre-Trained Models +Currently we support 3 kinds of [ModelRunners](https://compilers.cse.iith.ac.in/publications/mlcompilerbridge) +- [ONNX Model Runner](#ONNX) - This is an in-process model runner, i.e. it does not require a server/client setup +- [gRPC Model Runner](#gRPC) - This uses gRPC which internally uses Protobuf and the protoc compiler to communicate +- [Pipe Model Runner](#Pipes) - This uses pipes to communicate information between the model and the compiler +## gRPC +Server Side: +```bash +cd ml-llvm-project/model/POSET_RL + +python inference.py --test-dir= / + --use_grpc --server_address=/ + --model= / +``` +Client Side: +```bash +#Open a new terminal +ml-llvm-project/build/bin/opt -poset-rl -use_grpc -ml-config-path=ml-llvm-project/config -server_address= -o / +``` +## Pipes +Through the pipe mode of communication we have 2 ways of Serialisation and Deserialisation of the data (SerDes): +json and bytes.
+ +Server Side: +```bash +cd ml-llvm-project/model/POSET-RL + +python inference.py --test-dir= --use_pipe --pipe_name= --data_format= --model= +``` +Client Side: +```bash +#Open a new terminal +ml-llvm-project/build/bin/opt -poset-rl -use-pipe -pipe-name= -data-format= -ml-config-path=ml-llvm-project/config -o +``` +## ONNX +```bash +ml-llvm-project/build/bin/opt -poset-rl -use-onnx -ml-config-path=ml-llvm-project/config -o +``` \ No newline at end of file From 0807b26ef83fdf1970cac726b360c45fb4388fdd Mon Sep 17 00:00:00 2001 From: Umesh-k26 Date: Tue, 30 Jan 2024 17:21:03 +0530 Subject: [PATCH 26/52] Fixed cmake linking issues (cherry picked from commit 92e0943e97692fcd4dd16c1ad7c15de08a54d3d8) --- MLCompilerBridge | 2 +- clang/tools/driver/CMakeLists.txt | 3 + llvm/include/llvm/LinkAllPasses.h | 4 + .../Transforms/IPO/AddSizeAttr/AddSizeAttr.h | 19 ++ llvm/lib/Support/CMakeLists.txt | 2 +- llvm/lib/Transforms/CMakeLists.txt | 3 + .../Transforms/Hello-MLBridge/CMakeLists.txt | 6 + .../IPO/AddSizeAttr/AddSizeAttr.cpp | 61 +++++ .../Transforms/IPO/AddSizeAttr/CMakeLists.txt | 12 + llvm/lib/Transforms/IPO/CMakeLists.txt | 5 + .../lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp | 209 ++++++++++++++++++ llvm/lib/Transforms/PosetRL/CMakeLists.txt | 2 +- .../Transforms/demoGrpcPass/CMakeLists.txt | 4 + llvm/tools/CMakeLists.txt | 3 + llvm/tools/opt/CMakeLists.txt | 3 + 16 files changed, 336 insertions(+), 4 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h create mode 100644 llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp create mode 100644 llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt create mode 100644 llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp diff --git a/MLCompilerBridge b/MLCompilerBridge index 62439c9bc20c..8306513c3da3 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 62439c9bc20ce2b29e459575afcdd1a9c3e57a1a +Subproject commit 
8306513c3da3e1bb86c2c82975685417100460e3 diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index c12899092427..c20dca0e0bbe 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -17,8 +17,11 @@ set( LLVM_LINK_COMPONENTS TransformUtils Vectorize <<<<<<< HEAD +<<<<<<< HEAD ======= AddSizeAttr +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues IR2Vec CollectMachineIR >>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 1673ceb26185..5d9067793db7 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -36,6 +36,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/Support/Valgrind.h" +<<<<<<< HEAD +======= +#include "llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h" +>>>>>>> 92e0943e9769... Fixed cmake linking issues #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" diff --git a/llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h b/llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h new file mode 100644 index 000000000000..a958c5d63e1c --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h @@ -0,0 +1,19 @@ +#ifndef LLVM_TRANSFORMS_ADDSIZEATTR_H +#define LLVM_TRANSFORMS_ADDSIZEATTR_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/*class AddSizeAttrPass + : public ModulePass { + +public: + bool runOnModule(Module &M); +};*/ + +ModulePass *createAddSizeAttrPass(); +} + +#endif diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index f753714e4a9a..e5ba187e9538 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -338,4 +338,4 @@ if(LLVM_WITH_Z3) ) endif() -target_link_libraries(LLVMSupport PRIVATE 
LLVMMLBridge) \ No newline at end of file +target_link_libraries(LLVMSupport PUBLIC ModelRunnerUtils) \ No newline at end of file diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index f297328ca443..b50bb98d3950 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -16,9 +16,12 @@ add_subdirectory(ObjCARC) add_subdirectory(Coroutines) add_subdirectory(CFGuard) <<<<<<< HEAD +<<<<<<< HEAD add_subdirectory(Hello-MLBridge) ======= add_subdirectory(AddSizeAttr) +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues add_subdirectory(MCAInstrumentation) <<<<<<< HEAD add_subdirectory(PipeIR) diff --git a/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt b/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt index 6cec4e0179f2..d18fc4d9f304 100644 --- a/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt +++ b/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt @@ -29,7 +29,13 @@ add_llvm_library(LLVMHelloMLBridge MODULE BUILDTREE_ONLY PLUGIN_TOOL opt +<<<<<<< HEAD ) +======= + ) + +target_link_libraries(LLVMHelloMLBridge PUBLIC LLVMMLBridge) +>>>>>>> 92e0943e9769... 
Fixed cmake linking issues file(GLOB MODEL_OBJECTS ${CMAKE_CURRENT_SOURCE_DIR}/tf_model/*.o) diff --git a/llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp b/llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp new file mode 100644 index 000000000000..7cb571439dc1 --- /dev/null +++ b/llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp @@ -0,0 +1,61 @@ +//Add Code Size reduction attributes + +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/InitializePasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h" +using namespace llvm; + +#define DEBUG_TYPE "addsizeattr" + +static cl::opt EnableMinSize("enableMinSizeAttr", cl::desc("Option to add minsize function attribute"), cl::value_desc("true/false")); +static cl::opt RemoveNoInline("removeNoInlineAttr", cl::desc("Option to remove noinline function attribute"), cl::value_desc("true/false")); + +namespace { + class AddSizeAttrPass : public ModulePass { + public: + static char ID; + + AddSizeAttrPass() : ModulePass(ID) { + initializeAddSizeAttrPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { + for (Function &F : M) { + F.addFnAttr(Attribute::OptimizeForSize); + if(EnableMinSize){ + F.addFnAttr(Attribute::MinSize); + } + if(RemoveNoInline){ + F.removeFnAttr(Attribute::NoInline); + } + } + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + }; +} + + +char AddSizeAttrPass::ID = 0; +INITIALIZE_PASS_BEGIN(AddSizeAttrPass, + "add-size-attr", + "Add Function Attributes that reduce code size", false, false) +INITIALIZE_PASS_END(AddSizeAttrPass, + "add-size-attr", + "Add Function Attributes that reduce code size", false, false) + +void llvm::initializeAddSizeAttr(PassRegistry &Registry) { + initializeAddSizeAttrPassPass(Registry); +} + +ModulePass *llvm::createAddSizeAttrPass() { + return 
new AddSizeAttrPass(); +} diff --git a/llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt b/llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt new file mode 100644 index 000000000000..1aa0dd5944bd --- /dev/null +++ b/llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_component_library(LLVMAddSizeAttr + AddSizeAttr.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/AddSizeAttr + + DEPENDS + intrinsics_gen + ) + +target_link_libraries(LLVMAddSizeAttr PUBLIC LLVMMLBridge) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 06580c7bb8ea..4e183f87e1b0 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,6 +1,7 @@ <<<<<<< HEAD ======= add_subdirectory(PosetRL) +add_subdirectory(AddSizeAttr) >>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO add_llvm_component_library(LLVMipo @@ -59,6 +60,7 @@ add_llvm_component_library(LLVMipo COMPONENT_NAME IPO +<<<<<<< HEAD <<<<<<< HEAD LINK_COMPONENTS AggressiveInstCombine @@ -87,3 +89,6 @@ target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) target_link_libraries(LLVMipo PUBLIC LLVMPosetRL) target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) >>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp +======= +target_link_libraries(LLVMipo PUBLIC LLVMPosetRL LLVMAddSizeAttr) +>>>>>>> 92e0943e9769... 
Fixed cmake linking issues diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index a9bbd16dca5b..c84329a1a494 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -29,7 +29,7 @@ #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Transforms/AddSizeAttr/AddSizeAttr.h" +#include "llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" diff --git a/llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp b/llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp new file mode 100644 index 000000000000..8212aa4e3244 --- /dev/null +++ b/llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp @@ -0,0 +1,209 @@ +#include "llvm/Transforms/IPO/PosetRL/PosetRL.h" +#include "inference/poset_rl_env.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR2Vec.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include +#include "grpc/posetRL/posetRL.grpc.pb.h" +#include "grpc/posetRL/posetRL.pb.h" +#include +#include +#include +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/PipeModelRunner.h" +#include "MLModelRunner/gRPCModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "grpcpp/impl/codegen/status.h" + +using namespace llvm; +using namespace grpc; +using namespace posetRLgRPC; +// using namespace google::protobuf; + +static cl::opt 
training("training", cl::Hidden, + cl::desc("whether it is training or inference"), + cl::init(false)); + +static cl::opt + usePipe("use-pipe", cl::Hidden, + cl::desc("Use pipe based interation with python model"), + cl::init(false)); + +static cl::opt data_format( + "data-format", cl::Hidden, cl::init("protobuf"), + cl::desc("Data format to use for communication with python model")); +static cl::opt useONNX("use-onnx", cl::Hidden, + cl::desc("Use ONNX for inferencing model"), + cl::init(false)); + +static cl::opt server_address( + "server_address", cl::Hidden, + cl::desc("Starts the server in the given address, format :"), + cl::init("127.0.0.1:50051")); + +static cl::opt pipe_name("pipe-name", cl::Hidden, + cl::init("posetrl_pipe"), + cl::desc("Name for pipe file")); + +namespace { +struct PosetRL : public ModulePass, + public PosetRLEnv, + public posetRLgRPC::PosetRLService::Service { + static char ID; + PosetRL() : ModulePass(ID) {} + bool runOnModule(Module &M) override { + assert(MLConfig::mlconfig != "" && "ml-config-path required" ); + this->M = &M; + // Establish pipe communication + if (usePipe) { + // data_format can take values: protobuf, json, bytes + std::string basename = + "/tmp/" + pipe_name; + + BaseSerDes::Kind SerDesType; + if (data_format == "json") + SerDesType = BaseSerDes::Kind::Json; + else if (data_format == "protobuf") + SerDesType = BaseSerDes::Kind::Protobuf; + else if (data_format == "bytes") + SerDesType = BaseSerDes::Kind::Bitstream; + else { + errs() << "Invalid data format\n"; + exit(1); + } + + MLRunner = std::make_unique( + basename + ".out", basename + ".in", SerDesType, &M.getContext()); + posetRLgRPC::EmbeddingResponse response; + posetRLgRPC::ActionRequest request; + MLRunner->setRequest(&response); + MLRunner->setResponse(&request); + initPipeCommunication(); + } else { + if (training) { + MLRunner = std::make_unique>(server_address, this, &M.getContext()); + } else if (useONNX) { + Agent agent(MLConfig::mlconfig + + 
"/posetrl/posetrl_model.onnx"); + std::map agents; + agents["agent"] = &agent; + MLRunner = + std::make_unique(this, agents, &M.getContext()); + MLRunner->evaluate(); + errs() << "Sequence: "; + for (auto a : Sequence) + errs() << a << " "; + errs() << "\n"; + } else { + posetRLgRPC::EmbeddingResponse request; + posetRLgRPC::ActionRequest response; + MLRunner = std::make_unique>( + server_address, &request, &response, &M.getContext()); + MLRunner->setRequest(&request); + MLRunner->setResponse(&response); + initPipeCommunication(); + } + } + return true; + } + void initPipeCommunication() { + int passSequence = 0; + while (passSequence != -1) { + std::pair> p1("embedding", + getEmbeddings()); + MLRunner->populateFeatures(p1); + int Res = MLRunner->evaluate(); + processMLAdvice(Res); + passSequence = Res; + errs() << "Sequence : " << passSequence << "\t"; + } + } + + inline void processMLAdvice(int advice) { applySeq(advice); } + + Embedding getEmbeddings() override { + auto Ir2vec = + IR2Vec::Embeddings(*M, IR2Vec::IR2VecMode::FlowAware, + MLConfig::mlconfig + "/ir2vec/seedEmbeddingVocab-300-llvm10.txt"); + auto ProgVector = Ir2vec.getProgramVector(); + Embedding Vector(ProgVector.begin(), ProgVector.end()); + return Vector; + } + + void applySeq(Action Action) override { + PassManagerBuilder Builder; + Builder.OptLevel = 2; + Builder.SizeLevel = 2; + + legacy::FunctionPassManager FPM(M); + legacy::PassManager MPM; + Builder.customPopulateFunctionPassManager(FPM, 34, Action); + Builder.customPopulateModulePassManager(MPM, 34, Action); + // run the passes + MPM.run(*M); + for (auto &F : *M) { + FPM.run(F); + } + } + + grpc::Status + applyActionGetEmbeddings(grpc::ServerContext *context, + const ::posetRLgRPC::ActionRequest *request, + ::posetRLgRPC::EmbeddingResponse *response) override { + // errs() << "Action requested: " << request->action() << "\n"; + if (request->action() == -1) { + return grpc::Status::OK; + } + if (request->action() != 0) + 
processMLAdvice(request->action()); + + Embedding emb = getEmbeddings(); + for (unsigned long i = 0; i < emb.size(); i++) { + response->add_embedding(emb[i]); + } + return grpc::Status::OK; + } + + grpc::Status + queryCompiler(grpc::ServerContext *context, + const ::posetRLgRPC::ActionRequest *request, + ::posetRLgRPC::EmbeddingResponse *response) { + if (request->action() == -1) { + return grpc::Status::OK; + } else if (request->action() != 0) + processMLAdvice(request->action()); + + Embedding emb = getEmbeddings(); + for (unsigned long i = 0; i < emb.size(); i++) { + response->add_embedding(emb[i]); + } + return grpc::Status::OK; + } + +private: + Module *M; + std::unique_ptr MLRunner; +}; +} // namespace +char PosetRL::ID = 0; +INITIALIZE_PASS_BEGIN(PosetRL, "poset-rl", "poset sequence pass", false, false) +INITIALIZE_PASS_END(PosetRL, "poset-rl", "poset sequence pass", false, false) + +ModulePass *llvm::createPosetRLPass() { return new PosetRL(); } diff --git a/llvm/lib/Transforms/PosetRL/CMakeLists.txt b/llvm/lib/Transforms/PosetRL/CMakeLists.txt index 4230cb5f8252..8a051d08343a 100644 --- a/llvm/lib/Transforms/PosetRL/CMakeLists.txt +++ b/llvm/lib/Transforms/PosetRL/CMakeLists.txt @@ -11,4 +11,4 @@ DEPENDS intrinsics_gen LLVMMLBridge ) -target_link_libraries(LLVMPosetRL PRIVATE LLVMMLBridge) +target_link_libraries(LLVMPosetRL PUBLIC LLVMMLBridge) diff --git a/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt b/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt index 5d17af12224e..3fb67ee47fac 100644 --- a/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt +++ b/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt @@ -24,8 +24,12 @@ add_llvm_library(HelloWorld MODULE BUILDTREE_ONLY opt ) +<<<<<<< HEAD <<<<<<< HEAD # add_llvm_library(HelloWorld MODULE Hello.cpp) ======= target_link_libraries(LLVMHelloGRPC PRIVATE LLVMMLBridge) >>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp +======= +target_link_libraries(LLVMHelloGRPC PUBLIC LLVMMLBridge) +>>>>>>> 92e0943e9769... Fixed cmake linking issues diff --git a/llvm/tools/CMakeLists.txt b/llvm/tools/CMakeLists.txt index 2a7ca1bfa84d..8cd62085956c 100644 --- a/llvm/tools/CMakeLists.txt +++ b/llvm/tools/CMakeLists.txt @@ -55,7 +55,10 @@ add_llvm_external_project(bolt) add_llvm_implicit_projects() add_llvm_external_project(polly) +<<<<<<< HEAD add_llvm_external_project(MLCompilerBridge) +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues # Add subprojects specified using LLVM_EXTERNAL_PROJECTS foreach(p ${LLVM_EXTERNAL_PROJECTS}) diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index 97626fb73e48..9aa35d71b876 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -31,11 +31,14 @@ set(LLVM_LINK_COMPONENTS Passes <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD ======= HelloMLBridge ======= >>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp AddSizeAttr +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues IR2Vec CollectMachineIR >>>>>>> fb9f173c59b7... 
Moved CodeSizeOpt to IPO From eb261433bfd63e4007c7f8c8db627701198ef328 Mon Sep 17 00:00:00 2001 From: anik314159 Date: Thu, 1 Feb 2024 18:47:01 +0530 Subject: [PATCH 27/52] removing test_dir support in inference.py and Environment.py (cherry picked from commit dfb18be268ce54a7366d65015c0f445424110800) --- model/POSET-RL/Environment_pipe.py | 614 +++++++++++++++++++++++++++++ model/POSET-RL/inference.py | 266 +++++++++++++ 2 files changed, 880 insertions(+) create mode 100755 model/POSET-RL/Environment_pipe.py create mode 100755 model/POSET-RL/inference.py diff --git a/model/POSET-RL/Environment_pipe.py b/model/POSET-RL/Environment_pipe.py new file mode 100755 index 000000000000..43ef63491379 --- /dev/null +++ b/model/POSET-RL/Environment_pipe.py @@ -0,0 +1,614 @@ +# Defines environment for the RL model + +import os +import gym +import subprocess +import sys +import numpy as np +from gym.spaces import Discrete, Box, Dict +from Filesystem import * +import tempfile +import time +from ray.rllib.utils.torch_ops import FLOAT_MIN, FLOAT_MAX +from tqdm import tqdm +import logging +from google.protobuf.json_format import MessageToJson +import json +from po_config import BUILD_DIR, CONFIG_DIR +import grpc +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/Python-Utilities") +import posetRL_pb2_grpc, posetRL_pb2 +from google.protobuf.empty_pb2 import Empty +from typing import Union +import signal +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/CompilerInterface/") +from PipeCompilerInterface import PipeCompilerInterface +from GrpcCompilerInterface import GrpcCompilerInterface + +#import pipeCompilerInterface +empty_message = Empty() + + +class PhaseOrder(gym.Env): + def __init__(self, config): + self.ENV_Dir = None + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = None + self.baseBinarySize = None + self.lastMcaThroughtput = None + self.OzMcaThroughtput = None + self.doneList = [] + self.StateIndex = 
0 + self.embedding = None + self.iteration_counter = 0 + self.rename_Dir = False + self.FileSys_Obj = fsystem(config["llvm_dir"], f"{CONFIG_DIR}/ir2vec") + self.FileSys_Obj.createFolder("env") + self.temporaryDirectory = tempfile.gettempdir() + + self.clang_arch_flag = "-mcpu=cortex-a72" if config["target"] == "AArch64" else "" + self.opt_arch_flag = "--mcpu=cortex-a72" if config["target"] == "AArch64" else "" + + self.alpha = config["alpha"] + self.beta = config["beta"] + self.size_reward_thresh = config["size_reward_thresh"] + self.mca_reward_thresh = config["mca_reward_thresh"] + + # Action space size with optimization sub-sequences obtained from ODG + self.action_space_size = config["action_space_size"] + self.action_space = Discrete(self.action_space_size) + self.action_count = 0 + self.cur_action_seq = [] + self.cur_action_mask = [1] * self.action_space_size + self.mode = "train" + self.Obs = None + obs_space = Box(FLOAT_MIN, FLOAT_MAX, + shape=(config["state_size"], ), dtype=np.float32) + self.observation_space = Dict({"action_mask": Box( + 0, 1, shape=(self.action_space_size,)), "state": obs_space}) + + self.mode = config["mode"] + self.grpc_rtt = 0 + if "worker_index" in config.keys(): + self.worker_index = config.worker_index + else: + self.worker_index = 0 + + if self.mode != 'inference': + self.FileSys_Obj.createFolder("env") + self.make(os.path.abspath(config["train_dir"])) + self.train_Dir = os.path.abspath(config["train_dir"]) + + else: + self.FileSys_Obj.createFolder("inference") + self.FileSys_Obj.TrainingDataPath = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "inference") + self.test_Benchmark = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "") + + self.assembly_file_path = f"{self.temporaryDirectory}/assemblyfile_{self.worker_index}.s" + + logger = logging.getLogger("__file__") + log_level = logging.DEBUG + if os.path.exists("env.log"): + os.remove("env.log") + logging.basicConfig( + filename='env.log', format='%(levelname)s - %(filename)s - 
%(message)s', level=log_level) + + # pipes opening + self.data_format = config["data_format"] + self.use_pipe = config["use_pipe"] + self.tensor_specs = None + self.advice_spec = None + + self.temp_rootname = "/tmp/" + config["pipe_name"] + if self.use_pipe: + # self.temp_rootname = "/tmp/" + config["pipe_name"] + self.compiler_interface = PipeCompilerInterface(self.data_format, self.temp_rootname) + self.use_grpc = config["use_grpc"] + if self.use_grpc: + self.compiler_interface = None + self.is_init = True + + self.server_port = config["server_port"] + + def make(self, TrainingPath): + self.FileSys_Obj.generateTrainingData(TrainingPath) + self.Obs = self.FileSys_Obj.LLFileList + + # def getEmbedding(self, fileName) : + # EmbFile = self.Curr_Dir + "/" + str(self.StateIndex) + # # Get IR2Vec FlowAware embeddings + # command = self.FileSys_Obj.IR2VecBin + " -fa -vocab " + \ + # self.FileSys_Obj.SeedEmbeddingPath + " -o " + EmbFile + " -level p " + fileName + # os.system(command) + # emb = np.loadtxt(EmbFile) + # # Threshold for embedding values + # emb[emb > 100000.0] = 100000.0 + # emb[emb < -100000.0] = -100000.0 + # return emb + + def createEnv(self, fileName): + # env folder will contain folders for separate files with ll and executables + if self.mode != 'inference': + self.ENV_Dir = os.path.join(self.FileSys_Obj.PhaseOrderDir, "env") + else: + self.ENV_Dir = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "inference") + + # setting current directory to point to the folder for the chosen file + self.Curr_Dir = self.ENV_Dir + "/" + os.path.splitext(fileName)[0] + logging.info("Curr_Dir {}".format(self.Curr_Dir)) + + # Creating the folder for the chosen file + self.FileSys_Obj.createFolder(self.Curr_Dir, True) + + # Copying the LL file from training folder to newly created folder + if self.mode != 'inference': + self.FileSys_Obj.copyFile(os.path.join( + self.FileSys_Obj.TrainingDataPath, fileName), self.Curr_Dir) + else: + # quiet# print("test_Benchmark 
{}".format(self.test_Benchmark)) + logging.info("test_Benchmark {}".format(self.test_Benchmark)) + self.FileSys_Obj.copyFile(os.path.join( + self.test_Benchmark, fileName), self.Curr_Dir) + + # Setting up different Paths and Minimum Size + self.BaseIR = os.path.join(self.Curr_Dir, fileName) + self.baseBinarySize, self.minBinarySize = self.getBinarySize( + self.BaseIR, True) + self.lastBinarySize = self.baseBinarySize + + self.CurrIR = os.path.join(self.Curr_Dir, fileName) + self.prev_action = None + + def reset(self, test_file=None, embedding=None): + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = 0 + self.baseBinarySize = None + self.embedding = None + self.stub = None + self.StateIndex = 0 + self.cur_action_mask = [1] * self.action_space_size + + if self.mode != 'inference': + logging.info("Number of files {}".format(len(self.Obs))) + if (len(self.Obs) >= 1): + + index = np.random.random_integers(0, len(self.Obs) - 1) + + self.serverId = self.startServer( + self.Obs[index], "127.0.0.1:" + str(self.server_port)) + # print("Server started at pid:", self.serverId) + + if self.use_grpc and self.compiler_interface is None: + self.compiler_interface = GrpcCompilerInterface(mode='client', stub_class=posetRL_pb2_grpc.PosetRLServiceStub, hostip='127.0.0.1', hostport= self.server_port) + + self.createEnv(self.Obs[index]) + self.doneList.append(self.Obs[index]) + self.Obs.remove(self.Obs[index]) + if (len(self.Obs) == 0): + self.Obs = self.doneList.copy() + self.doneList.clear() + self.iteration_counter += 1 + self.rename_Dir = True + + else: + if not self.use_pipe and not self.use_grpc: + self.Obs = test_file + logging.info("test_file {}".format(test_file)) + index = np.random.random_integers(0, len(self.Obs) - 1) + logging.info("Obs {}".format(index)) + self.createEnv(test_file) + + + # Opening pipe files + if self.use_pipe: + if self.is_init: + self.compiler_interface.reset_pipes() + self.is_init = False 
+ + result = self.readObservation() # DEBUG + + if result is None: + raise + else: + self.embedding = result + elif self.use_grpc: + if self.mode == 'inference': + self.embedding = np.array(embedding) + else: + self.embedding = self.stable_grpc("Action", 0) # LLVMgRPC way + # else: + # self.embedding = self.getEmbedding(self.BaseIR) + + action_mask = [1] * self.action_space_size + next_observation = {'action_mask': np.array( + action_mask), 'state': self.embedding} + self.cur_obs = next_observation + + return next_observation + + def readObservation(self): + embedding = np.empty([300]) + features = self.compiler_interface.evaluate() + + + if self.data_format == "bytes": + for i in range(len(features[0])): + embedding[i] = features[0][i] + elif self.data_format == "json": + for i in range(len(features["embedding"])): + embedding[i] = features["embedding"][i] + + return embedding + + + + def sendResponse(self, value: Union[int, float]): + self.compiler_interface.populate_buffer(int(value)) + + def getBinarySize(self, IRFile, init=False): + fileName = os.path.splitext(os.path.basename(IRFile))[0] + minBinarySize = 0 + baseBinarySize = 0 + if (init): + # Compute O0 Binary size + command = self.FileSys_Obj.ClangPath + " " + self.clang_arch_flag + " -c " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + "base_binary.o" + os.system(command) + baseBinarySize = os.path.getsize(self.Curr_Dir + "/base_binary.o") + logging.info("base {}".format(baseBinarySize)) + + # Compute Oz Binary size + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + " -S -add-size-attr --enableMinSizeAttr --removeNoInlineAttr " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + fileName + ".ll" + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + " -S -Oz " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll" + os.system(command) + command = self.FileSys_Obj.ClangPath + " " + 
self.clang_arch_flag + " -c " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll -o " + \ + self.Curr_Dir + "/" + "Oz_binary.o" + os.system(command) + minBinarySize = os.path.getsize(self.Curr_Dir + "/Oz_binary.o") + + # Get Oz MCA Throughput + self.OzMcaThroughtput = self.getMCACost( + self.Curr_Dir + "/" + fileName + "_Oz") + logging.info("base {}".format(self.OzMcaThroughtput)) + + return baseBinarySize, minBinarySize + + # Get next action (sub-sequence) to be applied on the LLVM IR + def step(self, action_index): + prev_embedding = self.embedding + + Reward = 0 + done = False + # Get embedding for New IR + # here we can use gRPC server to get the new embeddings + # self.embedding = self.applyActionGetEmbeddings(action=action_index) + + # make call to compiler to get the updated embedding + if self.mode == 'inference' and self.use_grpc: + pass + else: + # if self.use_pipe or self.use_grpc: + # result = self.compiler_interface.evaluate() + if self.use_pipe: + self.sendResponse(action_index) + result = self.readObservation() + elif self.use_grpc: + result = self.stable_grpc("Action", action_index) # LLVMgRPC way + # else: + # Reward, NextStateIR = self.getLocalReward(action_index) + # result = self.getEmbedding(NextStateIR) + # self.CurrIR = NextStateIR + if result is None: + raise Exception("result is None") + else: + self.embedding = result + + self.cur_action_mask[action_index] = 0 + self.action_count += 1 + self.cur_action_seq.append(action_index) + next_observation = {'action_mask': np.array( + self.cur_action_mask), 'state': self.embedding} + self.cur_obs = next_observation + + # Max number of actions (optimaztions sub-sequences) to be applied + if self.action_count >= 34: + done = True + logging.info(self.cur_action_seq) + if self.mode == 'inference': + # Write pass sequence to actionfile + with open('actionlist.txt', 'a') as actionfile: + act_flag = 0 + actionfile.write('[') + for act_idx in self.cur_action_seq: + if act_flag == 1: + 
actionfile.write('-'+str(act_idx)) + else: + act_flag = 1 + actionfile.write(str(act_idx)) + actionfile.write('] ') + + if self.mode != 'inference': + if not self.use_pipe: + self.stable_grpc("Exit", None) + try: + # outs, errs = self.server_pid.communicate(timeout=5) + self.stable_grpc("Exit", None) + except: + self.serverId.kill() + print("Clang failing") + + Reward = self.getReward(self.assembly_file_path) + if self.use_pipe: + self.sendResponse(-1) # self.populate_buffer(-1) + self.compiler_interface.evaluate('exit') + + if self.mode != "inference": + Reward = self.getReward(self.assembly_file_path) + # else: + # self.compiler_interface.reset_pipes() + + self.cur_action_seq = [] + self.action_count = 0 + logging.info("Reward {}".format(Reward)) + logging.info("Action {}".format(action_index)) + logging.info("done {}".format(done)) + + return next_observation, Reward, done, {} + + # Get llvm-mca Block RThroughput for the IR + def getMCACost(self, new_file): + cmd1 = self.FileSys_Obj.LlcPath + " " + self.opt_arch_flag + \ + " " + new_file + ".ll" + " -o " + new_file + ".s" + os.system(cmd1) + cmd2 = self.FileSys_Obj.MCAPath + " " + \ + self.opt_arch_flag + " " + new_file + ".s" + pro = subprocess.Popen(cmd2, executable='/bin/bash', shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8') + Output_cmd2 = pro.stdout + + line = Output_cmd2.readline() + if pro.stderr is not None: + logging.critical('Error : {}'.format(pro.stderr)) + if self.use_pipe: + currMcaThroughtput = 0 + while line: + pair = line.split(':') + if pair[0] == 'Block RThroughput': + currMcaThroughtput = float(pair[1].strip(' ')) + line = Output_cmd2.readline() + + logging.info("LLVM-MCA command: {}".format(cmd2)) + + return currMcaThroughtput + + # Get reward for an action + # def getLocalReward(self, action): + # self.StateIndex += 1 + # fileName = os.path.splitext(os.path.basename(self.BaseIR))[0] + + # logging.info("fileName {}".format(fileName)) + # 
logging.info("StateIndex {}".format(self.StateIndex)) + # logging.info("BaseIR {}".format(self.CurrIR)) + + # # Modified IR path + # new_IR = self.Curr_Dir + "/" + fileName + \ + # "_" + str(self.StateIndex) + ".ll" + # new_file = self.Curr_Dir + "/" + fileName + "_" + str(self.StateIndex) + + # # Applying the action and saving the IR file as _ + # # Here we can use gRPC server to apply the action + # command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + \ + # " -S -O34 -SubNum=" + str(action) + " " + \ + # self.CurrIR + " -o " + new_IR + # os.system(command) + # command = self.FileSys_Obj.ClangPath + " " + \ + # self.clang_arch_flag + " -c " + new_IR + " -o " + new_file + ".o" + # os.system(command) + # # Size reward + # currBinarySize = os.path.getsize(new_file + ".o") + + # logging.info("lastBinarySize {}".format(self.lastBinarySize)) + # logging.info("currBinarySize {}".format(currBinarySize)) + + # if ((self.baseBinarySize - self.minBinarySize) > 0): + # reward_binarySize = (self.lastBinarySize - currBinarySize) / \ + # (self.baseBinarySize - self.minBinarySize) + # else: + # reward_binarySize = (self.lastBinarySize - + # currBinarySize) / self.baseBinarySize + + # self.lastBinarySize = currBinarySize + + # # Throughput reward + # currMcaThroughtput = self.getMCACost(new_file) + # logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + # logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + # logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + # if self.lastMcaThroughtput is None: + # mca_cost = (self.OzMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + # else: + # mca_cost = (self.lastMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + + # self.lastMcaThroughtput = currMcaThroughtput + + # logging.info("Thr-debug:{}".format(mca_cost)) + # logging.info("Size-debug:{}".format(reward_binarySize)) + + # # Reward thresholds + # if mca_cost > self.mca_reward_thresh: + # 
mca_cost = self.mca_reward_thresh + # elif mca_cost < -self.mca_reward_thresh: + # mca_cost = -self.mca_reward_thresh + + # if reward_binarySize > self.size_reward_thresh: + # reward_binarySize = self.size_reward_thresh + # elif reward_binarySize < -self.size_reward_thresh: + # reward_binarySize = -self.size_reward_thresh + + # # Cumulative reward with alpha and beta hyperparameters + # reward = self.alpha*reward_binarySize + self.beta*mca_cost + + # return reward, new_IR + + def getReward(self, AssemblyFilePath): + # object size reward + objectFilePath = f"{self.temporaryDirectory}/objectfile_{self.worker_index}.o" + objectFileGenerationCommand = self.FileSys_Obj.ClangPath + " -c " + \ + self.clang_arch_flag + " " + AssemblyFilePath + " -o " + objectFilePath + + os.system(objectFileGenerationCommand) + + currentBinarySize = os.path.getsize(objectFilePath) + + if ((self.baseBinarySize - self.minBinarySize) > 0): + reward_binarySize = (self.lastBinarySize - currentBinarySize) / \ + (self.baseBinarySize - self.minBinarySize) + else: + reward_binarySize = (self.lastBinarySize - + currentBinarySize) / self.baseBinarySize + + self.lastBinarySize = currentBinarySize + + llvmMcaCommand = f"{self.FileSys_Obj.MCAPath} {self.opt_arch_flag} {AssemblyFilePath}" + pro = subprocess.Popen(llvmMcaCommand, executable='/bin/bash', shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8') + + Output_cmd2 = pro.stdout + + line = Output_cmd2.readline() + if pro.stderr is not None: + logging.critical('Error : {}'.format(pro.stderr)) + + while line: + pair = line.split(':') + if pair[0] == 'Block RThroughput': + currMcaThroughtput = float(pair[1].strip(' ')) + line = Output_cmd2.readline() + + logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + if self.lastMcaThroughtput is None: + mca_cost = 
(self.OzMcaThroughtput - + currMcaThroughtput) / self.OzMcaThroughtput + else: + mca_cost = (self.lastMcaThroughtput - + currMcaThroughtput) / self.OzMcaThroughtput + + self.lastMcaThroughtput = currMcaThroughtput + + logging.info("Thr-debug:{}".format(mca_cost)) + logging.info("Size-debug:{}".format(reward_binarySize)) + + # Reward thresholds + if mca_cost > self.mca_reward_thresh: + mca_cost = self.mca_reward_thresh + elif mca_cost < -self.mca_reward_thresh: + mca_cost = -self.mca_reward_thresh + + if reward_binarySize > self.size_reward_thresh: + reward_binarySize = self.size_reward_thresh + elif reward_binarySize < -self.size_reward_thresh: + reward_binarySize = -self.size_reward_thresh + + # Cumulative reward with alpha and beta hyperparameters + reward = self.alpha*reward_binarySize + self.beta*mca_cost + + return reward + + def set_config(path): + global config_path + config_path = path + return config_path + + def startServer(self, filename, ip): + optPath = f"{BUILD_DIR}/bin/opt" + clangPath = f"{BUILD_DIR}/bin/clang" + filepath = self.train_Dir + "/" + filename + newfilepath = self.assembly_file_path + data_format = self.data_format + + cmd = f"{clangPath} -S -mllvm --OPosetRL -mllvm -ml-config-path={CONFIG_DIR} -mllvm --training -mllvm -data-format={data_format} -mllvm --server_address={ip} {filepath} -o {newfilepath}" + if self.use_pipe: + cmd = cmd + " -mllvm -use-pipe" + pid = subprocess.Popen(cmd, executable='/bin/bash', + shell=True, preexec_fn=os.setsid) + return pid + + def repeatedgRPCFieldToNumpyArray(self, gRPCObj): + jsonObj = MessageToJson(gRPCObj) + dictObj = json.loads(jsonObj) + array = dictObj['embedding'] + return np.array(array) + + def applyActionGetEmbeddings(self, action): + request = posetRL_pb2.ActionRequest(action=action) + + self.compiler_interface.populate_buffer(request) + response = self.compiler_interface.evaluate() + # response = self.stub.applyActionGetEmbeddings(request) + return 
self.repeatedgRPCFieldToNumpyArray(response) + + def stopServer(self, sig): + self.serverId.send_signal(sig) + return_code = self.serverId.wait() + print("Return code:", return_code) + + def stable_grpc(self, op, action): + attempt = 0 + max_retries = 5 + retry_wait_seconds = 0.1 + retry_wait_backoff_exponent = 1.5 + + result = None + while True: + try: + t1 = time.time() + if op != "Exit": + result = self.applyActionGetEmbeddings(action=action) + else: + result = self.stopServer(signal.SIGTERM) + t2 = time.time() + self.grpc_rtt += t2-t1 + break + except grpc.RpcError as e: + + if e.code() == grpc.StatusCode.UNAVAILABLE: + # print("Error in grpc") + # if op == 'Exit' and self.last_task_done == 0: + # raise + attempt += 1 + if attempt > max_retries: + print("Maximum attempts completed") + return None + # raise #ServiceTransportError( f"{self.url} {e.details()} ({max_retries} retries)") from None + remaining = max_retries - attempt + time.sleep(retry_wait_seconds) + retry_wait_seconds *= retry_wait_backoff_exponent + else: + if self.mode != 'inference': + print("Unknown error", e.code()) + return None + else: + raise + return result + diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py new file mode 100755 index 000000000000..721bb31f4132 --- /dev/null +++ b/model/POSET-RL/inference.py @@ -0,0 +1,266 @@ +# Script to perform inference on test LLVM IR files +# Use run-inference.sh to call this script +# Usage: python inference.py --ir2vec_dir \ +# --test_dir \ +# --model \ +# [--isAArch] +# --alpha +# --beta +# --size_reward_thresh +# --mca_reward_thresh +# Example: python inference.py --ir2vec_dir POSET-RL/IR2Vec \ +# --test_dir test_ll \ +# --model POSET_RL/saved_models/model \ +# [--isAArch] +# --alpha 10 +# --beta 5 +# --size_reward_thresh 0.2 +# --mca_reward_thresh 0.2 + +import argparse +import numpy as np +import argparse +import os + +# import utils +import logging +import time + +import ray +from ray import tune +from ray.rllib.agents 
import ppo +from ray.rllib.agents import dqn +from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG +from Environment_pipe import PhaseOrder +from ray.rllib.models import ModelCatalog +from model import CustomPhaseOrderModel +from ray.tune.registry import register_env +from datetime import datetime +from po_config import BUILD_DIR + +import sys +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/Python-Utilities") +import posetRL_pb2_grpc, posetRL_pb2 + +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/CompilerInterface/") +from GrpcCompilerInterface import GrpcCompilerInterface +from Filesystem import * + +logger = logging.getLogger(__file__) +logging.basicConfig( + filename="inference.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, +) + +import networkx +from networkx.readwrite import json_graph +import json +import torch +import pydot + +import grpc +from concurrent import futures +import traceback + +parser = argparse.ArgumentParser() +parser.add_argument("--model", help="Path to saved checkpoint") +parser.add_argument( + "-a", "--isAArch", required=False, default=False, action="store_true" +) +parser.add_argument("-alpha", "--alpha", required=False, type=float, default=10) +parser.add_argument("-beta", "--beta", required=False, type=float, default=5) +parser.add_argument( + "-size_reward_thresh", + "--size_reward_thresh", + required=False, + type=float, + default=0.2, +) +parser.add_argument( + "-mca_reward_thresh", "--mca_reward_thresh", required=False, type=float, default=0.2 +) +parser.add_argument( + "--use_pipe", + action="store_true", + help="Use pipe communication", + required=False, + default=False, +) +parser.add_argument("--server_port", type=str, help="Server port", default=50051) +parser.add_argument( + "--data_format", + type=str, + choices=["json", "protobuf", "bytes"], + help="Data format to use for communication", +) +parser.add_argument("--pipe_name",type=str,help="String Pipe 
name",default="posetrl_pipe") +parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) +parser.add_argument("--export_onnx", action="store_true", help="Export the model to ONNX") + +class PhaseOrderInference: + def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json", export_onnx=False): + print("use_pipe {}".format(use_pipe)) + logdir = "/tmp" + logger = logging.getLogger(__file__) + logging.basicConfig( + filename="running.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, + ) + + config = DEFAULT_CONFIG.copy() + + cfg = { + "hiddens": [], + "dueling": False, + } + + ModelCatalog.register_custom_model("My_torch_model", CustomPhaseOrderModel) + target_arch = "AArch64" if args.isAArch else "X86" + # Define model and environment config + config = dict( + { + "model": { + "custom_model": "My_torch_model", + "custom_model_config": { + "state_size": 300, + "fc1_units": 64, + "fc2_units": 64, + }, + }, + "env_config": { + "target": target_arch, + "state_size": 300, + "mode": "inference", + "dump_type": "One", + "intermediate_data": "./temp", + "llvm_dir": BUILD_DIR, + "alpha": args.alpha, + "beta": args.beta, + "size_reward_thresh": args.size_reward_thresh, + "mca_reward_thresh": args.mca_reward_thresh, + "action_space_size": 34, + "use_pipe": use_pipe, + "data_format": data_format, + "use_grpc": use_grpc, + "server_port": args.server_port, + "pipe_name": args.pipe_name, + "export_onnx": export_onnx + }, + "framework": "torch", + "explore": False, + "num_workers": 0, + "train_batch_size": 1, + }, + **cfg + ) + + def env_creator(env_config): + return PhaseOrder(env_config) + + # Create environment + register_env("Environment", env_creator) + + self.train_agent = DQNTrainer(env="Environment", config=config) + + checkpoint = model_path + # Load saved model + self.train_agent.restore(checkpoint) + + self.config = config + + # Dump the onnx model from the 
checkpoint + if args.export_onnx: + torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), export_params=True, f="/path/to/ml-llvm-project/model/POSET-RL/onnx-model/posetrl_model.onnx", verbose=True, input_names=["obs"], output_names=["output"]) + + + def dot_to_json(self, dot_): + py_dot_graph = pydot.graph_from_dot_data(dot_)[0] + graph_netx = networkx.drawing.nx_pydot.from_pydot(py_dot_graph) + graph_json = json_graph.adjacency_data(graph_netx) + return graph_json + + # Predict best optimization sequence for the given LLVM IR + def run_predict(self, test_file=None): + env = PhaseOrder(self.config["env_config"]) + + print("test_file {}".format(test_file)) + state = env.reset(test_file) + score = 0 + while True: + logging.debug("-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-") + + action = self.train_agent.compute_action(state) + print("action {}".format(action)) + + next_state, reward, done, response = env.step(action) + + logging.debug("reward : {}".format(reward)) + + state = next_state + if done: + with open("actionlist.txt", "a") as actionfile: + actionfile.write(str(test_file) + "\n") + assert response is not None, "Allocation is not preset." 
+ break + + return reward, response + +class service_server(posetRL_pb2_grpc.PosetRLService): + def __init__(self, inference_obj): + self.inference_obj = inference_obj + self.new_file = True + self.state = None + self.env = None + self.action = None + + def getAdvice(self, request, context): + try: + done = False + if self.new_file: + self.env = PhaseOrder(self.inference_obj.config["env_config"]) + self.state = self.env.reset(embedding=request.embedding) + self.new_file = False + print("Episode Started") + else: + self.env.embedding = np.array(request.embedding) + self.state, reward, done, response = self.env.step(self.action) + if not done: + self.action = self.inference_obj.train_agent.compute_action(self.state) + reply=posetRL_pb2.ActionRequest(action=self.action.item()) + else: + reply=posetRL_pb2.ActionRequest(action=-1) + self.new_file = True + print("Episode Finished") + return reply + except: + print('Error') + traceback.print_exc() + reply=posetRL_pb2.ActionRequest(action=-1) + return reply + + + + +if __name__ == "__main__": + args = parser.parse_args() + logging.info("Start the inference....") + + ray.init() + + inference_obj = PhaseOrderInference( + args.model, args.use_pipe, args.use_grpc, args.data_format, args.export_onnx + ) + if args.use_pipe: + print("about to enter while loop...") + while True: + reward, response = inference_obj.run_predict() + elif args.use_grpc: + # ray.init() + compiler_interface = GrpcCompilerInterface(mode = 'server', add_server_method=posetRL_pb2_grpc.add_PosetRLServiceServicer_to_server, grpc_service_obj=service_server(inference_obj), hostport= args.server_port) + compiler_interface.start_server() + + else: + print("Please use options use_grpc or use_pipe") + From 101eb7601355c7b666b37b52e5404a00710ab071 Mon Sep 17 00:00:00 2001 From: anik314159 Date: Thu, 1 Feb 2024 18:58:56 +0530 Subject: [PATCH 28/52] Adding modified README for pass side (cherry picked from commit 557f7cf811d1b1dc0158a944b644b09b53f1df5b) --- 
llvm/lib/Transforms/IPO/PosetRL/README.md | 94 +++++++++++++++-------- 1 file changed, 64 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 0ffb3475f056..9c24400aa220 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -1,46 +1,80 @@ # POSET-RL POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially mn combinations, allowing repetitions. The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. The action space contains the subsequences created using the Oz dependence graph (ODG). Sequences are constructed from this graph by finding walks that start and end at critical nodes (with degree greater than a value k).[slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) + > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. 
VenkataKeerthy and Ramakrishna Upadrasta # Environment setup +Execute the following command for environment setup + ```bash - conda env create -f {LLVM_DIR}/model/POSET-RL/rllib_posetrl_env.yml +conda env create -f ${LLVM_DIR}/model/POSET-RL/posetrl_env.yml +#LLVM_DIR is the path to the llvm directory ``` -# Inference on Pre-Trained Models -Currently we support 3 kinds of [ModelRunners](https://compilers.cse.iith.ac.in/publications/mlcompilerbridge) -- [ONNX Model Runner](#ONNX) - This is an in process model runner i.e doesnot require a server/client setup -- [gRPC Model Runner](#gRPC) - This uses gRPC which internally uses Protobuf and the protoc compiler to communicate -- [Pipe Model Runner](#Pipes) - This uses pipes to communicate information between the model and the compiler -## gRPC -Server Side: -```bash -cd ml-llvm-project/model/POSET_RL - -python inference.py --test-dir= / - --use_grpc --server_address=/ - --model= / +# Inference on Trained Models +There are three modes of communication via [MLCompilerBridge](https://compilers.cse.iith.ac.in/publications/mlcompilerbridge) +- [gRPC Model Runner](#Inference-flow-using-gRPC-ModelRunner) - gRPC based ModelRunner +- [Pipe Model Runner](#Inference-flow-using-Pipe-ModelRunner) - Unix based pipes ModelRunner +- [ONNX Model Runner](#Inference-flow-using-ONNXModelRunner) - An In process ModelRunner + +## Inference flow using gRPC ModelRunner +gRPC is a server client based communication model that allows communication between the compiler as the client and the ML model as the server. gRPC internally uses the protoc compiler which takes proto files which contain serialisayion and deserialisation information as inputs and generates C++ services as inclde headers in form of structs. 
+### Starting the gRPC server: +The scripts for running server i.e `inference.py` are present in this directory ```{LLVM_DIR}/model/POSET_RL/src``` + + +```py +python inference.py --use_grpc --server_port= --model= ``` -Client Side: +- `--use_grpc`: flag for using grpc +- `--server_port`: The port where server is hosted +- `--server-address`: The IP and port tuple +- `--ml-config-path`: The path to config directory +- `model-path`: The path to the checkpoint directory for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir + +### Invoking the POSET-RL Pass via gRPC: ```bash #Open a new terminal -ml-llvm-project/build/bin/opt -poset-rl -use_grpc -ml-config-path=ml-llvm-project/config -server_address= -o / -``` -## Pipes -Through the pipe mode of communication we have 2 ways of Serialisation and Deserialisation of the data (SerDes) -json and bytes. +${BUILD_DIR}/bin/opt -poset-rl -use_grpc -ml-config-path=${LLVM_DIR}/config -server_address=127.0.0.1: + -o +``` +- `BUILD_DIR`: Directory where the project is built +- `-poset-rl`: The optimisation flag for invoing the pass +- `-use_grpc`: The flag to denote grpc Modelrunner in the pass +- `-ml-config-path`: The path to config directory +- `-server_address`: The address with port number -Server Side: -```bash -cd ml-llvm-project/model/POSET_RL +## Inference flow using Pipe ModelRunner +The pipe mode of commuication makes use of the linux pipes to transmit data bettween the Compiler and the ML model. 
In the pipe mode of communication we have two ways to serialise and deserialise data, namely json and bytes + +### Starting the Pipe Server: +The scripts for running server i.e `inference.py` are present in this directory ```${LLVM_DIR}/model/POSET_RL/src``` -python inference.py --test-dir= --use_pipe --pipe_name= --data_format= --model= +```py +python inference.py --use_pipe --pipe_name= --data_format= --model= ``` -Client_side: +- `--use_pipe`: flag for using pipes +- `--pipe-name`: identifer for a pipe (name your pipe) +- `--data-format`: The data format is the method in which data is moved b/w server client +- `model-path`: The path to the checkpoint directory for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir + +### Invoking the POSET-RL Pass via Pipes ```bash -#Open a new terminal -ml-llvm-project/build/bin/opt -poset-rl -use-pipe -pipe-name= -data-format= -ml-config-path=ml-llvm-project/config -o +${BUILD_DIR}/bin/opt -poset-rl -use-pipe -pipe-name= -data-format= +ml-config-path=${LLVM_DIR}/config -o ``` -## ONNX +- `BUILD_DIR`: Directory where the project is built +- `-poset-rl`: The optimisation flag for invoing the pass +- `-use-pipe`: The flag to denote pipe Modelrunner in the pass +- `-ml-config-path`: The path to config directory +- `pipe-name`: identifer for a pipe (name your pipe name as for the server) + +## Inference flow using ONNXModelRunner +ONNX model runner is an in process model runner, in this type of model runner we donot need the a server client setup because the complier can easily access the model using teh ONNX APIs. 
+### Invoking the POSET-RL pass via ONNX: ```bash -ml-llvm-project/build/bin/opt -poset-rl -use-onnx -ml-config-path=ml-llvm-project/config -o -``` \ No newline at end of file +${BUILD_DIR}/bin/opt -poset-rl -use-onnx -ml-config-path=${LLVM_DIR}/config -o +``` +- `BUILD_DIR`: Directory where the project is built +- `-poset-rl`: The optimisation flag for invoing the pass +- `-use-onnx`: The flag to denote onnx Modelrunner in the pass +- `-ml-config-path`: The path to config directory From bd2f0fa6b0fa551c80fa4f0c24679d19ee878cc2 Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Sat, 17 Feb 2024 11:45:54 +0530 Subject: [PATCH 29/52] Submodule MLCompilerBridge Updated Pulled the latest changes from the main branch of MLCompilerBridge (cherry picked from commit 493d5948aaf12ebff80fdc44ef5dfdc8c3665616) --- MLCompilerBridge | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MLCompilerBridge b/MLCompilerBridge index 8306513c3da3..d03a012728d1 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 8306513c3da3e1bb86c2c82975685417100460e3 +Subproject commit d03a012728d1757350bdae2ace47b7ae4f44d6ea From f06ac6ef64aedb19b391d9ee0505797ac1a662ae Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Sat, 17 Feb 2024 17:21:19 +0530 Subject: [PATCH 30/52] Update README.md (cherry picked from commit 74f098b6c7facaf5e8cbb4a225345d7492ba7c24) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 9c24400aa220..96aad32261f2 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -34,12 +34,11 @@ python inference.py --use_grpc --server_port= --model= ### Invoking the POSET-RL Pass via gRPC: ```bash #Open a new terminal -${BUILD_DIR}/bin/opt -poset-rl -use_grpc -ml-config-path=${LLVM_DIR}/config 
-server_address=127.0.0.1: +${BUILD_DIR}/bin/opt -poset-rl -ml-config-path=${LLVM_DIR}/config -server_address=127.0.0.1: -o ``` - `BUILD_DIR`: Directory where the project is built - `-poset-rl`: The optimisation flag for invoing the pass -- `-use_grpc`: The flag to denote grpc Modelrunner in the pass - `-ml-config-path`: The path to config directory - `-server_address`: The address with port number From 2e8765b4715de938ac9dea0d193598bf7bc0f728 Mon Sep 17 00:00:00 2001 From: "S. VenkataKeerthy" <31350914+svkeerthy@users.noreply.github.com> Date: Sat, 17 Feb 2024 21:32:30 +0530 Subject: [PATCH 31/52] Update README.md (cherry picked from commit 4b8e384617261862a5727b311bd90167d3d1d1f3) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 68 ++++++++++------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 96aad32261f2..b4d662896087 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -1,5 +1,8 @@ # POSET-RL -POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially mn combinations, allowing repetitions. The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. The action space contains the subsequences created using the Oz dependence graph (ODG). 
Sequences are constructed from this graph by finding walks that start and end at critical nodes (with degree greater than a value k).[slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) +POSET-RL uses a reinforcement learning approach to find a pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `m^n` combinations, allowing repetitions. + +This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). +Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. 
VenkataKeerthy and Ramakrishna Upadrasta @@ -12,68 +15,57 @@ conda env create -f ${LLVM_DIR}/model/POSET-RL/posetrl_env.yml ``` # Inference on Trained Models There are three modes of communication via [MLCompilerBridge](https://compilers.cse.iith.ac.in/publications/mlcompilerbridge) -- [gRPC Model Runner](#Inference-flow-using-gRPC-ModelRunner) - gRPC based ModelRunner -- [Pipe Model Runner](#Inference-flow-using-Pipe-ModelRunner) - Unix based pipes ModelRunner -- [ONNX Model Runner](#Inference-flow-using-ONNXModelRunner) - An In process ModelRunner +- [gRPC Model Runner](#Inference-flow-using-gRPC-Model-Runner) - gRPC based Model Runner +- [Pipe Model Runner](#Inference-flow-using-Pipe-Model-Runner) - Unix pipes based Model Runner +- [ONNX Model Runner](#Inference-flow-using-ONNX-Model-Runner) - In-process Model Runner (Suited for stand-alone inference/deployment) + +## Inference flow using gRPC Model Runner -## Inference flow using gRPC ModelRunner -gRPC is a server client based communication model that allows communication between the compiler as the client and the ML model as the server. gRPC internally uses the protoc compiler which takes proto files which contain serialisayion and deserialisation information as inputs and generates C++ services as inclde headers in form of structs. 
### Starting the gRPC server: -The scripts for running server i.e `inference.py` are present in this directory ```{LLVM_DIR}/model/POSET_RL/src``` +The script for running the server, `inference.py` is present in this directory ```{LLVM_DIR}/model/POSET_RL/src``` ```py python inference.py --use_grpc --server_port= --model= ``` -- `--use_grpc`: flag for using grpc -- `--server_port`: The port where server is hosted -- `--server-address`: The IP and port tuple -- `--ml-config-path`: The path to config directory -- `model-path`: The path to the checkpoint directory for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir +- `port_no`: The port where server is hosted +- `model_path`: The path to the checkpoint directory for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir ### Invoking the POSET-RL Pass via gRPC: ```bash -#Open a new terminal -${BUILD_DIR}/bin/opt -poset-rl -ml-config-path=${LLVM_DIR}/config -server_address=127.0.0.1: - -o +# Open a new terminal +${BUILD_DIR}/bin/opt -poset-rl -ml-config-path=${LLVM_DIR}/config -server_address=127.0.0.1: -o ``` - `BUILD_DIR`: Directory where the project is built -- `-poset-rl`: The optimisation flag for invoing the pass -- `-ml-config-path`: The path to config directory -- `-server_address`: The address with port number +- `port_no`: Port number for communication -## Inference flow using Pipe ModelRunner -The pipe mode of commuication makes use of the linux pipes to transmit data bettween the Compiler and the ML model. 
In the pipe mode of communication we have two ways to serialise and deserialise data, namely json and bytes +## Inference flow using Pipe Model Runner ### Starting the Pipe Server: -The scripts for running server i.e `inference.py` are present in this directory ```${LLVM_DIR}/model/POSET_RL/src``` +The script for running server, `inference.py` is present in this directory ```${LLVM_DIR}/model/POSET_RL/src``` ```py -python inference.py --use_pipe --pipe_name= --data_format= --model= +python inference.py --use_pipe --pipe_name= --data_format= --model= ``` -- `--use_pipe`: flag for using pipes -- `--pipe-name`: identifer for a pipe (name your pipe) -- `--data-format`: The data format is the method in which data is moved b/w server client -- `model-path`: The path to the checkpoint directory for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir +- `pipe_identifier`: Name of the pipe for communication. (A pipe with this name will be created during communication) +- `format`: The data format for communication between the server and the client. Can either be `json` or `bytes`. +- `model`: The path to the checkpoint directory for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir ### Invoking the POSET-RL Pass via Pipes ```bash -${BUILD_DIR}/bin/opt -poset-rl -use-pipe -pipe-name= -data-format= -ml-config-path=${LLVM_DIR}/config -o +${BUILD_DIR}/bin/opt -poset-rl -use-pipe -pipe-name= -data-format= -ml-config-path=${LLVM_DIR}/config -o ``` - `BUILD_DIR`: Directory where the project is built -- `-poset-rl`: The optimisation flag for invoing the pass -- `-use-pipe`: The flag to denote pipe Modelrunner in the pass -- `-ml-config-path`: The path to config directory -- `pipe-name`: identifer for a pipe (name your pipe name as for the server) +- `pipe_identifier`: Name of the pipe for communication. (A pipe with this name will be created during communication) +- `format`: The data format for communication between the server and the client. Can either be `json` or `bytes`. 
+ +Note: Both `opt` and `inference.py` should be invoked with same `format`. ## Inference flow using ONNXModelRunner -ONNX model runner is an in process model runner, in this type of model runner we donot need the a server client setup because the complier can easily access the model using teh ONNX APIs. -### Invoking the POSET-RL pass via ONNX: + +### Invoking the POSET-RL pass with ONNX: ```bash ${BUILD_DIR}/bin/opt -poset-rl -use-onnx -ml-config-path=${LLVM_DIR}/config -o ``` -- `BUILD_DIR`: Directory where the project is built -- `-poset-rl`: The optimisation flag for invoing the pass -- `-use-onnx`: The flag to denote onnx Modelrunner in the pass -- `-ml-config-path`: The path to config directory + + From 8c433eaab9779672cad5a074dce136cee2594478 Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Sat, 17 Feb 2024 21:51:17 +0530 Subject: [PATCH 32/52] Update README.md Added The dumping option (cherry picked from commit ceefdb2bda4da9a7ef4e37c53ff5e8312db5f7d5) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index b4d662896087..8c8fd76daa33 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -66,6 +66,23 @@ Note: Both `opt` and `inference.py` should be invoked with same `format`. ### Invoking the POSET-RL pass with ONNX: ```bash ${BUILD_DIR}/bin/opt -poset-rl -use-onnx -ml-config-path=${LLVM_DIR}/config -o -``` +``` +## Dumping Model files + +```bash +cd ml-llvm-project/model/POSET_RL/src + +python inference.py --test-dir= --use_grpc --server_address= --model= --export_onnx + +``` +- The -export_onnx option in inference.py is responsible for dumping the onnx model +- The Model files will be dumped in `model/onnx-model` +- Rename the Model files in each directory as `_MODEL_PATH.onnx`. 
+- Move the Model files from their respective directory to the path `config/posetrl/onnx-checkpoint` + +- Make sure to regenerate new ONNX models for new checkpoints. + + +### Model Training: [Refer to Model Training](../../../../../model/POSET-RL/README.md) From 8e7a4faef2c46d24ef8144ee6e01ef3793d3bec7 Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Sat, 17 Feb 2024 21:54:20 +0530 Subject: [PATCH 33/52] Updating README.md (cherry picked from commit 64b29e3854a331b3836c3a7d07165e0c6fbb3100) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 8c8fd76daa33..17658e2a3784 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -77,9 +77,8 @@ python inference.py --test-dir= --use_grpc --server ``` - The -export_onnx option in inference.py is responsible for dumping the onnx model - The Model files will be dumped in `model/onnx-model` -- Rename the Model files in each directory as `_MODEL_PATH.onnx`. +- Rename the Model files as `.onnx`. - Move the Model files from their respective directory to the path `config/posetrl/onnx-checkpoint` - - Make sure to regenerate new ONNX models for new checkpoints. From f4d979ac858c2b32edab69c6c9bec8f1f5e9cdb3 Mon Sep 17 00:00:00 2001 From: "S. 
VenkataKeerthy" <31350914+svkeerthy@users.noreply.github.com> Date: Sat, 17 Feb 2024 22:35:08 +0530 Subject: [PATCH 34/52] Update README.md (cherry picked from commit f677956ac3eed655846393f6f3ff89049045ac48) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 17658e2a3784..957e49ed9c95 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -75,7 +75,7 @@ cd ml-llvm-project/model/POSET_RL/src python inference.py --test-dir= --use_grpc --server_address= --model= --export_onnx ``` -- The -export_onnx option in inference.py is responsible for dumping the onnx model +- The `-export_onnx` option in `inference.py` is responsible for dumping the onnx model - The Model files will be dumped in `model/onnx-model` - Rename the Model files as `.onnx`. - Move the Model files from their respective directory to the path `config/posetrl/onnx-checkpoint` From 21affc9731e7b0a9a1e8cc35d863d4d7c8d6f9cc Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Sat, 17 Feb 2024 22:58:29 +0530 Subject: [PATCH 35/52] Update README.md (cherry picked from commit d2a0743a421a965433b10a91fc10d421e9a60b2b) --- model/POSET-RL/README.md | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md index 86b497ba534b..2e7ae77a4b8c 100644 --- a/model/POSET-RL/README.md +++ b/model/POSET-RL/README.md @@ -1,8 +1,14 @@ # POSET-RL -POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially mn combinations, allowing repetitions. 
The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. The action space contains the subsequences created using the Oz dependence graph (ODG). Sequences are constructed from this graph by finding walks that start and end at critical nodes (with degree greater than a value k).[slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) +# POSET-RL +POSET-RL uses a reinforcement learning approach to find pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `m^n` combinations, allowing repetitions. + +This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). +Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. + > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S.
VenkataKeerthy and Ramakrishna Upadrasta + ## Environment Setup - Copy the environment `.yml` files from `/path/to/ml-llvm-project/model/POSET-RL/posetrl_env.yml` to the home directory @@ -28,25 +34,27 @@ POSET-RL uses a reinforcement learning approach as the search space of optimizat ```bash cd ml-llvm-project/model/POSET_RL/src -python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_grpc +python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_grpc + +#The --train_dir option must specify a path to a directory of .ll files ``` ### Pipes ```bash cd ml-llvm-project/model/POSET_RL/src -python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_pipe --data_format= +python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_pipe --data_format= +#The --train_dir option must specify a path to a directory of .ll files #Model will be generated as a pytorch checkpoint in ml-llvm-project/model/checkpoint_dir after every 10 epochs #The output of the above generates the training logs ``` ### ONNX - The -export_onnx option in inference.py is responsible for dumping the onnx model ```bash cd ml-llvm-project/model/POSET_RL/src -python inference.py --test-dir= --use_grpc --server_address= --model= --export_onnx +python inference.py --test-dir= --use_grpc --server_address= --model= + +``` -# The model will be dumped inside the onnx-model directory residing inside /path/to/ml-llvm-project/model/POSET-RL/ -# Copy the generated onnx model from the above mentioned directory into /path/to/ml-llvm-project/config/posetrl -``` \ No newline at end of file +### Model Inference: [Refer to Model Training](../llvm/lib/Transforms/IPO/PosetRL/README.md) From 13588b38cfe6fec64a40177f884f7cbd9c52ca1e Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Sat, 17 Feb 2024 23:03:02 +0530 Subject: [PATCH 36/52] Update README.md Path to inference (cherry picked from commit 
1ab1341a40b72b8757da468f8846c40bca5e31bd) --- model/POSET-RL/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md index 2e7ae77a4b8c..9af64e0f4f63 100644 --- a/model/POSET-RL/README.md +++ b/model/POSET-RL/README.md @@ -57,4 +57,4 @@ python inference.py --test-dir= --use_grpc --server ``` -### Model Inference: [Refer to Model Training](../llvm/lib/Transforms/IPO/PosetRL/README.md) +### Model Inference: [Refer to Model Training](ml-llvm-project/llvm/lib/Transforms/IPO/PosetRL/README.md) From 5dac4388d2f62db4d22ae0ed4d7d7a917f51bffa Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Sat, 17 Feb 2024 23:06:57 +0530 Subject: [PATCH 37/52] Update README.md (cherry picked from commit 5e70aa4b3652d06f1be815dd1f1d63d0409629e7) --- model/POSET-RL/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md index 9af64e0f4f63..a9c8e36d3368 100644 --- a/model/POSET-RL/README.md +++ b/model/POSET-RL/README.md @@ -57,4 +57,4 @@ python inference.py --test-dir= --use_grpc --server ``` -### Model Inference: [Refer to Model Training](ml-llvm-project/llvm/lib/Transforms/IPO/PosetRL/README.md) +### Model Inference: [Refer to Model Training](../../llvm/lib/Transforms/IPO/PosetRL/README.md ) From 0d3b1e7336b54e49a50205cd19a96d906fd45aee Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Mon, 19 Feb 2024 10:13:17 +0530 Subject: [PATCH 38/52] ReadMe Updated Minor Changes to the Readme structure, added conted to explain what commands do (cherry picked from commit e15d6372eff5f20e4938b876de809cd6de402956) --- README.md | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/README.md b/README.md index 386c7ed5d9d2..a13d5b61799d 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,30 @@ # ML LLVM Project ## Contents +<<<<<<< HEAD - About - Setup - 
Requirements - Build - All implemented Passes +======= +- [About](#about) +- [Setup](#setup) + - Requirements + - Building the Project + - Clone the Repository + - Setting up the build environment. + - Exporting ONNX Path Variables + - Conda env set-up + - A small hack to prevent the conda environtments from clashing (To Be removed) + - Cmake Command + - make Command +- [List of optimizations supported](#list-of-optimizations-supported) + - Reinforcement Learning assisted Loop Distribution for Locality and Vectorization + - RL4Real + - POSET-RL + +>>>>>>> e15d6372eff5... ReadMe Updated ## About @@ -26,19 +45,55 @@ enter about * Building GRPC from Source: Please follow [`Build GRPC with cmake`](https://grpc.io/docs/languages/cpp/quickstart/) **v1.34 (protobuf v3.13)** to build GRPC from source. * In the above tutorial setting `DCMAKE_INSTALL_PREFIX` is necessary as it would give you an easy way to uninstall GRPC later. * [ONNXRuntime](https://github.com/microsoft/onnxruntime/releases) v1.16.3 +<<<<<<< HEAD * TensorFlow - for TF Model Runner (AOT flow) # this should be in the yml only don't need to set it up separately +======= + + * The following commands will download ONNX Runtime v1.16.3 in your present working directory and then untar the contents. + The path for this will be used in this [section](#exporting-onnx-path-variables) +```bash + wget https://github.com/microsoft/onnxruntime/releases/download/v1.16.3/onnxruntime-linux-x64-1.16.3.tgz + tar -xvf onnxruntime-linux-x64-1.16.3.tgz +``` +* TensorFlow - for TF Model Runner (AOT flow) +>>>>>>> e15d6372eff5... 
ReadMe Updated * Tested with TensorFlow 2.13.0 * Other python requirements are available in [mlbridge.yml] # needs to be updated with sangamesh's.yml * Conda/Anaconda based virtual environment is assumed (Experiments are done on an Ubuntu 20.04 machine) +<<<<<<< HEAD Commands to install the conda evironment and set up onnx +======= +## Building the Project +The following section outlines the build process for our repository. + +### Clone the Repository +You need to clone the repository and initialize all the submodules. The following commands clone the repository from GitHub to your local machine and initialize all submodules, i.e., clone all the submodules within it. + +```bash +git clone git@github.com:IITH-Compilers/ml-llvm-project.git +cd ml-llvm-project +git checkout mlbridge-lib +git pull +git submodule update --init --recursive +``` + +#### Exporting ONNX Path Variables +As the name suggests this is the Path to the ONNX Runtime that we downloaded in [Setup](#setup) . The path of ONNX Runtime is required not only for building the project but also it is required when running inference using the ONNX Model Runner. Hence it is a better idea to export these paths and also add them to the PATH and LD_LIBRARY_PATH +>>>>>>> e15d6372eff5... ReadMe Updated ```bash #TODO: change this to what ever will be the location of the envs cp -r /Pramana/ML_LLVM_Tools/AE/envs/ ~/ +<<<<<<< HEAD +======= +#### Conda environment set-up +The following commands will help you install and set up the necessary conda environments. +```bash +>>>>>>> e15d6372eff5... ReadMe Updated # install the env using the following commands conda env create -f ~/env/LOF_original_env.yml conda env create -f ~/env/mlgo-new @@ -52,11 +107,15 @@ tar -xvf onnxruntime-linux-x64-1.16.3.tgz ``` +<<<<<<< HEAD ### Build Following are the requied steps to build the project, if you would like you could run them in a script too after changing the required parameters.
+======= +#### A small hack to prevent the conda environtments from clashing (To Be removed) +>>>>>>> e15d6372eff5... ReadMe Updated ```bash # switch to mlgo-new env as you will need it to build the setup conda activate mlgo-new @@ -66,6 +125,7 @@ mv ~/anaconda3/envs/mlgo-new/lib/python3.10/site-packages/tensorflow/include/goo mv ~/anaconda3/envs/mlgo-new/include/google/ ~/anaconda3/envs/mlgo-new/include/google_new/ +<<<<<<< HEAD git clone git@github.com:IITH-Compilers/ml-llvm-project.git cd ml-llvm-project git checkout mlbridge-lib @@ -76,26 +136,57 @@ cd build # build command cmake -G "Unix Makefiles" -S ../llvm -B . \ +======= +#### Cmake Command +Now we need to create a build directory for our build. Use the following commands to make a build dir inside the cloned repository. + +```bash +# create a build dir and move to it +mkdir build +cd build +``` +After moving to the build directory, we'll use CMake to generate our build files and directories + +```bash +cmake -G "Unix Makefiles" -S ../llvm -B . \ +>>>>>>> e15d6372eff5... ReadMe Updated -DCMAKE_BUILD_TYPE="Release" \ -DLLVM_ENABLE_PROJECTS="clang;IR2Vec;ml-llvm-tools;mlir;MLCompilerBridge" \ -DLLVM_TARGETS_TO_BUILD="X86" \ -DLLVM_ENABLE_ASSERTIONS=on \ -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ -DLLVM_CCACHE_BUILD=ON \ +<<<<<<< HEAD -DONNXRUNTIME_ROOTDIR= # change to your path where you wget the onnxruntime -DLLVM_TF_AOT_RUNTIME= # change to your path -DTENSORFLOW_AOT_PATH= # change to your path +======= + -DONNXRUNTIME_ROOTDIR= # path to your onnx runtime, use $ONNX_DIR if you already exported this environment variable \ + -DLLVM_TF_AOT_RUNTIME= # \ + -DTENSORFLOW_AOT_PATH= # \ +>>>>>>> e15d6372eff5...
ReadMe Updated -DLLVM_INLINER_MODEL_PATH=download \ -DLLVM_INLINER_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v1.1/inlining-Oz-99f0063-v1.1.tar.gz \ -DLLVM_RAEVICT_MODEL_PATH=download \ -DLLVM_RAEVICT_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/regalloc-evict-v1.0/regalloc-evict-e67430c-v1.0.tar.gz +<<<<<<< HEAD # don't make all +======= +#### Make command +After following the above steps you have successfully exported all the required environment variables and have also created the Makefile which shall be used to build the project. Use the following command to start your build. +```bash +>>>>>>> e15d6372eff5... ReadMe Updated make clang opt -j50 ``` ## List of optimizations supported +This section will contain information about all the ML driven optimizations. Here is a brief about each optimization, and a simple onnx command which we can use to get one output (i.e give it an input .c/.cpp/.ll and get the optimized binary) . + +> [!TIP] +> If you'd like to see the LLVM IR that results from these optimizations, you can pass the appropriate flags to generate the .ll files + ### Reinforcement Learning assisted Loop Distribution for Locality and Vectorization We propose a Reinforcement Learning (RL) approach for loop distribution, optimizing for both vectorization and locality. Using SCC Dependence Graphs (SDGs), our RL model learns loop distribution order through topological walks. The reward is based on instruction cost and cache misses. We introduce a strategy to expand the training set by generating new loops. This method aims to enhance loop parallelization and improve overall code performance. @@ -130,7 +221,15 @@ to learn more head to the Pass specific readme [here]. #### Try it out ```bash +<<<<<<< HEAD # write your bash commands here +======= +./build/bin/opt \ + -poset-rl \ + -use-onnx \ + -ml-config-path= # path to your ml config \ + \ +>>>>>>> e15d6372eff5...
ReadMe Updated ``` From e1d8b84300763e7ed3999aa2153b4d7ad2fc63b9 Mon Sep 17 00:00:00 2001 From: "S. VenkataKeerthy" <31350914+svkeerthy@users.noreply.github.com> Date: Wed, 28 Feb 2024 16:38:02 +0530 Subject: [PATCH 39/52] Minor updates to README.md (cherry picked from commit c3b6dd0d8e2ca926de91a2866d59d26b37eb3040) --- README.md | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a13d5b61799d..ac0c352a6e61 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,24 @@ >>>>>>> e15d6372eff5... ReadMe Updated ## About +<<<<<<< HEAD enter about +======= +This GitHub repository encompasses the complete Compiler Infrastructure for ML-Driven Optimizations developed by the Compilers group at IITH. The repository integrates ML-driven optimization techniques into the LLVM project through the ML Compiler Bridge infrastructure and IR2Vec embeddings. + +We strongly encourage you to delve into this repository, explore its contents, and consider building additional tools leveraging the existing infrastructure. We presume you are familiar with LLVM and build upon that, but if you are not familiar with LLVM, then here are a few resources that might help: + +* [Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-started-with-llvm) +page for detailed information on configuring and compiling LLVM. You can visit +* [Directory Layout](https://llvm.org/docs/GettingStarted.html#directory-layout) +to learn about the layout of the source code tree. + +### ML Compiler Bridge +As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it is possible to have multiple ways of integrating compiler and the Machine learning model. These methods primarily use server client communication techniques like gRPC, and pipes.
The ONNX flow, which is capable of representing ML models as DAG-based IRs with callable APIs in multiple languages (C/C++/Python), does not require a server-client model or inter-process communication. Additionally, TensorFlow's AOT compiled models are also supported for inference. + +> The Next 700 ML-Enabled Compiler Optimizations: S. VenkataKeerthy, Siddharth Jain, Umesh Kalvakuntla, Pranav Sai Gorantla, Rajiv Sailesh Chitale, Eugene Brevdo, Albert Cohen, Mircea Trofin, Ramakrishna Upadrasta +>>>>>>> c3b6dd0d8e2c... Minor updates to README.md ## Setup @@ -85,8 +101,18 @@ As the name suggests this is the Path to the ONNX Runtime that we downloaded in >>>>>>> e15d6372eff5... ReadMe Updated ```bash +<<<<<<< HEAD #TODO: change this to what ever will be the location of the envs cp -r /Pramana/ML_LLVM_Tools/AE/envs/ ~/ +======= + export ONNX_DIR= #path to your onnx runtime + export LD_LIBRARY_PATH=${ONNX_DIR}:$LD_LIBRARY_PATH + export LIBRARY_PATH=${ONNX_DIR}:$LIBRARY_PATH + export PATH=${ONNX_DIR}/include:$PATH +``` +> [!TIP] +> It is advised to add these commands to your **~/.bashrc** as they'll be needed when you switch shells. +>>>>>>> c3b6dd0d8e2c... Minor updates to README.md <<<<<<< HEAD ======= @@ -193,7 +219,7 @@ We propose a Reinforcement Learning (RL) approach for loop distribution, optimiz #### Try it out !!! -> We assueme you have already done the setup and built the project. +> We assume you have already done the setup and built the project.
```bash # ONNX command for inference: From ebd0b5cd24907d17ad0ff53e750f4d408d89289f Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:05:53 +0530 Subject: [PATCH 40/52] Update README.md (cherry picked from commit 4e2daba6ef8e2db8c28f30d011668406872afdc7) --- model/POSET-RL/README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md index a9c8e36d3368..6653d8ef47ba 100644 --- a/model/POSET-RL/README.md +++ b/model/POSET-RL/README.md @@ -1,9 +1,7 @@ -# POSET-RL - # POSET-RL POSET-RL uses a reinforcement learning approach to find pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `mxn` combinations, allowing repetitions. -This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). +This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238) ,[slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)),ISPASS 2022. Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. 
VenkataKeerthy and Ramakrishna Upadrasta @@ -11,9 +9,7 @@ Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more de ## Environment Setup -- Copy the environment `.yml` files from `/path/to/ml-llvm-project/model/POSET-RL/posetrl_env.yml` to the home directory - Setup the environment using the `.yml` using the following commands - ```bash conda env create -f posetrl_env.yml ``` From 3be9d12ab7ad453c78a31601b56499d2decca23d Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:09:44 +0530 Subject: [PATCH 41/52] Update README.md (cherry picked from commit e7727edc089002f2ea9f23f1a6fe20f6728556d1) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 957e49ed9c95..9f56b173e5dc 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -1,7 +1,7 @@ # POSET-RL POSET-RL uses a reinforcement learning approach to find pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `mxn` combinations, allowing repetitions. -This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). 
+This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)) ,ISPASS 2022. Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta From b67f083a9986092e250158368ae2a8b3a0d037c7 Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:11:45 +0530 Subject: [PATCH 42/52] Update README.md (cherry picked from commit 8f352a69cfc011c0322abb352ebdfecfeffa99b6) --- llvm/lib/Transforms/IPO/PosetRL/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md index 9f56b173e5dc..a8c62c24fd2d 100644 --- a/llvm/lib/Transforms/IPO/PosetRL/README.md +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -1,10 +1,10 @@ # POSET-RL POSET-RL uses a reinforcement learning approach to find pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `mxn` combinations, allowing repetitions. -This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)) ,ISPASS 2022. 
+This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)) Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. -> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta ,ISPASS 2022 # Environment setup Execute the following command for environment setup From 2908129f02f72a8a3048a19a2537204bf4665ebc Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Wed, 28 Feb 2024 17:12:36 +0530 Subject: [PATCH 43/52] Update README.md for Venue and Year of POSETRL (cherry picked from commit 6c0ddd923ead955e1856d15697fc1c82147bc2f1) --- model/POSET-RL/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md index 6653d8ef47ba..ca74dea193fb 100644 --- a/model/POSET-RL/README.md +++ b/model/POSET-RL/README.md @@ -1,10 +1,10 @@ # POSET-RL POSET-RL uses a reinforcement learning approach to find pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `mxn` combinations, allowing repetitions. 
-This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238) ,[slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)),ISPASS 2022. +This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238) ,[slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. -> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. 
VenkataKeerthy and Ramakrishna Upadrasta, ISPASS 2022 ## Environment Setup From 08e847e0634e9c776d31643aa8ecb570a92c1398 Mon Sep 17 00:00:00 2001 From: anik314159 <78022732+anik314159@users.noreply.github.com> Date: Wed, 28 Feb 2024 19:15:47 +0530 Subject: [PATCH 44/52] Update README.md (cherry picked from commit 7dca81987b8a7bf6d332d8d1c85e21481a212669) --- model/POSET-RL/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/POSET-RL/README.md b/model/POSET-RL/README.md index ca74dea193fb..f549bbf5e95b 100644 --- a/model/POSET-RL/README.md +++ b/model/POSET-RL/README.md @@ -53,4 +53,4 @@ python inference.py --test-dir= --use_grpc --server ``` -### Model Inference: [Refer to Model Training](../../llvm/lib/Transforms/IPO/PosetRL/README.md ) +### Model Inference: [Refer to Model Inference](../../llvm/lib/Transforms/IPO/PosetRL/README.md ) From 5def3855e36c481b2382841e41200a02a821b558 Mon Sep 17 00:00:00 2001 From: VikasPatnala Date: Wed, 28 Feb 2024 21:15:08 +0530 Subject: [PATCH 45/52] Changed the llvm-mca.cpp to get the MCA throughput changed lines 220 and 466 (cherry picked from commit d83580ac9157cc5181b50888c78247de71ad780f) --- llvm/tools/llvm-mca/llvm-mca.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index eb71cffba6dd..85f534b2cb8d 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -225,6 +225,7 @@ static cl::opt ShowBarriers( cl::desc("Print memory barrier information in the instruction info view"), cl::cat(ViewOptions), cl::init(false)); +<<<<<<< HEAD static cl::opt DisableCustomBehaviour( "disable-cb", cl::desc( @@ -236,6 +237,11 @@ static cl::opt DisableInstrumentManager( cl::desc("Disable instrumentation manager (use the default class which " "ignores instruments.)."), cl::cat(ViewOptions), cl::init(false)); +======= +static cl::opt + loopID("lc-lID", cl::Hidden, cl::Optional, + cl::desc("ID 
of the loop set by RDG/loop distribution pass"), cl::init(0)); +>>>>>>> d83580ac9157... Changed the llvm-mca.cpp to get the MCA throughput changed lines 220 and 466 namespace { @@ -563,7 +569,24 @@ int main(int argc, char **argv) { if (Region->empty()) continue; +<<<<<<< HEAD IB.clear(); +======= + // Don't print the header of this region if it is the default region, and + // it doesn't have an end location. + if (Region->startLoc().isValid() || Region->endLoc().isValid()) { + StringRef Desc = Region->getDescription(); + std::string DescToMatch = funcName + "-" + std::to_string(loopID); + // errs () << "funcName : " << funcName << " loop id : " << loopID << "\n"; + // errs () << Desc << " " << DescToMatch << "\n"; + if (loopID && !Desc.equals(DescToMatch)) + continue; + TOF->os() << "\n[" << RegionIdx++ << "] Code Region"; + if (!Desc.empty()) + TOF->os() << " - " << Desc; + TOF->os() << "\n\n"; + } +>>>>>>> d83580ac9157... Changed the llvm-mca.cpp to get the MCA throughput changed lines 220 and 466 // Lower the MCInst sequence into an mca::Instruction sequence. ArrayRef Insts = Region->getInstructions(); From d0c6bbd00871f175684c5130109bfbf187d08d8e Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Wed, 28 Feb 2024 22:01:11 +0530 Subject: [PATCH 46/52] Read Me update updated references and few links to connect this readme to the other files (cherry picked from commit 1912ecab9aebf7a66653bce5c32c8593503d0b3b) --- README.md | 101 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 87 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ac0c352a6e61..3184db66f7a2 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ ======= - [About](#about) - [Setup](#setup) - - Requirements - - Building the Project - - Clone the Repository - - Setting up the build environment. 
- - Exporting ONNX Path Variables - - Conda env set-up - - A small hack to prevent the conda environtments from clashing (To Be removed) - - Cmake Command - - make Command + - [Requirements](#requirements) + - [Building the Project](#building-the-project) + - [Clone the Repository](#clone-the-repository) + - [Setting up the build environment.](#setting-up-the-build-environment) + - [Exporting ONNX Path Variables](#exporting-onnx-path-variables) + - [Conda env set-up](#conda-environment-set-up) + - [A small hack to prevent the conda environtments from clashing (To Be removed)](#a-small-hack-to-prevent-the-conda-environtments-from-clashing-to-be-removed) + - [Cmake Command](#cmake-command) + - [make Command](#make-command) - [List of optimizations supported](#list-of-optimizations-supported) - - Reinforcement Learning assisted Loop Distribution for Locality and Vectorization - - RL4Real - - POSET-RL + - [Reinforcement Learning assisted Loop Distribution for Locality and Vectorization](#reinforcement-learning-assisted-loop-distribution-for-locality-and-vectorization) + - [RL4Real](#rl4real) + - [POSET-RL](#poset-rl) >>>>>>> e15d6372eff5... ReadMe Updated @@ -43,6 +43,11 @@ page for detailed information on configuring and compiling LLVM. You can visit * [Directory Layout](https://llvm.org/docs/GettingStarted.html#directory-layout) to learn about the layout of the source code tree. +### IR2Vec +[IR2Vec](https://arxiv.org/abs/1909.06228) is a LLVM IR based framework to generate distributed representations for the source code in an unsupervised manner, which can be used to represent programs as input to solve machine learning tasks that take programs as inputs. It can capture intrinsic characteristics of the program. This is achieved by using the flow analyses information like Use-Def, Reaching Definitions and Live Variable information of the program. + +>IR2Vec: LLVM IR based Scalable Program Embeddings : S. 
VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar, Ramakrishna Upadrasta, Y. N. Srikant. + ### ML Compiler Bridge As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it is possible to have multiple ways of integrating compiler and the Machine learning model. These methods primarily use server client communication techniques like gRPC, and pipes. The ONNX flow which is capable of representation of ML models into DAG-based IRs with callable APIs in multiple langugages (C/C++/Python),does not require a server-client model or inter process communication. Additionally, TensorFlow's AOT compiled models are also supported for inference. @@ -55,15 +60,31 @@ As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it * cmake (>= 3.10) * GNU Make (4.2.1) -* LLVM (10.X) - [src](https://github.com/llvm/llvm-project/tree/release/10.x), [release](https://releases.llvm.org/download.html#10.0.1) ## ask isn't it included with the repo * Python (3.10), C++17 * gRPC v1.34 and protobuf v3.13 - for gRPC Model Runner * Building GRPC from Source: Please follow [`Build GRPC with cmake`](https://grpc.io/docs/languages/cpp/quickstart/) **v1.34 (protobuf v3.13)** to build GRPC from source. * In the above tutorial setting `DCMAKE_INSTALL_PREFIX` is necessary as it would give you an easy way to uninstall GRPC later. +<<<<<<< HEAD * [ONNXRuntime](https://github.com/microsoft/onnxruntime/releases) v1.16.3 <<<<<<< HEAD * TensorFlow - for TF Model Runner (AOT flow) # this should be in the yml only don't need to set it up separately ======= +======= +> [!WARNING] +> The version of gRPC that you clone should be 1.34.0, not 1.34.x +* Eigen library (3.3.7) + * If your system already has Eigen (3.3.7) set up, you can skip this step. + * Download and extract the released version.
+```bash + wget https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz + tar -xvzf eigen-3.3.7.tar.gz + mkdir eigen-build && cd eigen-build + cmake ../eigen-3.3.7 && make + cd ../ +``` + +* [ONNXRuntime v1.16.3](https://github.com/microsoft/onnxruntime/releases) +>>>>>>> 1912ecab9aeb... Read Me update * The following commands will download ONNX Runtime v1.16.3 in your present working directory and then untar the contents. The path for this will be used in this [section](#exporting-onnx-path-variables) @@ -74,7 +95,11 @@ As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it * TensorFlow - for TF Model Runner (AOT flow) >>>>>>> e15d6372eff5... ReadMe Updated * Tested with TensorFlow 2.13.0 +<<<<<<< HEAD * Other python requirements are available in [mlbridge.yml] # needs to be updated with sangamesh's.yml +======= +* Other python requirements are available in [mlbridge.yml]() +>>>>>>> 1912ecab9aeb... Read Me update * Conda/Anaconda based virtual environment is assumed (Experiments are done on an Ubuntu 20.04 machine) @@ -96,6 +121,8 @@ git pull git submodule update --init --recursive ``` +### Setting up the build environment. + #### Exporting ONNX Path Variables As the name suggests this is the Path to the ONNX Runtime that we downloaded in [Setup](#setup) . The path of ONNX Runtime is required not only for building the project but also it is required when running inference using the ONNX Model Runner. Hence it is a better idea to export these paths and also add them to the PATH and LD_LIBRARY_PATH >>>>>>> e15d6372eff5... ReadMe Updated @@ -171,7 +198,7 @@ Now we need to create a build directory for our build. Use the following command mkdir build cd build ``` -After moving to the build directory, we'll use CMake to generate our build files and directories +After moving to the build directory, we'll use CMake to generate our build files and directories. Here we are using Makefiles, you may choose any generator of your choice. 
```bash cmake -G "Unix Makefiles" -S ../llvm -B . \ @@ -190,6 +217,7 @@ cmake -G "Unix Makefiles" -S ../llvm -B . \ -DONNXRUNTIME_ROOTDIR= # path to your onnx runtime, use $ONNX_DIR if you already exported this environment variable \ -DLLVM_TF_AOT_RUNTIME= # \ -DTENSORFLOW_AOT_PATH= # \ +<<<<<<< HEAD >>>>>>> e15d6372eff5... ReadMe Updated -DLLVM_INLINER_MODEL_PATH=download \ -DLLVM_INLINER_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v1.1/inlining-Oz-99f0063-v1.1.tar.gz \ @@ -206,6 +234,17 @@ After following the above steps you have successfully exproted all the required >>>>>>> e15d6372eff5... ReadMe Updated make clang opt -j50 ``` +======= +``` + +#### Build command +After following the above steps, you have successfully exproted all the required environment variables and have also created the generator files which will be used to build the project. Use the following command to start your build. Example: +```bash +make clang opt -j $(nproc) +``` +> [!WARNING] +> For now building all targets is broken. Only build clang and opt +>>>>>>> 1912ecab9aeb... Read Me update ## List of optimizations supported This section will contain information about all the ML driven optimizations. Here is a brief about each optimization, and a simple onnx command which we can use to get one output (i.e give it an input .c/.cpp/.ll and get the optimized binary) . @@ -217,6 +256,16 @@ This section will contain information about all the ML driven optimizations. Her We propose a Reinforcement Learning (RL) approach for loop distribution, optimizing for both vectorization and locality. Using SCC Dependence Graphs (SDGs), our RL model learns loop distribution order through topological walks. The reward is based on instruction cost and cache misses. We introduce a strategy to expand the training set by generating new loops. This method aims to enhance loop parallelization and improve overall code performance. 
+<<<<<<< HEAD +======= +This is described in the paper [here](https://ieeexplore.ieee.org/abstract/document/10026979) . +Please see [here](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) for more details. + +> Reinforcement Learning assisted Loop Distribution for Locality and Vectorization, Shalini Jain, S. VenkataKeerthy, Rohit Aggarwal, Tharun Kumar Dangeti, Dibyendu Das, Ramakrishna Upadrasta + +Implimentaion here : [Model Training](./model/LoopDistribution/src/Readme.md) , [Inference](./llvm/lib/Transforms/Scalar/IR2Vec-LOF/custom_loop_distribution/Readme.md) + +>>>>>>> 1912ecab9aeb... Read Me update #### Try it out !!! > We assume you have already done the setup and built the project. @@ -234,7 +283,19 @@ to learn more head to the Pass specific readme [here]. ### RL4Real +<<<<<<< HEAD <\write info here\> +======= +`RL4ReAl` is a retargetable Reinforcement Learning (RL) approach for solving the REgister ALlocation (REAL) problem on diverse architectures. + +This is described in the paper [here](https://dl.acm.org/doi/abs/10.1145/3578360.3580273). +Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. +>>>>>>> 1912ecab9aeb... Read Me update + +>RL4ReAl: Reinforcement Learning for Register Allocation : S. VenkataKeerthy, Siddharth Jain, Anilava Kundu, Rohit Aggarwal, Albert Cohen, Ramakrishna Upadrasta LLVM-HPC, 2022. + +Implimentaion here : [Model Training](./model/RL4ReAl/README.md) , [Inference](./llvm/lib/CodeGen/MLRegAlloc/README.md) + #### Try it out ```bash @@ -243,7 +304,19 @@ to learn more head to the Pass specific readme [here]. ### POSET-RL +<<<<<<< HEAD <\write info here\> +======= +POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially mn combinations, allowing repetitions. 
The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. + +This is described in the paper ([arXiv](https://arxiv.org/abs/2204.02013)). +Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. + +Implimentaion here : [Model Training](./model/POSET-RL/README.md) , [Inference](./llvm/lib/Transforms/IPO/PosetRL/README.md) + + +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISSPASS, 2022 +>>>>>>> 1912ecab9aeb... Read Me update #### Try it out ```bash From 237935d9def590f29966bcfd4bf8a3585e4f1dff Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Wed, 28 Feb 2024 22:21:12 +0530 Subject: [PATCH 47/52] Updated Readme Brought the branch up to date to include all the readmes and have added the changes requested. (cherry picked from commit 2a48b0c91d4ed3c35820bde3ff328c9fc81a0fd2) --- README.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 3184db66f7a2..cf5817035ff2 100644 --- a/README.md +++ b/README.md @@ -96,10 +96,14 @@ As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it >>>>>>> e15d6372eff5... ReadMe Updated * Tested with TensorFlow 2.13.0 <<<<<<< HEAD +<<<<<<< HEAD * Other python requirements are available in [mlbridge.yml] # needs to be updated with sangamesh's.yml ======= * Other python requirements are available in [mlbridge.yml]() >>>>>>> 1912ecab9aeb... Read Me update +======= +* Other python requirements are available in [mlbridge.yml](./mlopt.yml) +>>>>>>> 2a48b0c91d4e... 
Updated Readme * Conda/Anaconda based virtual environment is assumed (Experiments are done on an Ubuntu 20.04 machine) @@ -148,6 +152,7 @@ The following commands will help you install the and set up the nessesary conda ```bash >>>>>>> e15d6372eff5... ReadMe Updated # install the env using the following commands +<<<<<<< HEAD conda env create -f ~/env/LOF_original_env.yml conda env create -f ~/env/mlgo-new @@ -157,6 +162,9 @@ tar -xvf onnxruntime-linux-x64-1.16.3.tgz # get GRPC working # check GRPC version # check again it should be exactly 1.34.0 not 1.34.x +======= +conda env create -f ./mlopt.yml +>>>>>>> 2a48b0c91d4e... Updated Readme ``` @@ -279,7 +287,6 @@ Implimentaion here : [Model Training](./model/LoopDistribution/src/Readme.md) , -ml-config-path=/home/intern24007/ml-llvm-project/config \ ``` -to learn more head to the Pass specific readme [here]. ### RL4Real @@ -296,8 +303,7 @@ Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for mo Implimentaion here : [Model Training](./model/RL4ReAl/README.md) , [Inference](./llvm/lib/CodeGen/MLRegAlloc/README.md) - -#### Try it out +#### Try it out !!! ```bash # write your bash commands here ``` @@ -312,13 +318,12 @@ POSET-RL uses a reinforcement learning approach as the search space of optimizat This is described in the paper ([arXiv](https://arxiv.org/abs/2204.02013)). Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. -Implimentaion here : [Model Training](./model/POSET-RL/README.md) , [Inference](./llvm/lib/Transforms/IPO/PosetRL/README.md) - - > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISSPASS, 2022 >>>>>>> 1912ecab9aeb... 
Read Me update -#### Try it out +Implimentaion here : [Model Training](./model/POSET-RL/README.md) , [Inference](./llvm/lib/Transforms/IPO/PosetRL/README.md) + +#### Try it out !!! ```bash <<<<<<< HEAD # write your bash commands here From 5ec41708486b931260d34027a2f3aeeba0a20797 Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Wed, 28 Feb 2024 22:27:36 +0530 Subject: [PATCH 48/52] Changed Env Names changed env names and added changes to readme (cherry picked from commit 9cb366a5644dd9c22e8998208cf7a57313e679df) --- README.md | 5 + mlopt.yml | 480 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 485 insertions(+) create mode 100644 mlopt.yml diff --git a/README.md b/README.md index cf5817035ff2..b68e0316b069 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,11 @@ tar -xvf onnxruntime-linux-x64-1.16.3.tgz conda env create -f ./mlopt.yml >>>>>>> 2a48b0c91d4e... Updated Readme +<<<<<<< HEAD +======= +# switch to the mlopt env, which is required for the build process +conda activate mlopt +>>>>>>> 9cb366a5644d...
Changed Env Names ``` <<<<<<< HEAD diff --git a/mlopt.yml b/mlopt.yml new file mode 100644 index 000000000000..418e1f656a9d --- /dev/null +++ b/mlopt.yml @@ -0,0 +1,480 @@ +name: mlopt +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - abseil-cpp=20211102.0=hd4dd3e8_0 + - aiobotocore=2.5.0=py310h06a4308_0 + - aiofiles=22.1.0=py310h06a4308_0 + - aiohttp=3.8.3=py310h5eee18b_0 + - aioitertools=0.7.1=pyhd3eb1b0_0 + - aiosignal=1.2.0=pyhd3eb1b0_0 + - aiosqlite=0.18.0=py310h06a4308_0 + - alabaster=0.7.12=pyhd3eb1b0_0 + - anaconda=2023.07=py310_1 + - anyio=3.5.0=py310h06a4308_0 + - appdirs=1.4.4=pyhd3eb1b0_0 + - argon2-cffi=21.3.0=pyhd3eb1b0_0 + - argon2-cffi-bindings=21.2.0=py310h7f8727e_0 + - arrow=1.2.3=py310h06a4308_1 + - arrow-cpp=11.0.0=py310h7516544_0 + - astroid=2.14.2=py310h06a4308_0 + - astropy=5.1=py310ha9d4c09_0 + - asttokens=2.0.5=pyhd3eb1b0_0 + - async-timeout=4.0.2=py310h06a4308_0 + - atomicwrites=1.4.0=py_0 + - attrs=22.1.0=py310h06a4308_0 + - automat=20.2.0=py_0 + - autopep8=1.6.0=pyhd3eb1b0_1 + - aws-c-common=0.4.57=he6710b0_1 + - aws-c-event-stream=0.1.6=h2531618_5 + - aws-checksums=0.1.9=he6710b0_0 + - aws-sdk-cpp=1.8.185=hce553d0_0 + - babel=2.11.0=py310h06a4308_0 + - backcall=0.2.0=pyhd3eb1b0_0 + - bcrypt=3.2.0=py310h5eee18b_1 + - beautifulsoup4=4.12.2=py310h06a4308_0 + - binaryornot=0.4.4=pyhd3eb1b0_1 + - black=23.3.0=py310h06a4308_0 + - blas=1.0=mkl + - bleach=4.1.0=pyhd3eb1b0_0 + - blosc=1.21.3=h6a678d5_0 + - bokeh=3.2.1=py310h2f386ee_0 + - boost-cpp=1.73.0=h7f8727e_12 + - botocore=1.29.76=py310h06a4308_0 + - bottleneck=1.3.5=py310ha9d4c09_0 + - brotli=1.0.9=h5eee18b_7 + - brotli-bin=1.0.9=h5eee18b_7 + - brotlipy=0.7.0=py310h7f8727e_1002 + - brunsli=0.1=h2531618_0 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.0=h5eee18b_0 + - c-blosc2=2.8.0=h6a678d5_0 + - ca-certificates=2023.05.30=h06a4308_0 + - certifi=2023.7.22=py310h06a4308_0 + - cffi=1.15.1=py310h5eee18b_3 + - cfitsio=3.470=h5893167_7 + - 
chardet=4.0.0=py310h06a4308_1003 + - charls=2.2.0=h2531618_0 + - charset-normalizer=2.0.4=pyhd3eb1b0_0 + - click=8.0.4=py310h06a4308_0 + - cloudpickle=2.2.1=py310h06a4308_0 + - colorama=0.4.6=py310h06a4308_0 + - colorcet=3.0.1=py310h06a4308_0 + - comm=0.1.2=py310h06a4308_0 + - constantly=15.1.0=py310h06a4308_0 + - contourpy=1.0.5=py310hdb19cb5_0 + - cookiecutter=1.7.3=pyhd3eb1b0_0 + - cryptography=41.0.2=py310h774aba0_0 + - cssselect=1.1.0=pyhd3eb1b0_0 + - curl=8.1.1=h37d81fd_2 + - cycler=0.11.0=pyhd3eb1b0_0 + - cytoolz=0.12.0=py310h5eee18b_0 + - daal4py=2023.1.1=py310h3c18c91_0 + - dal=2023.1.1=hdb19cb5_48679 + - dask=2023.6.0=py310h06a4308_0 + - dask-core=2023.6.0=py310h06a4308_0 + - datasets=2.12.0=py310h06a4308_0 + - datashader=0.15.1=py310h06a4308_0 + - datashape=0.5.4=py310h06a4308_1 + - dbus=1.13.18=hb2f20db_0 + - debugpy=1.6.7=py310h6a678d5_0 + - decorator=5.1.1=pyhd3eb1b0_0 + - defusedxml=0.7.1=pyhd3eb1b0_0 + - diff-match-patch=20200713=pyhd3eb1b0_0 + - dill=0.3.6=py310h06a4308_0 + - distributed=2023.6.0=py310h06a4308_0 + - docstring-to-markdown=0.11=py310h06a4308_0 + - docutils=0.18.1=py310h06a4308_3 + - entrypoints=0.4=py310h06a4308_0 + - et_xmlfile=1.1.0=py310h06a4308_0 + - exceptiongroup=1.0.4=py310h06a4308_0 + - executing=0.8.3=pyhd3eb1b0_0 + - expat=2.4.9=h6a678d5_0 + - filelock=3.9.0=py310h06a4308_0 + - flake8=6.0.0=py310h06a4308_0 + - flask=2.2.2=py310h06a4308_0 + - fontconfig=2.14.1=h52c9d5c_1 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=h4a9f257_0 + - frozenlist=1.3.3=py310h5eee18b_0 + - fsspec=2023.4.0=py310h06a4308_0 + - gensim=4.3.0=py310h1128e8f_0 + - gflags=2.2.2=he6710b0_0 + - giflib=5.2.1=h5eee18b_3 + - glib=2.69.1=he621ea3_2 + - glog=0.5.0=h2531618_0 + - gmp=6.2.1=h295c915_3 + - gmpy2=2.1.2=py310heeb90bb_0 + - greenlet=2.0.1=py310h6a678d5_0 + - grpc-cpp=1.46.1=h33aed49_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - gstreamer=1.14.1=h5eee18b_1 + - h5py=3.7.0=py310he06866b_0 + - hdf5=1.10.6=h3ffc7dd_1 + - heapdict=1.0.1=pyhd3eb1b0_0 
+ - holoviews=1.17.0=py310h06a4308_0 + - huggingface_hub=0.15.1=py310h06a4308_0 + - hvplot=0.8.4=py310h06a4308_0 + - hyperlink=21.0.0=pyhd3eb1b0_0 + - icu=58.2=he6710b0_3 + - idna=3.4=py310h06a4308_0 + - imagecodecs=2021.8.26=py310h46e8fbd_2 + - imageio=2.31.1=py310h06a4308_0 + - imagesize=1.4.1=py310h06a4308_0 + - imbalanced-learn=0.10.1=py310h06a4308_1 + - importlib-metadata=6.0.0=py310h06a4308_0 + - importlib_metadata=6.0.0=hd3eb1b0_0 + - incremental=21.3.0=pyhd3eb1b0_0 + - inflection=0.5.1=py310h06a4308_0 + - iniconfig=1.1.1=pyhd3eb1b0_0 + - intake=0.6.8=py310h06a4308_0 + - intel-openmp=2023.1.0=hdb19cb5_46305 + - intervaltree=3.1.0=pyhd3eb1b0_0 + - ipykernel=6.19.2=py310h2f386ee_0 + - ipython=8.12.0=py310h06a4308_0 + - ipython_genutils=0.2.0=pyhd3eb1b0_1 + - ipywidgets=8.0.4=py310h06a4308_0 + - isort=5.9.3=pyhd3eb1b0_0 + - itemadapter=0.3.0=pyhd3eb1b0_0 + - itemloaders=1.0.4=pyhd3eb1b0_1 + - itsdangerous=2.0.1=pyhd3eb1b0_0 + - jaraco.classes=3.2.1=pyhd3eb1b0_0 + - jedi=0.18.1=py310h06a4308_1 + - jeepney=0.7.1=pyhd3eb1b0_0 + - jellyfish=0.9.0=py310h7f8727e_0 + - jinja2=3.1.2=py310h06a4308_0 + - jinja2-time=0.2.0=pyhd3eb1b0_3 + - jmespath=0.10.0=pyhd3eb1b0_0 + - joblib=1.2.0=py310h06a4308_0 + - jpeg=9e=h5eee18b_1 + - jq=1.6=h27cfd23_1000 + - json5=0.9.6=pyhd3eb1b0_0 + - jsonschema=4.17.3=py310h06a4308_0 + - jupyter=1.0.0=py310h06a4308_8 + - jupyter_client=7.4.9=py310h06a4308_0 + - jupyter_console=6.6.3=py310h06a4308_0 + - jupyter_core=5.3.0=py310h06a4308_0 + - jupyter_events=0.6.3=py310h06a4308_0 + - jupyter_server=1.23.4=py310h06a4308_0 + - jupyter_server_fileid=0.9.0=py310h06a4308_0 + - jupyter_server_ydoc=0.8.0=py310h06a4308_1 + - jupyter_ydoc=0.2.4=py310h06a4308_0 + - jupyterlab=3.6.3=py310h06a4308_0 + - jupyterlab_pygments=0.1.2=py_0 + - jupyterlab_server=2.22.0=py310h06a4308_0 + - jupyterlab_widgets=3.0.5=py310h06a4308_0 + - jxrlib=1.1=h7b6447c_2 + - keyring=23.13.1=py310h06a4308_0 + - kiwisolver=1.4.4=py310h6a678d5_0 + - krb5=1.20.1=h568e23c_1 + - 
lazy-object-proxy=1.6.0=py310h7f8727e_0 + - lazy_loader=0.2=py310h06a4308_0 + - lcms2=2.12=h3be6417_0 + - ld_impl_linux-64=2.38=h1181459_1 + - lerc=3.0=h295c915_0 + - libaec=1.0.4=he6710b0_1 + - libboost=1.73.0=h28710b8_12 + - libbrotlicommon=1.0.9=h5eee18b_7 + - libbrotlidec=1.0.9=h5eee18b_7 + - libbrotlienc=1.0.9=h5eee18b_7 + - libcurl=8.1.1=h91b91d3_2 + - libdeflate=1.17=h5eee18b_0 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h7f8727e_1 + - libevent=2.1.12=h8f2d780_0 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran-ng=11.2.0=h00389a5_1 + - libgfortran5=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libllvm10=10.0.1=hbcb73fb_5 + - libllvm14=14.0.6=hdb19cb5_3 + - libnghttp2=1.52.0=ha637b67_1 + - libpng=1.6.39=h5eee18b_0 + - libpq=12.15=h37d81fd_1 + - libprotobuf=3.20.3=he621ea3_0 + - libsodium=1.0.18=h7b6447c_0 + - libspatialindex=1.9.3=h2531618_0 + - libssh2=1.10.0=h37d81fd_2 + - libstdcxx-ng=11.2.0=h1234567_1 + - libthrift=0.15.0=h0d84882_2 + - libtiff=4.5.0=h6a678d5_2 + - libuuid=1.41.5=h5eee18b_0 + - libwebp=1.2.4=h11a3e52_1 + - libwebp-base=1.2.4=h5eee18b_1 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.1=hfa300c1_0 + - libxml2=2.9.14=h74e7548_0 + - libxslt=1.1.35=h4e12654_0 + - libzopfli=1.0.3=he6710b0_0 + - linkify-it-py=2.0.0=py310h06a4308_0 + - llvmlite=0.40.0=py310he621ea3_0 + - locket=1.0.0=py310h06a4308_0 + - lxml=4.9.1=py310h1edc446_0 + - lz4=4.3.2=py310h5eee18b_0 + - lz4-c=1.9.4=h6a678d5_0 + - lzo=2.10=h7b6447c_2 + - markdown=3.4.1=py310h06a4308_0 + - markdown-it-py=2.2.0=py310h06a4308_1 + - markupsafe=2.1.1=py310h7f8727e_0 + - matplotlib=3.7.1=py310h06a4308_1 + - matplotlib-base=3.7.1=py310h1128e8f_1 + - matplotlib-inline=0.1.6=py310h06a4308_0 + - mccabe=0.7.0=pyhd3eb1b0_0 + - mdit-py-plugins=0.3.0=py310h06a4308_0 + - mdurl=0.1.0=py310h06a4308_0 + - mistune=0.8.4=py310h7f8727e_1000 + - mkl=2023.1.0=h6d00ec8_46342 + - mkl-service=2.4.0=py310h5eee18b_1 + - mkl_fft=1.3.6=py310h1128e8f_1 + - 
mkl_random=1.2.2=py310h1128e8f_1 + - more-itertools=8.12.0=pyhd3eb1b0_0 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.2=hb69a4c5_1 + - mpi=1.0=mpich + - mpich=4.1.1=hbae89fd_0 + - mpmath=1.3.0=py310h06a4308_0 + - msgpack-python=1.0.3=py310hd09550d_0 + - multidict=6.0.2=py310h5eee18b_0 + - multipledispatch=0.6.0=py310h06a4308_0 + - multiprocess=0.70.14=py310h06a4308_0 + - munkres=1.1.4=py_0 + - mypy_extensions=0.4.3=py310h06a4308_0 + - nbclassic=0.5.5=py310h06a4308_0 + - nbclient=0.5.13=py310h06a4308_0 + - nbconvert=6.5.4=py310h06a4308_0 + - nbformat=5.7.0=py310h06a4308_0 + - ncurses=6.4=h6a678d5_0 + - nest-asyncio=1.5.6=py310h06a4308_0 + - networkx=3.1=py310h06a4308_0 + - ninja=1.10.2=h06a4308_5 + - ninja-base=1.10.2=hd09550d_5 + - nltk=3.8.1=py310h06a4308_0 + - notebook=6.5.4=py310h06a4308_1 + - notebook-shim=0.2.2=py310h06a4308_0 + - nspr=4.35=h6a678d5_0 + - nss=3.89.1=h6a678d5_0 + - numba=0.57.0=py310h1128e8f_0 + - numexpr=2.8.4=py310h85018f9_1 + - numpy=1.24.3=py310h5f9d8c6_1 + - numpy-base=1.24.3=py310hb5e798b_1 + - numpydoc=1.5.0=py310h06a4308_0 + - oniguruma=6.9.7.1=h27cfd23_0 + - openjpeg=2.4.0=h3ad879b_0 + - openpyxl=3.0.10=py310h5eee18b_0 + - openssl=1.1.1u=h7f8727e_0 + - orc=1.7.4=hb3bc3d3_1 + - packaging=23.0=py310h06a4308_0 + - pandas=1.5.3=py310h1128e8f_0 + - pandocfilters=1.5.0=pyhd3eb1b0_0 + - panel=1.2.1=py310h06a4308_0 + - param=1.13.0=py310h06a4308_0 + - parsel=1.6.0=py310h06a4308_0 + - parso=0.8.3=pyhd3eb1b0_0 + - partd=1.2.0=pyhd3eb1b0_1 + - pathspec=0.10.3=py310h06a4308_0 + - patsy=0.5.3=py310h06a4308_0 + - pcre=8.45=h295c915_0 + - pep8=1.7.1=py310h06a4308_1 + - pexpect=4.8.0=pyhd3eb1b0_3 + - pickleshare=0.7.5=pyhd3eb1b0_1003 + - pillow=9.4.0=py310h6a678d5_0 + - pip=23.2.1=py310h06a4308_0 + - platformdirs=2.5.2=py310h06a4308_0 + - plotly=5.9.0=py310h06a4308_0 + - pluggy=1.0.0=py310h06a4308_1 + - ply=3.11=py310h06a4308_0 + - pooch=1.4.0=pyhd3eb1b0_0 + - poyo=0.5.0=pyhd3eb1b0_0 + - prometheus_client=0.14.1=py310h06a4308_0 + - 
prompt-toolkit=3.0.36=py310h06a4308_0 + - prompt_toolkit=3.0.36=hd3eb1b0_0 + - protego=0.1.16=py_0 + - psutil=5.9.0=py310h5eee18b_0 + - ptyprocess=0.7.0=pyhd3eb1b0_2 + - pure_eval=0.2.2=pyhd3eb1b0_0 + - py-cpuinfo=8.0.0=pyhd3eb1b0_1 + - pyarrow=11.0.0=py310h468efa6_0 + - pyasn1=0.4.8=pyhd3eb1b0_0 + - pyasn1-modules=0.2.8=py_0 + - pycodestyle=2.10.0=py310h06a4308_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pyct=0.5.0=py310h06a4308_0 + - pycurl=7.45.2=py310h37d81fd_0 + - pydispatcher=2.0.5=py310h06a4308_2 + - pydocstyle=6.3.0=py310h06a4308_0 + - pyerfa=2.0.0=py310h7f8727e_0 + - pyflakes=3.0.1=py310h06a4308_0 + - pygments=2.15.1=py310h06a4308_1 + - pylint=2.16.2=py310h06a4308_0 + - pylint-venv=2.3.0=py310h06a4308_0 + - pyls-spyder=0.4.0=pyhd3eb1b0_0 + - pyodbc=4.0.34=py310h6a678d5_0 + - pyopenssl=23.2.0=py310h06a4308_0 + - pyparsing=3.0.9=py310h06a4308_0 + - pyqt=5.15.7=py310h6a678d5_1 + - pyqtwebengine=5.15.7=py310h6a678d5_1 + - pyrsistent=0.18.0=py310h7f8727e_0 + - pysocks=1.7.1=py310h06a4308_0 + - pytables=3.8.0=py310h43249b6_2 + - pytest=7.4.0=py310h06a4308_0 + - python=3.10.12=h7a1cb2a_0 + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - python-fastjsonschema=2.16.2=py310h06a4308_0 + - python-json-logger=2.0.7=py310h06a4308_0 + - python-lmdb=1.4.1=py310h6a678d5_0 + - python-lsp-black=1.2.1=py310h06a4308_0 + - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 + - python-lsp-server=1.7.2=py310h06a4308_0 + - python-slugify=5.0.2=pyhd3eb1b0_0 + - python-snappy=0.6.1=py310h6a678d5_0 + - python-xxhash=2.0.2=py310h5eee18b_1 + - pytoolconfig=1.2.5=py310h06a4308_1 + - pytorch=2.0.1=cpu_py310hdc00b08_0 + - pytz=2022.7=py310h06a4308_0 + - pyviz_comms=2.3.0=py310h06a4308_0 + - pywavelets=1.4.1=py310h5eee18b_0 + - pyxdg=0.27=pyhd3eb1b0_0 + - pyyaml=6.0=py310h5eee18b_1 + - pyzmq=23.2.0=py310h6a678d5_0 + - qdarkstyle=3.0.2=pyhd3eb1b0_0 + - qstylizer=0.2.2=py310h06a4308_0 + - qt-main=5.15.2=h327a75a_7 + - qt-webengine=5.15.9=hd2b0992_4 + - qtawesome=1.2.2=py310h06a4308_0 + - 
qtconsole=5.4.2=py310h06a4308_0 + - qtpy=2.2.0=py310h06a4308_0 + - qtwebkit=5.212=h4eab89a_4 + - queuelib=1.5.0=py310h06a4308_0 + - re2=2022.04.01=h295c915_0 + - readline=8.2=h5eee18b_0 + - regex=2022.7.9=py310h5eee18b_0 + - requests=2.31.0=py310h06a4308_0 + - requests-file=1.5.1=pyhd3eb1b0_0 + - responses=0.13.3=pyhd3eb1b0_0 + - rfc3339-validator=0.1.4=py310h06a4308_0 + - rfc3986-validator=0.1.1=py310h06a4308_0 + - rope=1.7.0=py310h06a4308_0 + - rtree=1.0.1=py310h06a4308_0 + - s3fs=2023.4.0=py310h06a4308_0 + - sacremoses=0.0.43=pyhd3eb1b0_0 + - scikit-image=0.20.0=py310h6a678d5_0 + - scikit-learn=1.3.0=py310h1128e8f_0 + - scikit-learn-intelex=2023.1.1=py310h06a4308_0 + - scipy=1.10.1=py310h5f9d8c6_1 + - scrapy=2.8.0=py310h06a4308_0 + - seaborn=0.12.2=py310h06a4308_0 + - secretstorage=3.3.1=py310h06a4308_1 + - send2trash=1.8.0=pyhd3eb1b0_1 + - service_identity=18.1.0=pyhd3eb1b0_1 + - setuptools=68.0.0=py310h06a4308_0 + - sip=6.6.2=py310h6a678d5_0 + - six=1.16.0=pyhd3eb1b0_1 + - smart_open=5.2.1=py310h06a4308_0 + - snappy=1.1.9=h295c915_0 + - sniffio=1.2.0=py310h06a4308_1 + - snowballstemmer=2.2.0=pyhd3eb1b0_0 + - sortedcontainers=2.4.0=pyhd3eb1b0_0 + - soupsieve=2.4=py310h06a4308_0 + - sphinx=5.0.2=py310h06a4308_0 + - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 + - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 + - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 + - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 + - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 + - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 + - spyder=5.4.3=py310h06a4308_1 + - spyder-kernels=2.4.3=py310h06a4308_0 + - sqlalchemy=1.4.39=py310h5eee18b_0 + - sqlite=3.41.2=h5eee18b_0 + - stack_data=0.2.0=pyhd3eb1b0_0 + - statsmodels=0.14.0=py310ha9d4c09_0 + - sympy=1.11.1=py310h06a4308_0 + - tabulate=0.8.10=py310h06a4308_0 + - tbb=2021.8.0=hdb19cb5_0 + - tbb4py=2021.8.0=py310hdb19cb5_0 + - tblib=1.7.0=pyhd3eb1b0_0 + - tenacity=8.2.2=py310h06a4308_0 + - terminado=0.17.1=py310h06a4308_0 + - text-unidecode=1.3=pyhd3eb1b0_0 + 
- textdistance=4.2.1=pyhd3eb1b0_0 + - threadpoolctl=2.2.0=pyh0d69192_0 + - three-merge=0.1.1=pyhd3eb1b0_0 + - tifffile=2021.7.2=pyhd3eb1b0_2 + - tinycss2=1.2.1=py310h06a4308_0 + - tk=8.6.12=h1ccaba5_0 + - tldextract=3.2.0=pyhd3eb1b0_0 + - tokenizers=0.13.2=py310he7d60b5_1 + - toml=0.10.2=pyhd3eb1b0_0 + - tomli=2.0.1=py310h06a4308_0 + - tomlkit=0.11.1=py310h06a4308_0 + - toolz=0.12.0=py310h06a4308_0 + - tornado=6.3.2=py310h5eee18b_0 + - tqdm=4.65.0=py310h2f386ee_0 + - traitlets=5.7.1=py310h06a4308_0 + - transformers=4.29.2=py310h06a4308_0 + - twisted=22.10.0=py310h5eee18b_0 + - tzdata=2023c=h04d1e81_0 + - uc-micro-py=1.0.1=py310h06a4308_0 + - ujson=5.4.0=py310h6a678d5_0 + - unidecode=1.2.0=pyhd3eb1b0_0 + - unixodbc=2.3.11=h5eee18b_0 + - urllib3=1.26.16=py310h06a4308_0 + - utf8proc=2.6.1=h27cfd23_0 + - w3lib=1.21.0=pyhd3eb1b0_0 + - watchdog=2.1.6=py310h06a4308_0 + - wcwidth=0.2.5=pyhd3eb1b0_0 + - webencodings=0.5.1=py310h06a4308_1 + - websocket-client=0.58.0=py310h06a4308_4 + - werkzeug=2.2.3=py310h06a4308_0 + - whatthepatch=1.0.2=py310h06a4308_0 + - wheel=0.38.4=py310h06a4308_0 + - widgetsnbextension=4.0.5=py310h06a4308_0 + - wrapt=1.14.1=py310h5eee18b_0 + - wurlitzer=3.0.2=py310h06a4308_0 + - xarray=2023.6.0=py310h06a4308_0 + - xxhash=0.8.0=h7f8727e_3 + - xyzservices=2022.9.0=py310h06a4308_1 + - xz=5.4.2=h5eee18b_0 + - y-py=0.5.9=py310h52d8a92_0 + - yaml=0.2.5=h7b6447c_0 + - yapf=0.31.0=pyhd3eb1b0_0 + - yarl=1.8.1=py310h5eee18b_0 + - ypy-websocket=0.8.2=py310h06a4308_0 + - zeromq=4.3.4=h2531618_0 + - zfp=0.5.5=h295c915_6 + - zict=2.2.0=py310h06a4308_0 + - zipp=3.11.0=py310h06a4308_0 + - zlib=1.2.13=h5eee18b_0 + - zlib-ng=2.0.7=h5eee18b_0 + - zope=1.0=py310h06a4308_1 + - zope.interface=5.4.0=py310h7f8727e_0 + - zstd=1.5.5=hc292b87_0 + - pip: + - absl-py==1.4.0 + - astunparse==1.6.3 + - cachetools==5.3.1 + - flatbuffers==23.5.26 + - gast==0.4.0 + - google-auth==2.22.0 + - google-auth-oauthlib==1.0.0 + - google-pasta==0.2.0 + - grpcio==1.57.0 + - keras==2.13.1 + - 
libclang==16.0.6 + - oauthlib==3.2.2 + - opt-einsum==3.3.0 + - protobuf==4.24.2 + - pyqt5-sip==12.11.0 + - requests-oauthlib==1.3.1 + - rsa==4.9 + - tensorboard==2.13.0 + - tensorboard-data-server==0.7.1 + - tensorflow==2.13.0 + - tensorflow-estimator==2.13.0 + - tensorflow-io-gcs-filesystem==0.33.0 + - termcolor==2.3.0 + - typing-extensions==4.5.0 + - python-decouple From 5b1754598bed76c5858b95d7bc05c7deeb567aa0 Mon Sep 17 00:00:00 2001 From: VikasPatnala Date: Thu, 29 Feb 2024 00:24:29 +0530 Subject: [PATCH 49/52] Changed the StopServer logic to the new logic of sending the text to stdin (cherry picked from commit 146ca72982db29879a495821a99da6e1e9cf3c2f) --- model/POSET-RL/src/Environment_pipe.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/model/POSET-RL/src/Environment_pipe.py b/model/POSET-RL/src/Environment_pipe.py index 1634958e81a7..6e446641053f 100755 --- a/model/POSET-RL/src/Environment_pipe.py +++ b/model/POSET-RL/src/Environment_pipe.py @@ -345,7 +345,7 @@ def step(self, action_index): if self.mode != 'inference': if not self.use_pipe: - self.stable_grpc("Exit", None) + # self.stable_grpc("Exit", None) try: outs, errs = self.server_pid.communicate(timeout=5) except: @@ -624,7 +624,8 @@ def startServer(self, filename, ip): if self.use_pipe: cmd = cmd + " -mllvm -use-pipe" pid = subprocess.Popen(cmd, executable='/bin/bash', - shell=True, preexec_fn=os.setsid) + shell=True, preexec_fn=os.setsid, stdin=subprocess.PIPE, text=True) + return pid def repeatedgRPCFieldToNumpyArray(self, gRPCObj): @@ -642,11 +643,23 @@ def applyActionGetEmbeddings(self, action): return self.repeatedgRPCFieldToNumpyArray(response) def stopServer(self): +<<<<<<< HEAD request = posetRL_pb2.ActionRequest(action=-1) self.compiler_interface.populate_buffer(request) self.compiler_interface.evaluate() # self.stub.applyActionGetEmbeddings(request) +======= + self.serverId.stdin.write("Terminate\n") + self.serverId.stdin.flush() + try: + out, errs = 
self.serverId.communicate(timeout=15) + except: + self.serverId.kill() + out, errs = self.serverId.communicate() + print("Force Stop") + +>>>>>>> 146ca72982db... Changed the StopServer logic to the new logic of sending the text to stdin def stable_grpc(self, op, action): attempt = 0 max_retries = 5 From dbf8e51597655b485d5ec23fc1dc21a0b2c75d32 Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Fri, 1 Mar 2024 18:05:15 +0530 Subject: [PATCH 50/52] ReadMe Updated Broken Links Fixed and conference and website dates also added. (cherry picked from commit fa22a6d1719170c24737327ce6a2d86e8f0c1aea) --- README.md | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index b68e0316b069..b8bfc31e8f9f 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ - [Conda env set-up](#conda-environment-set-up) - [A small hack to prevent the conda environtments from clashing (To Be removed)](#a-small-hack-to-prevent-the-conda-environtments-from-clashing-to-be-removed) - [Cmake Command](#cmake-command) - - [make Command](#make-command) + - [Build Command](#build-command) - [List of optimizations supported](#list-of-optimizations-supported) - [Reinforcement Learning assisted Loop Distribution for Locality and Vectorization](#reinforcement-learning-assisted-loop-distribution-for-locality-and-vectorization) - [RL4Real](#rl4real) @@ -97,6 +97,7 @@ As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it * Tested with TensorFlow 2.13.0 <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD * Other python requirements are available in [mlbridge.yml] # needs to be updated with sangamesh's.yml ======= * Other python requirements are available in [mlbridge.yml]() @@ -106,6 +107,14 @@ As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it >>>>>>> 2a48b0c91d4e... 
Updated Readme * Conda/Anaconda based virtual environment is assumed +======= +* Other python requirements are available in [mlopt.yml](./mlopt.yml) + * Conda/Anaconda based virtual environment is assumed + + + +>>>>>>> fa22a6d17191... ReadMe Updated (Experiments are done on an Ubuntu 20.04 machine) <<<<<<< HEAD @@ -272,11 +281,11 @@ We propose a Reinforcement Learning (RL) approach for loop distribution, optimiz <<<<<<< HEAD ======= This is described in the paper [here](https://ieeexplore.ieee.org/abstract/document/10026979) . -Please see [here](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) for more details. +Please see [here](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) for more details. [Website link](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) -> Reinforcement Learning assisted Loop Distribution for Locality and Vectorization, Shalini Jain, S. VenkataKeerthy, Rohit Aggarwal, Tharun Kumar Dangeti, Dibyendu Das, Ramakrishna Upadrasta +> Reinforcement Learning assisted Loop Distribution for Locality and Vectorization, Shalini Jain, S. VenkataKeerthy, Rohit Aggarwal, Tharun Kumar Dangeti, Dibyendu Das, Ramakrishna Upadrasta LLVM-HPC, 2022. -Implimentaion here : [Model Training](./model/LoopDistribution/src/Readme.md) , [Inference](./llvm/lib/Transforms/Scalar/IR2Vec-LOF/custom_loop_distribution/Readme.md) +Implimentaion here : [Model Training](./model/LoopDistribution/src/README.md) , [Inference](./llvm/lib/Transforms/Scalar/IR2Vec-LOF/custom_loop_distribution/Readme.md) >>>>>>> 1912ecab9aeb... Read Me update #### Try it out !!! @@ -301,10 +310,14 @@ Implimentaion here : [Model Training](./model/LoopDistribution/src/Readme.md) , `RL4ReAl` is a retargetable Reinforcement Learning (RL) approach for solving the REgister ALlocation (REAL) problem on diverse architectures. This is described in the paper [here](https://dl.acm.org/doi/abs/10.1145/3578360.3580273). 
+<<<<<<< HEAD Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. >>>>>>> 1912ecab9aeb... Read Me update +======= +Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. [Website link](https://compilers.cse.iith.ac.in/publications/rl4real/) +>>>>>>> fa22a6d17191... ReadMe Updated ->RL4ReAl: Reinforcement Learning for Register Allocation : S. VenkataKeerthy, Siddharth Jain, Anilava Kundu, Rohit Aggarwal, Albert Cohen, Ramakrishna Upadrasta LLVM-HPC, 2022. +>RL4ReAl: Reinforcement Learning for Register Allocation : S. VenkataKeerthy, Siddharth Jain, Anilava Kundu, Rohit Aggarwal, Albert Cohen, Ramakrishna Upadrasta CC 2023 Implimentaion here : [Model Training](./model/RL4ReAl/README.md) , [Inference](./llvm/lib/CodeGen/MLRegAlloc/README.md) @@ -320,11 +333,15 @@ Implimentaion here : [Model Training](./model/RL4ReAl/README.md) , [Inference](. ======= POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially mn combinations, allowing repetitions. The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. -This is described in the paper ([arXiv](https://arxiv.org/abs/2204.02013)). -Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. +This is described in the arxiv link ([here](https://arxiv.org/abs/2204.02013)). +Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. [Website here](https://compilers.cse.iith.ac.in/projects/posetrl/). 
+<<<<<<< HEAD > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISSPASS, 2022 >>>>>>> 1912ecab9aeb... Read Me update +======= +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISSPASS, 2022. +>>>>>>> fa22a6d17191... ReadMe Updated Implimentaion here : [Model Training](./model/POSET-RL/README.md) , [Inference](./llvm/lib/Transforms/IPO/PosetRL/README.md) From 8524ef45d70307d3bce3bbf06370db703944ae6d Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Fri, 1 Mar 2024 18:08:48 +0530 Subject: [PATCH 51/52] ReadMe Updated changed onnx link was pointing to setup should point to requirements. (cherry picked from commit 49b56365881b0db9af060e37124007e77f8157a8) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index b8bfc31e8f9f..31c2046ed67f 100644 --- a/README.md +++ b/README.md @@ -137,8 +137,12 @@ git submodule update --init --recursive ### Setting up the build environment. #### Exporting ONNX Path Variables +<<<<<<< HEAD As the name suggests this is the Path to the ONNX Runtime that we downloaded in [Setup](#setup) . The path of ONNX Runtime is required not only for building the project but also it is required when running inference using the ONNX Model Runner. Hence it is a better idea to export these paths and also add them to the PATH and LD_LIBRARY_PATH >>>>>>> e15d6372eff5... ReadMe Updated +======= +As the name suggests this is the Path to the ONNX Runtime that we downloaded in [Requirements](#requirements) . The path of ONNX Runtime is required not only for building the project but also it is required when running inference using the ONNX Model Runner. Hence it is a better idea to export these paths and also add them to the PATH and LD_LIBRARY_PATH +>>>>>>> 49b56365881b...
ReadMe Updated ```bash <<<<<<< HEAD From ebe76f34afcc3731ba1dda11c22477b154d35588 Mon Sep 17 00:00:00 2001 From: M V V S Manoj Kumar Date: Fri, 1 Mar 2024 18:29:14 +0530 Subject: [PATCH 52/52] ReadMe Updated Removed the small hack section and changed links. (cherry picked from commit d0945024c81667c8d48b6c621740c1c89a3ba043) --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 31c2046ed67f..44588e58b4eb 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ - [Setting up the build environment.](#setting-up-the-build-environment) - [Exporting ONNX Path Variables](#exporting-onnx-path-variables) - [Conda env set-up](#conda-environment-set-up) - - [A small hack to prevent the conda environtments from clashing (To Be removed)](#a-small-hack-to-prevent-the-conda-environtments-from-clashing-to-be-removed) - [Cmake Command](#cmake-command) - [Build Command](#build-command) - [List of optimizations supported](#list-of-optimizations-supported) @@ -186,6 +185,7 @@ conda activate mlopt >>>>>>> 9cb366a5644d... Changed Env Names ``` +<<<<<<< HEAD <<<<<<< HEAD ### Build @@ -216,6 +216,8 @@ cd build # build command cmake -G "Unix Makefiles" -S ../llvm -B . \ ======= +======= +>>>>>>> d0945024c816... ReadMe Updated #### Cmake Command Now we need to create a build directory for our build. Use the following commands to make a build dir inside the cloned reposiotry @@ -285,7 +287,7 @@ We propose a Reinforcement Learning (RL) approach for loop distribution, optimiz <<<<<<< HEAD ======= This is described in the paper [here](https://ieeexplore.ieee.org/abstract/document/10026979) . -Please see [here](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) for more details. 
[Website link](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) +[Website link](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) > Reinforcement Learning assisted Loop Distribution for Locality and Vectorization, Shalini Jain, S. VenkataKeerthy, Rohit Aggarwal, Tharun Kumar Dangeti, Dibyendu Das, Ramakrishna Upadrasta LLVM-HPC, 2022. @@ -315,13 +317,17 @@ Implimentaion here : [Model Training](./model/LoopDistribution/src/README.md) , This is described in the paper [here](https://dl.acm.org/doi/abs/10.1145/3578360.3580273). <<<<<<< HEAD +<<<<<<< HEAD Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. >>>>>>> 1912ecab9aeb... Read Me update ======= Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. [Website link](https://compilers.cse.iith.ac.in/publications/rl4real/) >>>>>>> fa22a6d17191... ReadMe Updated +======= +[Website link](https://compilers.cse.iith.ac.in/publications/rl4real/) +>>>>>>> d0945024c816... ReadMe Updated ->RL4ReAl: Reinforcement Learning for Register Allocation : S. VenkataKeerthy, Siddharth Jain, Anilava Kundu, Rohit Aggarwal, Albert Cohen, Ramakrishna Upadrasta CC 2023 +>RL4ReAl: Reinforcement Learning for Register Allocation : S. VenkataKeerthy, Siddharth Jain, Anilava Kundu, Rohit Aggarwal, Albert Cohen, Ramakrishna Upadrasta CC, 2023. Implimentaion here : [Model Training](./model/RL4ReAl/README.md) , [Inference](./llvm/lib/CodeGen/MLRegAlloc/README.md) @@ -338,7 +344,7 @@ Implimentaion here : [Model Training](./model/RL4ReAl/README.md) , [Inference](. POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially mn combinations, allowing repetitions. 
The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. This is described in the arxiv link ([here](https://arxiv.org/abs/2204.02013)). -Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. [Website here](https://compilers.cse.iith.ac.in/projects/posetrl/). +Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. [Website link](https://compilers.cse.iith.ac.in/projects/posetrl/). <<<<<<< HEAD > POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISSPASS, 2022