diff --git a/MLCompilerBridge b/MLCompilerBridge index 3d59126519db..d03a012728d1 160000 --- a/MLCompilerBridge +++ b/MLCompilerBridge @@ -1 +1 @@ -Subproject commit 3d59126519dbd8c56b60ad95ec2220c0b8be3957 +Subproject commit d03a012728d1757350bdae2ace47b7ae4f44d6ea diff --git a/README.md b/README.md index dbea0a16be14..44588e58b4eb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,381 @@ +<<<<<<< HEAD # ML-LLVM-Project +======= +# ML LLVM Project + +## Contents +<<<<<<< HEAD +- About +- Setup + - Requirements + - Build +- All implemented Passes +======= +- [About](#about) +- [Setup](#setup) + - [Requirements](#requirements) + - [Building the Project](#building-the-project) + - [Clone the Repository](#clone-the-repository) + - [Setting up the build environment.](#setting-up-the-build-environment) + - [Exporting ONNX Path Variables](#exporting-onnx-path-variables) + - [Conda env set-up](#conda-environment-set-up) + - [Cmake Command](#cmake-command) + - [Build Command](#build-command) +- [List of optimizations supported](#list-of-optimizations-supported) + - [Reinforcement Learning assisted Loop Distribution for Locality and Vectorization](#reinforcement-learning-assisted-loop-distribution-for-locality-and-vectorization) + - [RL4Real](#rl4real) + - [POSET-RL](#poset-rl) + +>>>>>>> e15d6372eff5... ReadMe Updated + +## About +<<<<<<< HEAD + +enter about +======= +This GitHub repository encompasses the complete Compiler Infrastructure for ML-Driven Optimizations developed by the Compilers group at IITH. The repository integrates ML-driven optimization techniques into the LLVM project through the ML Compiler Bridge infrastructure and IR2Vec embeddings. + +We strongly encourage you to delve into this repository, explore its contents, and consider building additional tools leveraging the existing infrastructure. 
We presume you are familiar with LLVM and build upon that, but if you are not familiar with LLVM, then here are a few resources that might help : + +* [Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-started-with-llvm) +page for detailed information on configuring and compiling LLVM. You can visit +* [Directory Layout](https://llvm.org/docs/GettingStarted.html#directory-layout) +to learn about the layout of the source code tree. + +### IR2Vec +[IR2Vec](https://arxiv.org/abs/1909.06228) is an LLVM IR based framework to generate distributed representations for the source code in an unsupervised manner, which can be used to represent programs as input to solve machine learning tasks that take programs as inputs. It can capture intrinsic characteristics of the program. This is achieved by using the flow analyses information like Use-Def, Reaching Definitions and Live Variable information of the program. + +>IR2Vec: LLVM IR based Scalable Program Embeddings : S. VenkataKeerthy, Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar, Ramakrishna Upadrasta, Y. N. Srikant. + +### ML Compiler Bridge +As a part of the [ML-Compiler-Bridge](https://arxiv.org/pdf/2311.10800.pdf), it is possible to have multiple ways of integrating compiler and the Machine learning model. These methods primarily use server client communication techniques like gRPC, and pipes. The ONNX flow which is capable of representation of ML models into DAG-based IRs with callable APIs in multiple languages (C/C++/Python), does not require a server-client model or inter process communication. Additionally, TensorFlow's AOT compiled models are also supported for inference. + +> The Next 700 ML-Enabled Compiler Optimizations: S. VenkataKeerthy, Siddharth Jain, Umesh Kalvakuntla, Pranav Sai Gorantla, Rajiv Sailesh Chitale, Eugene Brevdo, Albert Cohen, Mircea Trofin, Ramakrishna Upadrasta +>>>>>>> c3b6dd0d8e2c... 
Minor updates to README.md + +## Setup + +### Requirements + +* cmake (>= 3.10) +* GNU Make (4.2.1) +* Python (3.10), C++17 +* gRPC v1.34 and protobuf v3.13 - for gRPC Model Runner + * Building GRPC from Source: Please follow [`Build GRPC with cmake`](https://grpc.io/docs/languages/cpp/quickstart/) **v1.34 (protobuf v3.13)** to build GRPC from source. + * In the above tutorial setting `DCMAKE_INSTALL_PREFIX` is necessary as it would give you an easy way to uninstall GRPC later. +<<<<<<< HEAD +* [ONNXRuntime](https://github.com/microsoft/onnxruntime/releases) v1.16.3 +<<<<<<< HEAD +* TensorFlow - for TF Model Runner (AOT flow) # this should be in the yml only don't need to set it up separately +======= +======= +> [!WARNING] +> The version of gRPC that you clone should be 1.34.0 not 1.34.x +* Eigen library (3.3.7) + * If your system already have Eigen (3.3.7) setup, you can skip this step. + * Download and extract the released version. +```bash + wget https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz + tar -xvzf eigen-3.3.7.tar.gz + mkdir eigen-build && cd eigen-build + cmake ../eigen-3.3.7 && make + cd ../ +``` + +* [ONNXRuntime v1.16.3](https://github.com/microsoft/onnxruntime/releases) +>>>>>>> 1912ecab9aeb... Read Me update + + * The following commands will download ONNX Runtime v1.16.3 in your present working directory and then untar the contents. + The path for this will be used in this [section](#exporting-onnx-path-variables) +```bash + wget https://github.com/microsoft/onnxruntime/releases/download/v1.16.3/onnxruntime-linux-x64-1.16.3.tgz + tar -xvf onnxruntime-linux-x64-1.16.3.tgz +``` +* TensorFlow - for TF Model Runner (AOT flow) +>>>>>>> e15d6372eff5... 
ReadMe Updated + * Tested with TensorFlow 2.13.0 +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +* Other python requirements are available in [mlbridge.yml] # needs to be updated with sangamesh's.yml +======= +* Other python requirements are available in [mlbridge.yml]() +>>>>>>> 1912ecab9aeb... Read Me update +======= +* Other python requirements are available in [mlbridge.yml](./mlopt.yml) +>>>>>>> 2a48b0c91d4e... Updated Readme + * Conda/Anaconda based virtual environment is assumed + +======= +* Other python requirements are available in [mlopt.yml](./mlopt.yml) + * Conda/Anaconda based virtual environment is assumed + + + +>>>>>>> fa22a6d17191... ReadMe Updated +(Experiments are done on an Ubuntu 20.04 machine) + +<<<<<<< HEAD +Commands to install the conda environment and set up onnx +======= +## Building the Project +The following section outlines the build process for our repository. + +### Clone the Repository +You need to clone the repository and initialize all the submodules. The following commands clone the repository from GitHub to your local machine and initialize all submodules, i.e. clone all the submodules within it. + +```bash +git clone git@github.com:IITH-Compilers/ml-llvm-project.git +cd ml-llvm-project +git checkout mlbridge-lib +git pull +git submodule update --init --recursive +``` + +### Setting up the build environment. + +#### Exporting ONNX Path Variables +<<<<<<< HEAD +As the name suggests this is the Path to the ONNX Runtime that we downloaded in [Setup](#setup) . The path of ONNX Runtime is required not only for building the project but also it is required when running inference using the ONNX Model Runner. Hence it is a better idea to export these paths and also add them to the PATH and LD_LIBRARY_PATH +>>>>>>> e15d6372eff5... ReadMe Updated +======= +As the name suggests this is the Path to the ONNX Runtime that we downloaded in [Requirements](#requirements) . 
The path of ONNX Runtime is required not only for building the project but also it is required when running inference using the ONNX Model Runner. Hence it is a better idea to export these paths and also add them to the PATH and LD_LIBRARY_PATH +>>>>>>> 49b56365881b... ReadMe Updated + +```bash +<<<<<<< HEAD +#TODO: change this to what ever will be the location of the envs +cp -r /Pramana/ML_LLVM_Tools/AE/envs/ ~/ +======= + export ONNX_DIR= #path to your onnx runtime + export LD_LIBRARY_PATH=${ONNX_DIR}:$LD_LIBRARY_PATH + export LIBRARY_PATH=${ONNX_DIR}:$LIBRARY_PATH + export PATH=${ONNX_DIR}/include:$PATH +``` +> [!TIP] +> It is advised to add these commands to your **~/.bashrc** as they'll be needed when you switch shells. +>>>>>>> c3b6dd0d8e2c... Minor updates to README.md + +<<<<<<< HEAD +======= +#### Conda environment set-up +The following commands will help you install and set up the necessary conda environments. +```bash +>>>>>>> e15d6372eff5... ReadMe Updated +# install the env using the following commands +<<<<<<< HEAD +conda env create -f ~/env/LOF_original_env.yml +conda env create -f ~/env/mlgo-new + +wget https://github.com/microsoft/onnxruntime/releases/download/v1.16.3/onnxruntime-linux-x64-1.16.3.tgz +tar -xvf onnxruntime-linux-x64-1.16.3.tgz + +# get GRPC working +# check GRPC version +# check again it should be exactly 1.34.0 not 1.34.x +======= +conda env create -f ./mlopt.yml +>>>>>>> 2a48b0c91d4e... Updated Readme + +<<<<<<< HEAD +======= +# switch to the mlopt env, which is required for the build process +conda activate mlopt +>>>>>>> 9cb366a5644d... Changed Env Names +``` + +<<<<<<< HEAD +<<<<<<< HEAD + +### Build + +Following are the required steps to build the project, if you would like you could run them in a script too after changing the required parameters. + +======= +#### A small hack to prevent the conda environments from clashing (To Be removed) +>>>>>>> e15d6372eff5... 
ReadMe Updated +```bash +# switch to mlgo-new env as you will need it to build the setup +conda activate mlgo-new + +# rename files in your conda environment +mv ~/anaconda3/envs/mlgo-new/lib/python3.10/site-packages/tensorflow/include/google/ ~/anaconda3/envs/mlgo-new/lib/python3.10/site-packages/tensorflow/include/google_new/ + +mv ~/anaconda3/envs/mlgo-new/include/google/ ~/anaconda3/envs/mlgo-new/include/google_new/ + +<<<<<<< HEAD +git clone git@github.com:IITH-Compilers/ml-llvm-project.git +cd ml-llvm-project +git checkout mlbridge-lib +git pull +git submodule update --init --recursive +mkdir build +cd build + +# build command + cmake -G "Unix Makefiles" -S ../llvm -B . \ +======= +======= +>>>>>>> d0945024c816... ReadMe Updated +#### Cmake Command +Now we need to create a build directory for our build. Use the following commands to make a build dir inside the cloned repository + +```bash +# create a build dir and move to it +mkdir build +cd build +``` +After moving to the build directory, we'll use CMake to generate our build files and directories. Here we are using Makefiles, you may choose any generator of your choice. + +```bash +cmake -G "Unix Makefiles" -S ../llvm -B . \ +>>>>>>> e15d6372eff5... ReadMe Updated + -DCMAKE_BUILD_TYPE="Release" \ + -DLLVM_ENABLE_PROJECTS="clang;IR2Vec;ml-llvm-tools;mlir;MLCompilerBridge" \ + -DLLVM_TARGETS_TO_BUILD="X86" \ + -DLLVM_ENABLE_ASSERTIONS=on \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ + -DLLVM_CCACHE_BUILD=ON \ +<<<<<<< HEAD + -DONNXRUNTIME_ROOTDIR= # change to your path where you wget the onnxruntime + -DLLVM_TF_AOT_RUNTIME= # change to your path + -DTENSORFLOW_AOT_PATH= # change to your path +======= + -DONNXRUNTIME_ROOTDIR= # path to your onnx runtime, use $ONNX_DIR if you already exported this environment variable \ + -DLLVM_TF_AOT_RUNTIME= # \ + -DTENSORFLOW_AOT_PATH= # \ +<<<<<<< HEAD +>>>>>>> e15d6372eff5... 
ReadMe Updated + -DLLVM_INLINER_MODEL_PATH=download \ + -DLLVM_INLINER_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v1.1/inlining-Oz-99f0063-v1.1.tar.gz \ + -DLLVM_RAEVICT_MODEL_PATH=download \ + -DLLVM_RAEVICT_MODEL_CURRENT_URL=https://github.com/google/ml-compiler-opt/releases/download/regalloc-evict-v1.0/regalloc-evict-e67430c-v1.0.tar.gz + + +<<<<<<< HEAD +# don't make all +======= +#### Make command +After following the above steps you have successfully exported all the required environment variables and have also created the Makefile which shall be used to build the project. Use the following command to start your build. +```bash +>>>>>>> e15d6372eff5... ReadMe Updated +make clang opt -j50 +``` +======= +``` + +#### Build command +After following the above steps, you have successfully exported all the required environment variables and have also created the generator files which will be used to build the project. Use the following command to start your build. Example: +```bash +make clang opt -j $(nproc) +``` +> [!WARNING] +> For now building all targets is broken. Only build clang and opt +>>>>>>> 1912ecab9aeb... Read Me update +## List of optimizations supported + +This section will contain information about all the ML driven optimizations. Here is a brief about each optimization, and a simple onnx command which we can use to get one output (i.e. give it an input .c/.cpp/.ll and get the optimized binary). + +> [!TIP] +> If you'd like to see the LLVM IR that results from these optimizations, you can pass the appropriate flags to generate the .ll files + +### Reinforcement Learning assisted Loop Distribution for Locality and Vectorization + +We propose a Reinforcement Learning (RL) approach for loop distribution, optimizing for both vectorization and locality. Using SCC Dependence Graphs (SDGs), our RL model learns loop distribution order through topological walks. 
The reward is based on instruction cost and cache misses. We introduce a strategy to expand the training set by generating new loops. This method aims to enhance loop parallelization and improve overall code performance. + +<<<<<<< HEAD +======= +This is described in the paper [here](https://ieeexplore.ieee.org/abstract/document/10026979) . +[Website link](https://compilers.cse.iith.ac.in/publications/rl_loop_distribution/) + +> Reinforcement Learning assisted Loop Distribution for Locality and Vectorization, Shalini Jain, S. VenkataKeerthy, Rohit Aggarwal, Tharun Kumar Dangeti, Dibyendu Das, Ramakrishna Upadrasta LLVM-HPC, 2022. + +Implementation here : [Model Training](./model/LoopDistribution/src/README.md) , [Inference](./llvm/lib/Transforms/Scalar/IR2Vec-LOF/custom_loop_distribution/Readme.md) + +>>>>>>> 1912ecab9aeb... Read Me update +#### Try it out !!! + +> We assume you have already done the setup and built the project. + +```bash +# ONNX command for inference: +# this script will generate the optimized .ll file +./build/bin/opt -S \ + -custom_loop_distribution \ + -cld-use-onnx \ + -ml-config-path=/home/intern24007/ml-llvm-project/config \ + +``` + +### RL4Real + +<<<<<<< HEAD +<\write info here\> +======= +`RL4ReAl` is a retargetable Reinforcement Learning (RL) approach for solving the REgister ALlocation (REAL) problem on diverse architectures. + +This is described in the paper [here](https://dl.acm.org/doi/abs/10.1145/3578360.3580273). +<<<<<<< HEAD +<<<<<<< HEAD +Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. +>>>>>>> 1912ecab9aeb... Read Me update +======= +Please see [here](https://compilers.cse.iith.ac.in/publications/rl4real/) for more details. [Website link](https://compilers.cse.iith.ac.in/publications/rl4real/) +>>>>>>> fa22a6d17191... ReadMe Updated +======= +[Website link](https://compilers.cse.iith.ac.in/publications/rl4real/) +>>>>>>> d0945024c816... 
ReadMe Updated + +>RL4ReAl: Reinforcement Learning for Register Allocation : S. VenkataKeerthy, Siddharth Jain, Anilava Kundu, Rohit Aggarwal, Albert Cohen, Ramakrishna Upadrasta CC, 2023. + +Implementation here : [Model Training](./model/RL4ReAl/README.md) , [Inference](./llvm/lib/CodeGen/MLRegAlloc/README.md) + +#### Try it out !!! +```bash +# write your bash commands here +``` + +### POSET-RL + +<<<<<<< HEAD +<\write info here\> +======= +POSET-RL uses a reinforcement learning approach as the search space of optimization sequences is too big to enumerate. For a compiler with m optimization passes, if the sequence length is fixed as n, then there can be potentially $m^n$ combinations, allowing repetitions. The reinforcement learning model is trained and evaluated on programs that are represented using IR2Vec embeddings. + +This is described in the arxiv link ([here](https://arxiv.org/abs/2204.02013)). +Please see [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf) for more details. [Website link](https://compilers.cse.iith.ac.in/projects/posetrl/). + +<<<<<<< HEAD +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISPASS, 2022 +>>>>>>> 1912ecab9aeb... Read Me update +======= +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISPASS, 2022. +>>>>>>> fa22a6d17191... ReadMe Updated + +Implementation here : [Model Training](./model/POSET-RL/README.md) , [Inference](./llvm/lib/Transforms/IPO/PosetRL/README.md) + +#### Try it out !!! +```bash +<<<<<<< HEAD +# write your bash commands here +======= +./build/bin/opt \ + -poset-rl \ + -use-onnx \ + -ml-config-path= # path to your ml config \ + \ +>>>>>>> e15d6372eff5... 
ReadMe Updated +``` + + +--- +Everything after this is old. This is kept just for reference +--- + +# ML-Register-Allocation +> Support - LLVM 10.0.1 release on **X86** architecture +>>>>>>> beae0d60d8fe... Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project This is a fork of LLVM repository with IR2Vec and other "tools" to facilitate training and inferencing Machine Learning models for compiler optimizations. @@ -37,7 +414,69 @@ Other components include: the [libc++ C++ standard library](https://libcxx.llvm.org), the [LLD linker](https://lld.llvm.org), and more. +<<<<<<< HEAD ## Getting the Source Code and Building LLVM +======= +`conda env create -f poset-rl-odg.yml` + +`conda activate poset-rl-odg` + +Generate sub-sequences from the Oz pass sequence + +`python gen-odg.py -Oz` + +The graph and sub-sequences can be generated for other LLVM optimization levels. The required optimization flag needs to be provided as an argument when calling the above script. + +## Experiments +Install and activate the conda environment + +`conda env create -f rllib_env.yml` + +`conda activate rllib_env` + +Use `-mcpu=cortex-a72` for AArch64 architecture when calling `clang` or `opt` in [RLLib-PhaseOrder/Environment.py](RLLib-PhaseOrder/Environment.py) + +### Training + +Add path to directory containing LLVM IR files to be used for training in [RLLib-PhaseOrder/Environment.py](RLLib-PhaseOrder/Environment.py) + +`python experiment.py --llvm_dir --ir2vec_dir ` + +### Inference + +Add paths to `llvm_dir`, `ir2vec_dir` and saved RLLib model to run-inference.sh + +`bash run-inference.sh` + +Print size, throughput and sub-sequences chosen by the model to a csv + +`bash results-binsize-reuse` + +Clean temporary files generated + + * ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for *type* are Debug, + Release, RelWithDebInfo, and MinSizeRel. Default is Debug. 
+ + * ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled + (default is Yes for Debug builds, No for all other build types). + + * Run your build tool of choice! + + * The default target (i.e. ``ninja`` or ``make``) will build all of LLVM. + + * The ``check-all`` target (i.e. ``ninja check-all``) will run the + regression tests to ensure everything is in working order. + + * CMake will generate build targets for each tool and library, and most + LLVM sub-projects generate their own ``check-`` target. + + * Running a serial build will be *slow*. To improve speed, try running a + parallel build. That's done by default in Ninja; for ``make``, use + ``make -j NNN`` (NNN is the number of parallel jobs, use e.g. number of + CPUs you have.) + + * For more information see [CMake](https://llvm.org/docs/CMake.html) +>>>>>>> beae0d60d8fe... Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project Consult the [Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm) @@ -52,5 +491,9 @@ Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord chat](https://discord.gg/xS7Z362), or #llvm IRC channel on [OFTC](https://oftc.net/). +<<<<<<< HEAD The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for participants to all modes of communication within the project. +======= +`make clean` +>>>>>>> beae0d60d8fe... 
Updated readme Added a basic structure to incorporate all info about the project, how to build it and also left space for people to fill in about their project diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index 2182486f93a5..c20dca0e0bbe 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -16,6 +16,15 @@ set( LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD +<<<<<<< HEAD +======= + AddSizeAttr +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues + IR2Vec + CollectMachineIR +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) # Support plugins. diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 070688178d69..3cf1ec9023ce 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -351,6 +351,24 @@ void initializeWasmEHPreparePass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); void initializeXRayInstrumentationPass(PassRegistry&); +<<<<<<< HEAD +======= +void initializePosetRLPass(PassRegistry &); +void initializeHelloMLBridgePass(PassRegistry &); +void initializeCollectMachineIRPass(PassRegistry &); +void initializeConfigGenPass(PassRegistry &); + + +void initializeRDGWrapperPassPass(PassRegistry&); + +void initializeLoopDistributionWrapperPassPass(PassRegistry&); + +void initializecustom_loop_distributionPass(PassRegistry&); + +void initializeInnerMostLoopPassPass(PassRegistry &); + +void initializeLoopDistributionServerPassPass(PassRegistry &); +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp } // end namespace llvm diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 7dd41b86700d..5d9067793db7 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -36,6 +36,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/Support/Valgrind.h" +<<<<<<< HEAD +======= +#include "llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h" +>>>>>>> 92e0943e9769... Fixed cmake linking issues #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" @@ -45,6 +49,11 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/ObjCARC.h" +<<<<<<< HEAD +======= +#include "llvm/Transforms/IPO/PosetRL/PosetRL.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h" +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" @@ -141,7 +150,11 @@ namespace { (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandLargeDivRemPass(); (void) llvm::createExpandMemCmpPass(); +<<<<<<< HEAD (void) llvm::createExpandVectorPredicationPass(); +======= + (void)llvm::createPosetRLPass(); +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h b/llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h new file mode 100644 index 000000000000..a958c5d63e1c --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h @@ -0,0 +1,19 @@ +#ifndef LLVM_TRANSFORMS_ADDSIZEATTR_H +#define LLVM_TRANSFORMS_ADDSIZEATTR_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/*class AddSizeAttrPass + : public ModulePass { + +public: + bool runOnModule(Module &M); +};*/ + +ModulePass *createAddSizeAttrPass(); +} + +#endif diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h new file mode 100644 index 000000000000..f3b550290470 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h @@ -0,0 +1,139 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the LICENSE file +// in the root directory of this source tree. +// +// This file was generated automatically the script +// build_tools/llvm/legacy_pass_manager/make_action_space_genfiles.py. 
+ +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONENUM_H +#define LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONENUM_H + +enum LlvmAction { + ADD_DISCRIMINATORS=1, + ADCE, + AGGRESSIVE_INSTCOMBINE, + ALIGNMENT_FROM_ASSUMPTIONS, + ALWAYS_INLINE, + ARGPROMOTION, + ATTRIBUTOR, + BARRIER, + BDCE, + BREAK_CRIT_EDGES, + SIMPLIFYCFG, + CALLSITE_SPLITTING, + CALLED_VALUE_PROPAGATION, + CANONICALIZE_ALIASES, + CONSTHOIST, + CONSTMERGE, + CONSTPROP, + CORO_CLEANUP, + CORO_EARLY, + CORO_ELIDE, + CORO_SPLIT, + CORRELATED_PROPAGATION, + CROSS_DSO_CFI, + DEADARGELIM, + DCE, + DIE, + DSE, + REG2MEM, + DIV_REM_PAIRS, + EARLY_CSE_MEMSSA, + EARLY_CSE, + ELIM_AVAIL_EXTERN, + EE_INSTRUMENT, + FLATTENCFG, + FLOAT2INT, + FORCEATTRS, + INLINE, + INSERT_GCOV_PROFILING, + GVN_HOIST, + GVN, + GLOBALDCE, + GLOBALOPT, + GLOBALSPLIT, + GUARD_WIDENING, + HOTCOLDSPLIT, + IPCONSTPROP, + IPSCCP, + INDVARS, + IRCE, + INFER_ADDRESS_SPACES, + INFERATTRS, + INJECT_TLI_MAPPINGS, + INSTSIMPLIFY, + INSTCOMBINE, + INSTNAMER, + JUMP_THREADING, + LCSSA, + LICM, + LIBCALLS_SHRINKWRAP, + LOAD_STORE_VECTORIZER, + LOOP_DATA_PREFETCH, + LOOP_DELETION, + LOOP_DISTRIBUTE, + LOOP_FUSION, + LOOP_GUARD_WIDENING, + LOOP_IDIOM, + LOOP_INSTSIMPLIFY, + LOOP_INTERCHANGE, + LOOP_LOAD_ELIM, + LOOP_PREDICATION, + LOOP_REROLL, + LOOP_ROTATE, + LOOP_SIMPLIFYCFG, + LOOP_SIMPLIFY, + LOOP_SINK, + LOOP_REDUCE, + LOOP_UNROLL_AND_JAM, + LOOP_UNROLL, + LOOP_UNSWITCH, + LOOP_VECTORIZE, + LOOP_VERSIONING_LICM, + LOOP_VERSIONING, + LOWERATOMIC, + LOWER_CONSTANT_INTRINSICS, + LOWER_EXPECT, + LOWER_GUARD_INTRINSIC, + LOWERINVOKE, + LOWER_MATRIX_INTRINSICS, + LOWERSWITCH, + LOWER_WIDENABLE_CONDITION, + MEMCPYOPT, + MERGEFUNC, + MERGEICMPS, + MLDST_MOTION, + SANCOV, + NAME_ANON_GLOBALS, + NARY_REASSOCIATE, + NEWGVN, + PGO_MEMOP_OPT, + PARTIAL_INLINER, + PARTIALLY_INLINE_LIBCALLS, + POST_INLINE_EE_INSTRUMENT, + FUNCTIONATTRS, + MEM2REG, + PRUNE_EH, + REASSOCIATE, + REDUNDANT_DBG_INST_ELIM, + RPO_FUNCTIONATTRS, + 
REWRITE_STATEPOINTS_FOR_GC, + SCCP, + SLP_VECTORIZER, + SROA, + SCALARIZER, + SEPARATE_CONST_OFFSET_FROM_GEP, + SIMPLE_LOOP_UNSWITCH, + SINK, + SPECULATIVE_EXECUTION, + SLSR, + STRIP_DEAD_PROTOTYPES, + STRIP_DEBUG_DECLARE, + STRIP_NONDEBUG, + STRIP, + TAILCALLELIM, + MERGERETURN, +}; + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h new file mode 100644 index 000000000000..1187ff3aa487 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h @@ -0,0 +1,109 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the LICENSE file +// in the root directory of this source tree. +// +// This file was generated automatically by the script +// build_tools/llvm/legacy_pass_manager/make_action_space_genfiles.py. + +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONHEADER_H +#define LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONHEADER_H + +#include "llvm/LinkAllPasses.h" +#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/Coroutines.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/CalledValuePropagation.h" +#include "llvm/Transforms/IPO/ConstantMerge.h" +#include "llvm/Transforms/IPO/CrossDSOCFI.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" +#include "llvm/Transforms/IPO/ElimAvailExtern.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/IPO/GlobalDCE.h" +#include "llvm/Transforms/IPO/GlobalOpt.h" +#include "llvm/Transforms/IPO/GlobalSplit.h" +#include "llvm/Transforms/IPO/HotColdSplitting.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include 
"llvm/Transforms/IPO/MergeFunctions.h" +#include "llvm/Transforms/IPO/PartialInlining.h" +#include "llvm/Transforms/IPO/SCCP.h" +#include "llvm/Transforms/IPO/StripDeadPrototypes.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" +#include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" +#include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" +#include "llvm/Transforms/Scalar/DCE.h" +#include "llvm/Transforms/Scalar/DeadStoreElimination.h" +#include "llvm/Transforms/Scalar/DivRemPairs.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/Float2Int.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GuardWidening.h" +#include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopDataPrefetch.h" +#include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Scalar/LoopDistribute.h" +#include "llvm/Transforms/Scalar/LoopFuse.h" +#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/Transforms/Scalar/LoopLoadElimination.h" +#include "llvm/Transforms/Scalar/LoopPredication.h" +#include "llvm/Transforms/Scalar/LoopRotation.h" +#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" +#include "llvm/Transforms/Scalar/LoopSink.h" +#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/Transforms/Scalar/LowerAtomic.h" +#include 
"llvm/Transforms/Scalar/LowerConstantIntrinsics.h" +#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" +#include "llvm/Transforms/Scalar/LowerWidenableCondition.h" +#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" +#include "llvm/Transforms/Scalar/NaryReassociate.h" +#include "llvm/Transforms/Scalar/NewGVN.h" +#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" +#include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/SpeculativeExecution.h" +#include "llvm/Transforms/Scalar/TailRecursionElimination.h" +#include "llvm/Transforms/Utils/AddDiscriminators.h" +#include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/InjectTLIMappings.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/LowerInvoke.h" +#include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" + +namespace llvm { +/* Returns createEarlyCSEPass() with UseMemorySSA=true. 
Marked inline because the definition lives in a header: a non-inline definition would break the one-definition rule (duplicate symbol at link time) once two translation units include this file. */ inline FunctionPass* createEarlyCSEMemSSAPass() { return 
createEarlyCSEPass(/*UseMemorySSA=*/true); } +} // namespace llvm + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h new file mode 100644 index 000000000000..08f75e67805c --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h @@ -0,0 +1,388 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the LICENSE file +// in the root directory of this source tree. +// +// This file was generated automatically by the script +// build_tools/llvm/legacy_pass_manager/make_action_space_genfiles.py. + +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONSWITCH_H +#define LLVM_TRANSFORMS_CODESIZEOPT_ACTIONSPACE_ACTIONSWITCH_H + +#define HANDLE_ACTION(action, handlePass) \ + switch (action) { \ + case LlvmAction::ADD_DISCRIMINATORS: \ + handlePass(llvm::createAddDiscriminatorsPass()); \ + break; \ + case LlvmAction::ADCE: \ + handlePass(llvm::createAggressiveDCEPass()); \ + break; \ + case LlvmAction::AGGRESSIVE_INSTCOMBINE: \ + handlePass(llvm::createAggressiveInstCombinerPass()); \ + break; \ + case LlvmAction::ALIGNMENT_FROM_ASSUMPTIONS: \ + handlePass(llvm::createAlignmentFromAssumptionsPass()); \ + break; \ + case LlvmAction::ALWAYS_INLINE: \ + handlePass(llvm::createAlwaysInlinerLegacyPass()); \ + break; \ + case LlvmAction::ARGPROMOTION: \ + handlePass(llvm::createArgumentPromotionPass()); \ + break; \ + case LlvmAction::ATTRIBUTOR: \ + handlePass(llvm::createAttributorLegacyPass()); \ + break; \ + case LlvmAction::BARRIER: \ + handlePass(llvm::createBarrierNoopPass()); \ + break; \ + case LlvmAction::BDCE: \ + handlePass(llvm::createBitTrackingDCEPass()); \ + break; \ + case LlvmAction::BREAK_CRIT_EDGES: \ + handlePass(llvm::createBreakCriticalEdgesPass()); \ + break; \ + case LlvmAction::SIMPLIFYCFG: \ + 
handlePass(llvm::createCFGSimplificationPass()); \ + break; \ + case LlvmAction::CALLSITE_SPLITTING: \ + handlePass(llvm::createCallSiteSplittingPass()); \ + break; \ + case LlvmAction::CALLED_VALUE_PROPAGATION: \ + handlePass(llvm::createCalledValuePropagationPass()); \ + break; \ + case LlvmAction::CANONICALIZE_ALIASES: \ + handlePass(llvm::createCanonicalizeAliasesPass()); \ + break; \ + case LlvmAction::CONSTHOIST: \ + handlePass(llvm::createConstantHoistingPass()); \ + break; \ + case LlvmAction::CONSTMERGE: \ + handlePass(llvm::createConstantMergePass()); \ + break; \ + case LlvmAction::CONSTPROP: \ + handlePass(llvm::createConstantPropagationPass()); \ + break; \ + case LlvmAction::CORO_CLEANUP: \ + handlePass(llvm::createCoroCleanupLegacyPass()); \ + break; \ + case LlvmAction::CORO_EARLY: \ + handlePass(llvm::createCoroEarlyLegacyPass()); \ + break; \ + case LlvmAction::CORO_ELIDE: \ + handlePass(llvm::createCoroElideLegacyPass()); \ + break; \ + case LlvmAction::CORO_SPLIT: \ + handlePass(llvm::createCoroSplitLegacyPass()); \ + break; \ + case LlvmAction::CORRELATED_PROPAGATION: \ + handlePass(llvm::createCorrelatedValuePropagationPass()); \ + break; \ + case LlvmAction::CROSS_DSO_CFI: \ + handlePass(llvm::createCrossDSOCFIPass()); \ + break; \ + case LlvmAction::DEADARGELIM: \ + handlePass(llvm::createDeadArgEliminationPass()); \ + break; \ + case LlvmAction::DCE: \ + handlePass(llvm::createDeadCodeEliminationPass()); \ + break; \ + case LlvmAction::DIE: \ + handlePass(llvm::createDeadInstEliminationPass()); \ + break; \ + case LlvmAction::DSE: \ + handlePass(llvm::createDeadStoreEliminationPass()); \ + break; \ + case LlvmAction::REG2MEM: \ + handlePass(llvm::createDemoteRegisterToMemoryPass()); \ + break; \ + case LlvmAction::DIV_REM_PAIRS: \ + handlePass(llvm::createDivRemPairsPass()); \ + break; \ + case LlvmAction::EARLY_CSE_MEMSSA: \ + handlePass(llvm::createEarlyCSEMemSSAPass()); \ + break; \ + case LlvmAction::EARLY_CSE: \ + 
handlePass(llvm::createEarlyCSEPass()); \ + break; \ + case LlvmAction::ELIM_AVAIL_EXTERN: \ + handlePass(llvm::createEliminateAvailableExternallyPass()); \ + break; \ + case LlvmAction::EE_INSTRUMENT: \ + handlePass(llvm::createEntryExitInstrumenterPass()); \ + break; \ + case LlvmAction::FLATTENCFG: \ + handlePass(llvm::createFlattenCFGPass()); \ + break; \ + case LlvmAction::FLOAT2INT: \ + handlePass(llvm::createFloat2IntPass()); \ + break; \ + case LlvmAction::FORCEATTRS: \ + handlePass(llvm::createForceFunctionAttrsLegacyPass()); \ + break; \ + case LlvmAction::INLINE: \ + handlePass(llvm::createFunctionInliningPass()); \ + break; \ + case LlvmAction::INSERT_GCOV_PROFILING: \ + handlePass(llvm::createGCOVProfilerPass()); \ + break; \ + case LlvmAction::GVN_HOIST: \ + handlePass(llvm::createGVNHoistPass()); \ + break; \ + case LlvmAction::GVN: \ + handlePass(llvm::createGVNPass()); \ + break; \ + case LlvmAction::GLOBALDCE: \ + handlePass(llvm::createGlobalDCEPass()); \ + break; \ + case LlvmAction::GLOBALOPT: \ + handlePass(llvm::createGlobalOptimizerPass()); \ + break; \ + case LlvmAction::GLOBALSPLIT: \ + handlePass(llvm::createGlobalSplitPass()); \ + break; \ + case LlvmAction::GUARD_WIDENING: \ + handlePass(llvm::createGuardWideningPass()); \ + break; \ + case LlvmAction::HOTCOLDSPLIT: \ + handlePass(llvm::createHotColdSplittingPass()); \ + break; \ + case LlvmAction::IPCONSTPROP: \ + handlePass(llvm::createIPConstantPropagationPass()); \ + break; \ + case LlvmAction::IPSCCP: \ + handlePass(llvm::createIPSCCPPass()); \ + break; \ + case LlvmAction::INDVARS: \ + handlePass(llvm::createIndVarSimplifyPass()); \ + break; \ + case LlvmAction::IRCE: \ + handlePass(llvm::createInductiveRangeCheckEliminationPass()); \ + break; \ + case LlvmAction::INFER_ADDRESS_SPACES: \ + handlePass(llvm::createInferAddressSpacesPass()); \ + break; \ + case LlvmAction::INFERATTRS: \ + handlePass(llvm::createInferFunctionAttrsLegacyPass()); \ + break; \ + case 
LlvmAction::INJECT_TLI_MAPPINGS: \ + handlePass(llvm::createInjectTLIMappingsLegacyPass()); \ + break; \ + case LlvmAction::INSTSIMPLIFY: \ + handlePass(llvm::createInstSimplifyLegacyPass()); \ + break; \ + case LlvmAction::INSTCOMBINE: \ + handlePass(llvm::createInstructionCombiningPass()); \ + break; \ + case LlvmAction::INSTNAMER: \ + handlePass(llvm::createInstructionNamerPass()); \ + break; \ + case LlvmAction::JUMP_THREADING: \ + handlePass(llvm::createJumpThreadingPass()); \ + break; \ + case LlvmAction::LCSSA: \ + handlePass(llvm::createLCSSAPass()); \ + break; \ + case LlvmAction::LICM: \ + handlePass(llvm::createLICMPass()); \ + break; \ + case LlvmAction::LIBCALLS_SHRINKWRAP: \ + handlePass(llvm::createLibCallsShrinkWrapPass()); \ + break; \ + case LlvmAction::LOAD_STORE_VECTORIZER: \ + handlePass(llvm::createLoadStoreVectorizerPass()); \ + break; \ + case LlvmAction::LOOP_DATA_PREFETCH: \ + handlePass(llvm::createLoopDataPrefetchPass()); \ + break; \ + case LlvmAction::LOOP_DELETION: \ + handlePass(llvm::createLoopDeletionPass()); \ + break; \ + case LlvmAction::LOOP_DISTRIBUTE: \ + handlePass(llvm::createLoopDistributePass()); \ + break; \ + case LlvmAction::LOOP_FUSION: \ + handlePass(llvm::createLoopFusePass()); \ + break; \ + case LlvmAction::LOOP_GUARD_WIDENING: \ + handlePass(llvm::createLoopGuardWideningPass()); \ + break; \ + case LlvmAction::LOOP_IDIOM: \ + handlePass(llvm::createLoopIdiomPass()); \ + break; \ + case LlvmAction::LOOP_INSTSIMPLIFY: \ + handlePass(llvm::createLoopInstSimplifyPass()); \ + break; \ + case LlvmAction::LOOP_INTERCHANGE: \ + handlePass(llvm::createLoopInterchangePass()); \ + break; \ + case LlvmAction::LOOP_LOAD_ELIM: \ + handlePass(llvm::createLoopLoadEliminationPass()); \ + break; \ + case LlvmAction::LOOP_PREDICATION: \ + handlePass(llvm::createLoopPredicationPass()); \ + break; \ + case LlvmAction::LOOP_REROLL: \ + handlePass(llvm::createLoopRerollPass()); \ + break; \ + case LlvmAction::LOOP_ROTATE: \ + 
handlePass(llvm::createLoopRotatePass()); \ + break; \ + case LlvmAction::LOOP_SIMPLIFYCFG: \ + handlePass(llvm::createLoopSimplifyCFGPass()); \ + break; \ + case LlvmAction::LOOP_SIMPLIFY: \ + handlePass(llvm::createLoopSimplifyPass()); \ + break; \ + case LlvmAction::LOOP_SINK: \ + handlePass(llvm::createLoopSinkPass()); \ + break; \ + case LlvmAction::LOOP_REDUCE: \ + handlePass(llvm::createLoopStrengthReducePass()); \ + break; \ + case LlvmAction::LOOP_UNROLL_AND_JAM: \ + handlePass(llvm::createLoopUnrollAndJamPass()); \ + break; \ + case LlvmAction::LOOP_UNROLL: \ + handlePass(llvm::createLoopUnrollPass()); \ + break; \ + case LlvmAction::LOOP_UNSWITCH: \ + handlePass(llvm::createLoopUnswitchPass()); \ + break; \ + case LlvmAction::LOOP_VECTORIZE: \ + handlePass(llvm::createLoopVectorizePass()); \ + break; \ + case LlvmAction::LOOP_VERSIONING_LICM: \ + handlePass(llvm::createLoopVersioningLICMPass()); \ + break; \ + case LlvmAction::LOOP_VERSIONING: \ + handlePass(llvm::createLoopVersioningPass()); \ + break; \ + case LlvmAction::LOWERATOMIC: \ + handlePass(llvm::createLowerAtomicPass()); \ + break; \ + case LlvmAction::LOWER_CONSTANT_INTRINSICS: \ + handlePass(llvm::createLowerConstantIntrinsicsPass()); \ + break; \ + case LlvmAction::LOWER_EXPECT: \ + handlePass(llvm::createLowerExpectIntrinsicPass()); \ + break; \ + case LlvmAction::LOWER_GUARD_INTRINSIC: \ + handlePass(llvm::createLowerGuardIntrinsicPass()); \ + break; \ + case LlvmAction::LOWERINVOKE: \ + handlePass(llvm::createLowerInvokePass()); \ + break; \ + case LlvmAction::LOWER_MATRIX_INTRINSICS: \ + handlePass(llvm::createLowerMatrixIntrinsicsPass()); \ + break; \ + case LlvmAction::LOWERSWITCH: \ + handlePass(llvm::createLowerSwitchPass()); \ + break; \ + case LlvmAction::LOWER_WIDENABLE_CONDITION: \ + handlePass(llvm::createLowerWidenableConditionPass()); \ + break; \ + case LlvmAction::MEMCPYOPT: \ + handlePass(llvm::createMemCpyOptPass()); \ + break; \ + case LlvmAction::MERGEFUNC: \ + 
handlePass(llvm::createMergeFunctionsPass()); \ + break; \ + case LlvmAction::MERGEICMPS: \ + handlePass(llvm::createMergeICmpsLegacyPass()); \ + break; \ + case LlvmAction::MLDST_MOTION: \ + handlePass(llvm::createMergedLoadStoreMotionPass()); \ + break; \ + case LlvmAction::SANCOV: \ + handlePass(llvm::createModuleSanitizerCoverageLegacyPassPass()); \ + break; \ + case LlvmAction::NAME_ANON_GLOBALS: \ + handlePass(llvm::createNameAnonGlobalPass()); \ + break; \ + case LlvmAction::NARY_REASSOCIATE: \ + handlePass(llvm::createNaryReassociatePass()); \ + break; \ + case LlvmAction::NEWGVN: \ + handlePass(llvm::createNewGVNPass()); \ + break; \ + case LlvmAction::PGO_MEMOP_OPT: \ + handlePass(llvm::createPGOMemOPSizeOptLegacyPass()); \ + break; \ + case LlvmAction::PARTIAL_INLINER: \ + handlePass(llvm::createPartialInliningPass()); \ + break; \ + case LlvmAction::PARTIALLY_INLINE_LIBCALLS: \ + handlePass(llvm::createPartiallyInlineLibCallsPass()); \ + break; \ + case LlvmAction::POST_INLINE_EE_INSTRUMENT: \ + handlePass(llvm::createPostInlineEntryExitInstrumenterPass()); \ + break; \ + case LlvmAction::FUNCTIONATTRS: \ + handlePass(llvm::createPostOrderFunctionAttrsLegacyPass()); \ + break; \ + case LlvmAction::MEM2REG: \ + handlePass(llvm::createPromoteMemoryToRegisterPass()); \ + break; \ + case LlvmAction::PRUNE_EH: \ + handlePass(llvm::createPruneEHPass()); \ + break; \ + case LlvmAction::REASSOCIATE: \ + handlePass(llvm::createReassociatePass()); \ + break; \ + case LlvmAction::REDUNDANT_DBG_INST_ELIM: \ + handlePass(llvm::createRedundantDbgInstEliminationPass()); \ + break; \ + case LlvmAction::RPO_FUNCTIONATTRS: \ + handlePass(llvm::createReversePostOrderFunctionAttrsPass()); \ + break; \ + case LlvmAction::REWRITE_STATEPOINTS_FOR_GC: \ + handlePass(llvm::createRewriteStatepointsForGCLegacyPass()); \ + break; \ + case LlvmAction::SCCP: \ + handlePass(llvm::createSCCPPass()); \ + break; \ + case LlvmAction::SLP_VECTORIZER: \ + 
handlePass(llvm::createSLPVectorizerPass()); \ + break; \ + case LlvmAction::SROA: \ + handlePass(llvm::createSROAPass()); \ + break; \ + case LlvmAction::SCALARIZER: \ + handlePass(llvm::createScalarizerPass()); \ + break; \ + case LlvmAction::SEPARATE_CONST_OFFSET_FROM_GEP: \ + handlePass(llvm::createSeparateConstOffsetFromGEPPass()); \ + break; \ + case LlvmAction::SIMPLE_LOOP_UNSWITCH: \ + handlePass(llvm::createSimpleLoopUnswitchLegacyPass()); \ + break; \ + case LlvmAction::SINK: \ + handlePass(llvm::createSinkingPass()); \ + break; \ + case LlvmAction::SPECULATIVE_EXECUTION: \ + handlePass(llvm::createSpeculativeExecutionPass()); \ + break; \ + case LlvmAction::SLSR: \ + handlePass(llvm::createStraightLineStrengthReducePass()); \ + break; \ + case LlvmAction::STRIP_DEAD_PROTOTYPES: \ + handlePass(llvm::createStripDeadPrototypesPass()); \ + break; \ + case LlvmAction::STRIP_DEBUG_DECLARE: \ + handlePass(llvm::createStripDebugDeclarePass()); \ + break; \ + case LlvmAction::STRIP_NONDEBUG: \ + handlePass(llvm::createStripNonDebugSymbolsPass()); \ + break; \ + case LlvmAction::STRIP: \ + handlePass(llvm::createStripSymbolsPass()); \ + break; \ + case LlvmAction::TAILCALLELIM: \ + handlePass(llvm::createTailCallEliminationPass()); \ + break; \ + case LlvmAction::MERGERETURN: \ + handlePass(llvm::createUnifyFunctionExitNodesPass()); \ + break; \ + } + +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h new file mode 100644 index 000000000000..c4414d684f7d --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h @@ -0,0 +1,10 @@ +#ifndef LLVM_TRANSFORMS_CODESIZEOPT_RL_H +#define LLVM_TRANSFORMS_CODESIZEOPT_RL_H + +#include "llvm/Pass.h" + +namespace llvm { + + ModulePass *createCodeSizeOptPass(); +} // namespace llvm +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h 
b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h new file mode 100644 index 000000000000..b1d94a15ace9 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -0,0 +1,258 @@ +// llvm/Transforms/IPO/PassManagerBuilder.h - Build Standard Pass -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H +#define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H + +#include +#include +#include +#include + +namespace llvm { +class ModuleSummaryIndex; +class Pass; +class TargetLibraryInfoImpl; +class TargetMachine; + +// The old pass manager infrastructure is hidden in a legacy namespace now. +namespace legacy { +class FunctionPassManager; +class PassManagerBase; +} + +/// PassManagerBuilder - This class is used to set up a standard optimization +/// sequence for languages like C and C++, allowing some APIs to customize the +/// pass sequence in various ways. A simple example of using it would be: +/// +/// PassManagerBuilder Builder; +/// Builder.OptLevel = 2; +/// Builder.populateFunctionPassManager(FPM); +/// Builder.populateModulePassManager(MPM); +/// +/// In addition to setting up the basic passes, PassManagerBuilder allows +/// frontends to vend a plugin API, where plugins are allowed to add extensions +/// to the default pass manager. They do this by specifying where in the pass +/// pipeline they want to be added, along with a callback function that adds +/// the pass(es). 
For example, a plugin that wanted to add a loop optimization +/// could do something like this: +/// +/// static void addMyLoopPass(const PMBuilder &Builder, PassManagerBase &PM) { +/// if (Builder.getOptLevel() > 2 && Builder.getOptSizeLevel() == 0) +/// PM.add(createMyAwesomePass()); +/// } +/// ... +/// Builder.addExtension(PassManagerBuilder::EP_LoopOptimizerEnd, +/// addMyLoopPass); +/// ... +class PassManagerBuilder { +public: + /// Extensions are passed to the builder itself (so they can see how it is + /// configured) as well as the pass manager to add stuff to. + typedef std::function + ExtensionFn; + typedef int GlobalExtensionID; + + enum ExtensionPointTy { + /// EP_EarlyAsPossible - This extension point allows adding passes before + /// any other transformations, allowing them to see the code as it is coming + /// out of the frontend. + EP_EarlyAsPossible, + + /// EP_ModuleOptimizerEarly - This extension point allows adding passes + /// just before the main module-level optimization passes. + EP_ModuleOptimizerEarly, + + /// EP_LoopOptimizerEnd - This extension point allows adding loop passes to + /// the end of the loop optimizer. + EP_LoopOptimizerEnd, + + /// EP_ScalarOptimizerLate - This extension point allows adding optimization + /// passes after most of the main optimizations, but before the last + /// cleanup-ish optimizations. + EP_ScalarOptimizerLate, + + /// EP_OptimizerLast -- This extension point allows adding passes that + /// run after everything else. + EP_OptimizerLast, + + /// EP_VectorizerStart - This extension point allows adding optimization + /// passes before the vectorizer and other highly target specific + /// optimization passes are executed. + EP_VectorizerStart, + + /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that + /// should not be disabled by O0 optimization level. The passes will be + /// inserted after the inlining pass. 
+ EP_EnabledOnOptLevel0, + + /// EP_Peephole - This extension point allows adding passes that perform + /// peephole optimizations similar to the instruction combiner. These passes + /// will be inserted after each instance of the instruction combiner pass. + EP_Peephole, + + /// EP_LateLoopOptimizations - This extension point allows adding late loop + /// canonicalization and simplification passes. This is the last point in + /// the loop optimization pipeline before loop deletion. Each pass added + /// here must be an instance of LoopPass. + /// This is the place to add passes that can remove loops, such as target- + /// specific loop idiom recognition. + EP_LateLoopOptimizations, + + /// EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC + /// passes at the end of the main CallGraphSCC passes and before any + /// function simplification passes run by CGPassManager. + EP_CGSCCOptimizerLate, + + /// EP_FullLinkTimeOptimizationEarly - This extensions point allow adding + /// passes that + /// run at Link Time, before Full Link Time Optimization. + EP_FullLinkTimeOptimizationEarly, + + /// EP_FullLinkTimeOptimizationLast - This extensions point allow adding + /// passes that + /// run at Link Time, after Full Link Time Optimization. + EP_FullLinkTimeOptimizationLast, + }; + + /// The Optimization Level - Specify the basic optimization level. + /// 0 = -O0, 1 = -O1, 2 = -O2, 3 = -O3 + unsigned OptLevel; + + /// SizeLevel - How much we're optimizing for size. + /// 0 = none, 1 = -Os, 2 = -Oz + unsigned SizeLevel; + + /// LibraryInfo - Specifies information about the runtime library for the + /// optimizer. If this is non-null, it is added to both the function and + /// per-module pass pipeline. + TargetLibraryInfoImpl *LibraryInfo; + + /// Inliner - Specifies the inliner to use. If this is non-null, it is + /// added to the per-module passes. 
+ Pass *Inliner; + + /// The module summary index to use for exporting information from the + /// regular LTO phase, for example for the CFI and devirtualization type + /// tests. + ModuleSummaryIndex *ExportSummary = nullptr; + + /// The module summary index to use for importing information to the + /// thin LTO backends, for example for the CFI and devirtualization type + /// tests. + const ModuleSummaryIndex *ImportSummary = nullptr; + + bool DisableTailCalls; + bool DisableUnrollLoops; + bool SLPVectorize; + bool LoopVectorize; + bool LoopsInterleaved; + bool RerollLoops; + bool NewGVN; + bool DisableGVNLoadPRE; + bool ForgetAllSCEVInLoopUnroll; + bool VerifyInput; + bool VerifyOutput; + bool MergeFunctions; + bool PrepareForLTO; + bool PrepareForThinLTO; + bool PerformThinLTO; + bool DivergentTarget; + unsigned LicmMssaOptCap; + unsigned LicmMssaNoAccForPromotionCap; + + /// Enable profile instrumentation pass. + bool EnablePGOInstrGen; + /// Enable profile context sensitive instrumentation pass. + bool EnablePGOCSInstrGen; + /// Enable profile context sensitive profile use pass. + bool EnablePGOCSInstrUse; + /// Profile data file name that the instrumentation will be written to. + std::string PGOInstrGen; + /// Path of the profile data file. + std::string PGOInstrUse; + /// Path of the sample Profile data file. + std::string PGOSampleUse; + static bool check_flag; + +private: + /// ExtensionList - This is list of all of the extensions that are registered. + std::vector> Extensions; + +public: + PassManagerBuilder(); + ~PassManagerBuilder(); + /// Adds an extension that will be used by all PassManagerBuilder instances. + /// This is intended to be used by plugins, to register a set of + /// optimisations to run automatically. + /// + /// \returns A global extension identifier that can be used to remove the + /// extension. 
+ static GlobalExtensionID addGlobalExtension(ExtensionPointTy Ty, + ExtensionFn Fn); + /// Removes an extension that was previously added using addGlobalExtension. + /// This is also intended to be used by plugins, to remove any extension that + /// was previously registered before being unloaded. + /// + /// \param ExtensionID Identifier of the extension to be removed. + static void removeGlobalExtension(GlobalExtensionID ExtensionID); + void addExtension(ExtensionPointTy Ty, ExtensionFn Fn); + +private: + void addExtensionsToPM(ExtensionPointTy ETy, + legacy::PassManagerBase &PM) const; + void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const; + void addLTOOptimizationPasses(legacy::PassManagerBase &PM); + void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); + void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); + void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); + void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const; + +public: + /// populateFunctionPassManager - This fills in the function pass manager, + /// which is expected to be run on each function immediately as it is + /// generated. The idea is to reduce the size of the IR in memory. + void populateFunctionPassManager(legacy::FunctionPassManager &FPM); + void customPopulateFunctionPassManager(legacy::FunctionPassManager &FPM, unsigned customSizeLevel, unsigned subSeqNum); + + /// populateModulePassManager - This sets up the primary pass manager. + void populateModulePassManager(legacy::PassManagerBase &MPM); + void customPopulateModulePassManager(legacy::PassManagerBase &MPM, unsigned customSizeLevel, unsigned subSeqNum); + void populateLTOPassManager(legacy::PassManagerBase &PM); + void populateThinLTOPassManager(legacy::PassManagerBase &PM); +}; + +/// Registers a function for adding a standard set of passes. This should be +/// used by optimizer plugins to allow all front ends to transparently use +/// them. 
Create a static instance of this class in your plugin, providing a +/// private function that the PassManagerBuilder can use to add your passes. +class RegisterStandardPasses { + PassManagerBuilder::GlobalExtensionID ExtensionID; + +public: + RegisterStandardPasses(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + ExtensionID = PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn)); + } + + ~RegisterStandardPasses() { + // If the collection holding the global extensions is destroyed after the + // plugin is unloaded, the extension has to be removed here. Indeed, the + // destructor of the ExtensionFn may reference code in the plugin. + PassManagerBuilder::removeGlobalExtension(ExtensionID); + } +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt b/llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt new file mode 100644 index 000000000000..a15d289a30bd --- /dev/null +++ b/llvm/lib/CodeGen/MLRegAlloc/CMakeLists.txt @@ -0,0 +1,23 @@ +# message("From MLREGALOC", ${LLVM_INCLUDE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) 
+ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + +find_package(Protobuf CONFIG REQUIRED) +message(STATUS "Using protobuf ${Protobuf_VERSION}") +message(STATUS "Using protobuf Include Dirs ${Protobuf_INCLUDE_DIRS}") + +add_llvm_component_library(LLVMMLRegAlloc +MLRegAlloc.cpp + +DEPENDS +intrinsics_gen + +LINK_LIBS +LLVMMLRegAllocInference +LLVMSymbolic +) + +add_subdirectory(inference) +target_link_libraries(LLVMMLRegAlloc PRIVATE gRPCModelRunnerLib ONNXModelRunnerLib) +configure_file (./Config.h.cmake Config.h @ONLY) \ No newline at end of file diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index e6bff119b094..e5ba187e9538 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -337,3 +337,5 @@ if(LLVM_WITH_Z3) ${Z3_INCLUDE_DIR} ) endif() + +target_link_libraries(LLVMSupport PUBLIC ModelRunnerUtils) \ No newline at end of file diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index d3efb8b67be5..190c0fb62a6a 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/StringSaver.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" +#include "MLModelRunner/Utils/MLConfig.h" #include #include #include @@ -75,6 +76,10 @@ template class opt; } // namespace cl } // namespace llvm +llvm::cl::opt MLBridge::MLConfig::mlconfig( + "ml-config-path", llvm::cl::Hidden, llvm::cl::Optional, + llvm::cl::desc("Path to ML config files"), llvm::cl::init("")); + // Pin the vtables to this file. 
void GenericOptionValue::anchor() {} void OptionValue::anchor() {} diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index 83d5d24c10d6..b50bb98d3950 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -6,10 +6,25 @@ add_subdirectory(Scalar) add_subdirectory(IPO) add_subdirectory(Vectorize) add_subdirectory(Hello) +<<<<<<< HEAD add_subdirectory(demoGrpcPass) add_subdirectory(demoInferenceEnginePass) +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp add_subdirectory(Hello-IR2Vec) add_subdirectory(ObjCARC) add_subdirectory(Coroutines) add_subdirectory(CFGuard) +<<<<<<< HEAD +<<<<<<< HEAD add_subdirectory(Hello-MLBridge) +======= +add_subdirectory(AddSizeAttr) +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues +add_subdirectory(MCAInstrumentation) +<<<<<<< HEAD +add_subdirectory(PipeIR) +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp diff --git a/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt b/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt index 6cec4e0179f2..d18fc4d9f304 100644 --- a/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt +++ b/llvm/lib/Transforms/Hello-MLBridge/CMakeLists.txt @@ -29,7 +29,13 @@ add_llvm_library(LLVMHelloMLBridge MODULE BUILDTREE_ONLY PLUGIN_TOOL opt +<<<<<<< HEAD ) +======= + ) + +target_link_libraries(LLVMHelloMLBridge PUBLIC LLVMMLBridge) +>>>>>>> 92e0943e9769... 
Fixed cmake linking issues file(GLOB MODEL_OBJECTS ${CMAKE_CURRENT_SOURCE_DIR}/tf_model/*.o) diff --git a/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp b/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp index 4964773f4f1f..afbd6dffb2a5 100644 --- a/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp +++ b/llvm/lib/Transforms/Hello-MLBridge/Hello.cpp @@ -231,6 +231,25 @@ M(49500) \ M(50000) +<<<<<<< HEAD +======= +#define DEBUG_TYPE "hello_mlbridge" + +using namespace llvm; +using namespace grpc; +using namespace helloMLBridgegRPC; + +STATISTIC(hellomodule, "Counts number of functions greeted"); + +static cl::opt training("hello-training", cl::Hidden, + cl::desc("whether it is training or inference"), + cl::init(false)); + +static cl::opt server_address( + "hello-server-address", cl::Hidden, + cl::desc("Starts the server in the given address, format :"), + cl::init("localhost:5050")); +>>>>>>> 2d00f46f0790... DEBUG_TYPE fix in Hello-MLBridge pass using namespace llvm; using namespace MLBridge; @@ -262,7 +281,42 @@ void HelloMLBridge::setModelRunner(int n) { "output", M->getContext()); \ break; MODELS(M) +<<<<<<< HEAD #undef M +======= +#undef M + } + // MLRunner = std::make_unique>("output"); + } + + void TFinitCommunication() { + auto StartTime = std::chrono::high_resolution_clock::now(); + + std::pair> p1("x", FeatureVector); + + setTFModelRunner(n); + MLRunner->populateFeatures(p1); + double Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("tf-inference.csv", std::ios_base::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + + bool runOnModule(Module &M) override { + // unregister MLConfig::mlconfig + MLConfig::mlconfig.removeArgument(); + this->M = &M; + if (useTF) { + populateFeatureVector(); + TFinitCommunication(); + return false; +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp } } @@ -287,6 +341,7 @@ void HelloMLBridge::initFeatureVector() { for (int i = 0; i < n; i++) { FeatureVector[i] = dis(gen); } +<<<<<<< HEAD } // New PM Registration @@ -310,4 +365,20 @@ llvm::PassPluginLibraryInfo getHelloMLBridgePluginInfo() { extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { return getHelloMLBridgePluginInfo(); -} \ No newline at end of file +} +======= + +private: + std::unique_ptr MLRunner; + std::string basename; + BaseSerDes::Kind SerDesType; + Module *M; +}; + +} // namespace + +char HelloMLBridge::ID = 0; +static RegisterPass Z("hello-MLBridge", + "Hello World Pass (with MLBridge)"); + +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp diff --git a/llvm/lib/Transforms/Hello/Hello.cpp b/llvm/lib/Transforms/Hello/Hello.cpp index 46527e3933ee..6b6fe5477a64 100644 --- a/llvm/lib/Transforms/Hello/Hello.cpp +++ b/llvm/lib/Transforms/Hello/Hello.cpp @@ -20,6 +20,7 @@ using namespace llvm; #define DEBUG_TYPE "hello" STATISTIC(HelloCounter, "Counts number of functions greeted"); +<<<<<<< HEAD namespace { // Hello - The first implementation, without getAnalysisUsage. @@ -39,6 +40,8 @@ struct Hello : public FunctionPass { char Hello::ID = 0; static RegisterPass X("hello", "Hello World Pass"); +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp namespace { // Hello2 - The second implementation with getAnalysisUsage implemented. 
struct Hello2 : public FunctionPass { diff --git a/llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp b/llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp new file mode 100644 index 000000000000..7cb571439dc1 --- /dev/null +++ b/llvm/lib/Transforms/IPO/AddSizeAttr/AddSizeAttr.cpp @@ -0,0 +1,61 @@ +//Add Code Size reduction attributes + +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/InitializePasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h" +using namespace llvm; + +#define DEBUG_TYPE "addsizeattr" + +static cl::opt EnableMinSize("enableMinSizeAttr", cl::desc("Option to add minsize function attribute"), cl::value_desc("true/false")); +static cl::opt RemoveNoInline("removeNoInlineAttr", cl::desc("Option to remove noinline function attribute"), cl::value_desc("true/false")); + +namespace { + class AddSizeAttrPass : public ModulePass { + public: + static char ID; + + AddSizeAttrPass() : ModulePass(ID) { + initializeAddSizeAttrPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { + for (Function &F : M) { + F.addFnAttr(Attribute::OptimizeForSize); + if(EnableMinSize){ + F.addFnAttr(Attribute::MinSize); + } + if(RemoveNoInline){ + F.removeFnAttr(Attribute::NoInline); + } + } + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + }; +} + + +char AddSizeAttrPass::ID = 0; +INITIALIZE_PASS_BEGIN(AddSizeAttrPass, + "add-size-attr", + "Add Function Attributes that reduce code size", false, false) +INITIALIZE_PASS_END(AddSizeAttrPass, + "add-size-attr", + "Add Function Attributes that reduce code size", false, false) + +void llvm::initializeAddSizeAttr(PassRegistry &Registry) { + initializeAddSizeAttrPassPass(Registry); +} + +ModulePass *llvm::createAddSizeAttrPass() { + return new AddSizeAttrPass(); +} diff --git 
a/llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt b/llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt new file mode 100644 index 000000000000..1aa0dd5944bd --- /dev/null +++ b/llvm/lib/Transforms/IPO/AddSizeAttr/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_component_library(LLVMAddSizeAttr + AddSizeAttr.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/AddSizeAttr + + DEPENDS + intrinsics_gen + ) + +target_link_libraries(LLVMAddSizeAttr PUBLIC LLVMMLBridge) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index e03aff0f65d7..4e183f87e1b0 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,3 +1,9 @@ +<<<<<<< HEAD +======= +add_subdirectory(PosetRL) +add_subdirectory(AddSizeAttr) + +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO add_llvm_component_library(LLVMipo AlwaysInliner.cpp Annotation2Metadata.cpp @@ -50,9 +56,12 @@ add_llvm_component_library(LLVMipo intrinsics_gen omp_gen +<<<<<<< HEAD COMPONENT_NAME IPO +<<<<<<< HEAD +<<<<<<< HEAD LINK_COMPONENTS AggressiveInstCombine Analysis @@ -72,3 +81,14 @@ add_llvm_component_library(LLVMipo Vectorize Instrumentation ) +======= +target_link_libraries(LLVMipo PUBLIC LLVMPosetRL LLVMCodeSizeOpt) +target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO +======= +target_link_libraries(LLVMipo PUBLIC LLVMPosetRL) +target_link_libraries(LLVMipo PRIVATE LLVMMLBridge) +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp +======= +target_link_libraries(LLVMipo PUBLIC LLVMPosetRL LLVMAddSizeAttr) +>>>>>>> 92e0943e9769... 
Fixed cmake linking issues diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt b/llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt new file mode 100644 index 000000000000..1e57171c6d53 --- /dev/null +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_component_library(LLVMCodeSizeOpt CodeSizeOpt.cpp + +LINK_COMPONENTS +IR2Vec + +ADDITIONAL_HEADER_DIRS +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/CodeSizeOpt + +DEPENDS +intrinsics_gen +LLVMMLBridge +) +target_link_libraries(LLVMCodeSizeOpt PUBLIC LLVMMLBridge) diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp b/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp new file mode 100644 index 000000000000..d3fb6e6c2935 --- /dev/null +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.cpp @@ -0,0 +1,118 @@ +#include "llvm/Transforms/IPO/CodeSizeOpt/CodeSizeOpt.h" +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "inference/CodeSizeOptEnv.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR2Vec.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionEnum.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionHeaders.h" +#include "llvm/Transforms/IPO/CodeSizeOpt/ActionSpace/ActionSwitch.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include +#include +#include + +#define DEBUG_TYPE "codesizeopt" + +using namespace llvm; + +namespace { +struct CodeSizeOpt : public ModulePass, public CodeSizeOptEnv { + static char ID; + CodeSizeOpt() : 
ModulePass(ID) {} + bool runOnModule(Module &M) override { + assert(MLConfig::mlconfig != "" && "ml-config-path required"); + this->M = &M; + llvm::Triple triple(M.getTargetTriple()); + tlii_ = llvm::TargetLibraryInfoImpl(triple); + Agent agent(MLConfig::mlconfig + "/codesizeopt/compiler_gym_ir2vec.onnx"); + std::map agents; + agents["agent"] = &agent; + MLRunner = std::make_unique(this, agents, &M.getContext()); + MLRunner->evaluate(); + errs() << "Sequence: "; + for (auto a : Sequence) + errs() << a << " "; + errs() << "\n"; + + return true; + } + + inline const llvm::TargetLibraryInfoImpl &tlii() const { return tlii_; } + + void addPassToPM(llvm::legacy::FunctionPassManager *PM, Pass *P) { + errs() << "Adding Pass: Profilesummaryinfo" + << "\n"; + PM->add(new ProfileSummaryInfoWrapperPass()); + errs() << "Adding Pass: TargetLibraryInfo" + << "\n"; + PM->add(new TargetLibraryInfoWrapperPass(tlii())); + errs() << "Adding Pass: TargetTransformInfo" + << "\n"; + PM->add(createTargetTransformInfoWrapperPass(TargetIRAnalysis())); + errs() << "Adding Pass: " << P->getPassName() << "\n"; + PM->add(P); + } + + Embedding getEmbeddings() override { + auto Ir2vec = IR2Vec::Embeddings( + *M, IR2Vec::IR2VecMode::FlowAware, + MLConfig::mlconfig + "/ir2vec/seedEmbeddingVocab-300-llvm10.txt"); + auto ProgVector = Ir2vec.getProgramVector(); + Embedding Vector(ProgVector.begin(), ProgVector.end()); + // errs() << "Embedding: "; + // for(auto v : Vector) + // errs() << v << " "; + // errs() << "\n"; + return Vector; + } + + void applySeq(Action Action) override { + PassManagerBuilder Builder; + Builder.OptLevel = 2; + Builder.SizeLevel = 2; + + legacy::FunctionPassManager FPM(M); + legacy::PassManager MPM; + errs() << "Handle Pass: " << Action << "\n"; + +#define HANDLE_PASS(pass) addPassToPM(&FPM, pass); + HANDLE_ACTION(Action, HANDLE_PASS) +#undef HANDLE_PASS + + // Builder.customPopulateFunctionPassManager(FPM, 34, Action); + // Builder.customPopulateModulePassManager(MPM, 34, 
Action); + // run the passes + errs() << "Running Module Passes\n"; + MPM.run(*M); + errs() << "Running Function Passes\n"; + for (auto &F : *M) { + FPM.run(F); + } + } + +private: + Module *M; + std::unique_ptr MLRunner; + llvm::TargetLibraryInfoImpl tlii_; +}; +} // namespace +char CodeSizeOpt::ID = 0; +INITIALIZE_PASS_BEGIN(CodeSizeOpt, "codesizeopt-rl", "poset sequence pass", + false, false) +INITIALIZE_PASS_END(CodeSizeOpt, "codesizeopt-rl", "poset sequence pass", false, + false) + +ModulePass *llvm::createCodeSizeOptPass() { return new CodeSizeOpt(); } diff --git a/llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h b/llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h new file mode 100644 index 000000000000..f894511cd8f0 --- /dev/null +++ b/llvm/lib/Transforms/IPO/CodeSizeOpt/inference/CodeSizeOptEnv.h @@ -0,0 +1,68 @@ +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +#define ActionMaskSize 34 +#define EmbeddingSize 300 + +using namespace llvm; +using namespace MLBridge; + +typedef std::vector Embedding; + +class CodeSizeOptEnv : public Environment { + unsigned Actioncount = 0; + Embedding CurrEmbedding; + Observation CurrObs; +public: + std::vector Sequence; + +public: + CodeSizeOptEnv(); + Observation& reset() override; + Observation& step(Action) override; + virtual Embedding getEmbeddings() = 0; + virtual void applySeq(Action) = 0; + +}; + + +inline Observation& CodeSizeOptEnv::step(Action Action) { + Sequence.push_back(Action); + applySeq(Action); + + Actioncount += 1; + CurrEmbedding = getEmbeddings(); + if 
(Actioncount >= 30) + setDone(); + + CurrObs.clear(); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +inline Observation& CodeSizeOptEnv::reset() { + CurrEmbedding = getEmbeddings(); + + CurrObs.clear(); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +inline CodeSizeOptEnv::CodeSizeOptEnv() { + CurrEmbedding.assign(EmbeddingSize, 0); + setNextAgent("agent"); +} diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp new file mode 100644 index 000000000000..c84329a1a494 --- /dev/null +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -0,0 +1,2184 @@ +//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PassManagerBuilder class, which is used to set up a +// "standard" optimization sequence suitable for languages like C and C++. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Transforms/IPO/AddSizeAttr/AddSizeAttr.h" +#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Instrumentation.h" +<<<<<<< HEAD +#include "llvm/Transforms/PosetRL/PosetRL.h" +#include "llvm/Transforms/CodeSizeOpt/CodeSizeOpt.h" +======= +#include "llvm/Transforms/IPO/PosetRL/PosetRL.h" +>>>>>>> e4011a594dc3... 
Added MLConfig in Support/CommandLine.cpp +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/Transforms/IR2Vec-LOF/custom_loop_distribution.h" + +using namespace llvm; + +bool PassManagerBuilder::check_flag = false; +static cl::opt + OPosetRL("OPosetRL", cl::init(false), cl::Hidden, + cl::desc("poset rl pass sequence")); + +static cl::opt + RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); + +static cl::opt +UseGVNAfterVectorization("use-gvn-after-vectorization", + cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); + +static cl::opt ExtraVectorizerPasses( + "extra-vectorizer-passes", cl::init(false), cl::Hidden, + cl::desc("Run cleanup optimization passes after vectorization.")); + +static cl::opt +RunLoopRerolling("reroll-loops", cl::Hidden, + cl::desc("Run the loop rerolling pass")); + +static cl::opt RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, + cl::desc("Run the NewGVN pass")); + +static cl::opt +Runcustom_loop_distribution("cld", cl::init(false), cl::Hidden, + cl::desc("costomized loop-distribution pass")); + +static cl::opt +RunNoPreDistributionPasses("No-PreDistributionPasses", cl::init(false), cl::Hidden, + cl::desc("Apply pre-distribution passes")); + +static cl::opt +RunNoPostDistributionPasses("No-PostDistributionPasses", cl::init(false), cl::Hidden, + cl::desc("Apply post-distribution passes")); + +// Experimental option to use CFL-AA +enum class CFLAAType { None, Steensgaard, Andersen, 
Both }; +static cl::opt + UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis"), + cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), + clEnumValN(CFLAAType::Steensgaard, "steens", + "Enable unification-based CFL-AA"), + clEnumValN(CFLAAType::Andersen, "anders", + "Enable inclusion-based CFL-AA"), + clEnumValN(CFLAAType::Both, "both", + "Enable both variants of CFL-AA"))); + +static cl::opt EnableLoopInterchange( + "enable-loopinterchange", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopInterchange Pass")); + +static cl::opt EnableUnrollAndJam("enable-unroll-and-jam", + cl::init(false), cl::Hidden, + cl::desc("Enable Unroll And Jam Pass")); + +static cl::opt + EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable preparation for ThinLTO.")); + +static cl::opt + EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable performing ThinLTO.")); + +cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, + cl::desc("Enable hot-cold splitting pass")); + +static cl::opt UseLoopVersioningLICM( + "enable-loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental Loop Versioning LICM pass")); + +static cl::opt + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); + +static cl::opt EnableGVNHoist( + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); + +static cl::opt + DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), + cl::Hidden, + cl::desc("Disable shrink-wrap library calls")); + +static cl::opt 
EnableSimpleLoopUnswitch( + "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, + cl::desc("Enable the simple loop unswitch pass. Also enables independent " + "cleanup passes integrated into the loop pass manager pipeline.")); + +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = off)")); + +// This option is used in simplifying testing SampleFDO optimizations for +// profile loading. +static cl::opt + EnableCHR("enable-chr", cl::init(true), cl::Hidden, + cl::desc("Enable control height reduction optimization (CHR)")); + +cl::opt FlattenedProfileUsed( + "flattened-profile-used", cl::init(false), cl::Hidden, + cl::desc("Indicate the sample profile being used is flattened, i.e., " + "no inline hierachy exists in the profile. ")); + +cl::opt EnableOrderFileInstrumentation( + "enable-order-file-instrumentation", cl::init(false), cl::Hidden, + cl::desc("Enable order file instrumentation (default = off)")); + +static cl::opt + EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, + cl::desc("Enable lowering of the matrix intrinsics")); + +static cl::opt + EnableFusion("enable-fusion", cl::init(false), cl::Hidden, + cl::desc("LOF:Enable LLVM fusion")); + +PassManagerBuilder::PassManagerBuilder() { + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = nullptr; + Inliner = nullptr; + DisableUnrollLoops = false; + SLPVectorize = RunSLPVectorization; + LoopVectorize = EnableLoopVectorization; + LoopsInterleaved = EnableLoopInterleaving; + RerollLoops = RunLoopRerolling; + NewGVN = RunNewGVN; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; + DisableGVNLoadPRE = false; + ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; + VerifyInput = false; + VerifyOutput = false; + MergeFunctions = false; + PrepareForLTO = false; + EnablePGOInstrGen = false; + EnablePGOCSInstrGen = false; + EnablePGOCSInstrUse = false; + PGOInstrGen = ""; + 
PGOInstrUse = ""; + PGOSampleUse = ""; + PrepareForThinLTO = EnablePrepareForThinLTO; + PerformThinLTO = EnablePerformThinLTO; + DivergentTarget = false; +} + +PassManagerBuilder::~PassManagerBuilder() { + delete LibraryInfo; + delete Inliner; +} + +/// Set of global extensions, automatically added as part of the standard set. +static ManagedStatic< + SmallVector, + 8>> + GlobalExtensions; +static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter; + +/// Check if GlobalExtensions is constructed and not empty. +/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger +/// the construction of the object. +static bool GlobalExtensionsNotEmpty() { + return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); +} + +PassManagerBuilder::GlobalExtensionID +PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + auto ExtensionID = GlobalExtensionsCounter++; + GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID)); + return ExtensionID; +} + +void PassManagerBuilder::removeGlobalExtension( + PassManagerBuilder::GlobalExtensionID ExtensionID) { + // RegisterStandardPasses may try to call this function after GlobalExtensions + // has already been destroyed; doing so should not generate an error. 
+ if (!GlobalExtensions.isConstructed()) + return; + + auto GlobalExtension = + llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) { + return std::get<2>(elem) == ExtensionID; + }); + assert(GlobalExtension != GlobalExtensions->end() && + "The extension ID to be removed should always be valid."); + + GlobalExtensions->erase(GlobalExtension); +} + +void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { + Extensions.push_back(std::make_pair(Ty, std::move(Fn))); +} + +void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, + legacy::PassManagerBase &PM) const { + if (GlobalExtensionsNotEmpty()) { + for (auto &Ext : *GlobalExtensions) { + if (std::get<0>(Ext) == ETy) + std::get<1>(Ext)(*this, PM); + } + } + for (unsigned i = 0, e = Extensions.size(); i != e; ++i) + if (Extensions[i].first == ETy) + Extensions[i].second(*this, PM); +} + +void PassManagerBuilder::addInitialAliasAnalysisPasses( + legacy::PassManagerBase &PM) const { + switch (UseCFLAA) { + case CFLAAType::Steensgaard: + PM.add(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + PM.add(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + PM.add(createCFLSteensAAWrapperPass()); + PM.add(createCFLAndersAAWrapperPass()); + break; + default: + break; + } + + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. 
+ PM.add(createTypeBasedAAWrapperPass()); + PM.add(createScopedNoAliasAAWrapperPass()); +} + +void PassManagerBuilder::addInstructionCombiningPass( + legacy::PassManagerBase &PM) const { + bool ExpensiveCombines = OptLevel > 2; + PM.add(createInstructionCombiningPass(ExpensiveCombines)); +} + +void PassManagerBuilder::populateFunctionPassManager( + legacy::FunctionPassManager &FPM) { + addExtensionsToPM(EP_EarlyAsPossible, FPM); + FPM.add(createEntryExitInstrumenterPass()); + + // Add LibraryInfo if we have some. + if (LibraryInfo) + FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + FPM.add(createCFGSimplificationPass()); + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); +} + +void PassManagerBuilder::customPopulateFunctionPassManager( + legacy::FunctionPassManager &FPM, unsigned customSizeLevel, unsigned subSeqNum) { + //if (((customSizeLevel==15 || customSizeLevel==17) && subSeqNum == 0) || (customSizeLevel==30 && subSeqNum < 4)) { + if (customSizeLevel==15 || customSizeLevel==17 || customSizeLevel==30 || customSizeLevel==34 || customSizeLevel==40) { + FPM.add(createEntryExitInstrumenterPass()); + } + + // Add LibraryInfo if we have some. 
+ if (LibraryInfo) + FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + //if (OptLevel == 0) return; + + addInitialAliasAnalysisPasses(FPM); + + if ((customSizeLevel==15 || customSizeLevel==17) && subSeqNum == 0){ + FPM.add(createCFGSimplificationPass()); + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); + } + + if ((customSizeLevel==34 || customSizeLevel==40) && (subSeqNum == 29 || subSeqNum == 30 || subSeqNum == 31)){ + FPM.add(createCFGSimplificationPass()); + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); + } + + if (customSizeLevel==30 && subSeqNum == 3){ + FPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel==30 && subSeqNum == 29){ + FPM.add(createSROAPass()); + FPM.add(createEarlyCSEPass()); + FPM.add(createLowerExpectIntrinsicPass()); + } +} + +// Do PGO instrumentation generation or use pass as the option specified. +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, + bool IsCS = false) { + if (IsCS) { + if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) + return; + } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) + return; + + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + // We will not do this inline for context sensitive PGO (when IsCS is true). + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && + PGOSampleUse.empty() && !IsCS) { + // Create preinline pass. We construct an InlineParams object and specify + // the threshold here to avoid the command line options of the regular + // inliner to influence pre-inlining. The only fields of InlineParams we + // care about are DefaultThreshold and HintThreshold. + InlineParams IP; + IP.DefaultThreshold = PreInlineThreshold; + // FIXME: The hint threshold has the same value used by the regular inliner. 
+ // This should probably be lowered after performance testing. + IP.HintThreshold = 325; + + MPM.add(createFunctionInliningPass(IP)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + } + if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { + MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); + // Add the profile lowering pass. + InstrProfOptions Options; + if (!PGOInstrGen.empty()) + Options.InstrProfileOutput = PGOInstrGen; + Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; + MPM.add(createLoopRotatePass()); + MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); + } + if (!PGOInstrUse.empty()) + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); + // Indirect call promotion that promotes intra-module targets only. + // For ThinLTO this is done earlier due to interactions with globalopt + // for imported functions. We don't run this at -O0. + if (OptLevel > 0 && !IsCS) + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); +} + +void PassManagerBuilder::addFunctionSimplificationPasses( + legacy::PassManagerBase &MPM) { + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. + assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + + if (OptLevel > 1) { + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + } + + if (OptLevel > 1) { + // Speculative execution if the target has divergent branches; otherwise nop. 
+ MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // Combine silly seq's + if (OptLevel > 2) + MPM.add(createAggressiveInstCombinerPass()); + addInstructionCombiningPass(MPM); + if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) + MPM.add(createLibCallsShrinkWrapPass()); + addExtensionsToPM(EP_Peephole, MPM); + + + // Optimize memory intrinsic calls based on the profiled size information. + if (SizeLevel == 0) + MPM.add(createPGOMemOPSizeOptLegacyPass()); + + // TODO: Investigate the cost/benefit of tail call elimination on debugging. + if (OptLevel > 1) + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + + // Begin the loop pass pipeline. + if (EnableSimpleLoopUnswitch) { + // The simple loop unswitch pass relies on separate cleanup passes. Schedule + // them first so when we re-process a loop they run before other loop + // passes. + MPM.add(createLoopInstSimplifyPass()); + MPM.add(createLoopSimplifyCFGPass()); + } + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + // FIXME: We break the loop pass pipeline here in order to do full + // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the + // need for this. 
+ MPM.add(createCFGSimplificationPass()); + addInstructionCombiningPass(MPM); + // We resume loop passes creating a second loop pipeline here. + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + addExtensionsToPM(EP_LateLoopOptimizations, MPM); + MPM.add(createLoopDeletionPass()); // Delete dead loops + + if (EnableLoopInterchange) + MPM.add(createLoopInterchangePass()); // Interchange loops + + // Unroll small loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + // This ends the loop pass pipelines. + + if (OptLevel > 1) { + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + addInstructionCombiningPass(MPM); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1) { + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + if (RerollLoops) + MPM.add(createLoopRerollPass()); + + // TODO: Investigate if this is too expensive at O1. 
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // Clean up after everything. + addInstructionCombiningPass(MPM); + addExtensionsToPM(EP_Peephole, MPM); + + if (EnableCHR && OptLevel >= 3 && + (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) + MPM.add(createControlHeightReductionLegacyPass()); +} + +// Adds to MPM the passes of one sub-sequence, selected by the pair +// (customSizeLevel, subSeqNum). Every `if` below tests that pair and, when it +// matches, appends its passes in the order written; the branches are independent +// (no `else`), so all branches matching a pair fire, top to bottom. +// Branches exist for customSizeLevel 15 (subSeqNum 0-14), 17 (0-16), +// 30 (0-29, with no branch for subSeqNum 3) and 34/40 (0-33, mostly shared). +// For any other pair only the LibraryInfo wrapper (when LibraryInfo is set) and +// the initial alias-analysis passes are added. +// NOTE(review): customSizeLevel looks like the size of the sub-sequence table in +// use rather than an -Os/-Oz level (SizeLevel is read separately) -- confirm +// against the caller. +void PassManagerBuilder::customPopulateModulePassManager( + legacy::PassManagerBase &MPM, unsigned customSizeLevel, unsigned subSeqNum) { + + // if (customSizeLevel == 0 && subSeqNum == 0){ + // MPM.add(createPosetRLPass()); + + // } + + if (((customSizeLevel == 15 || customSizeLevel == 17) && subSeqNum == 0) || (customSizeLevel == 30 && subSeqNum == 29)){ + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 29 || subSeqNum == 30 || subSeqNum == 31)) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + } + + // Add LibraryInfo if we have some.
+ if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (customSizeLevel == 15 && subSeqNum == 0){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createPromoteMemoryToRegisterPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 1){ + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 2){ + MPM.add(createDeadArgEliminationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 3){ + MPM.add(createPruneEHPass()); // Remove dead EH info + // NOTE: RunInliner is written but never read in this function; its only + // reader (the `if (RunInliner)` block near the end) is commented out. The + // same pattern repeats in the other inliner branches below. + // Inliner is reset to nullptr once added, so later `if (Inliner)` checks + // on this builder are false. + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createBarrierNoopPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 4){ + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + + if (customSizeLevel == 15 && subSeqNum == 5){ + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if (customSizeLevel == 15 && subSeqNum == 6){ + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1.
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 15 && subSeqNum == 7){ + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if (customSizeLevel == 15 && subSeqNum == 8){ + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 15 && subSeqNum == 9){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 15 && subSeqNum == 10){ + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 11){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if (customSizeLevel == 15 && subSeqNum == 12){ + MPM.add(createLoopLoadEliminationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 15 && subSeqNum == 13){ + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + addInstructionCombiningPass(MPM); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + } + + if (customSizeLevel == 15 && subSeqNum == 14){ + MPM.add(createStripDeadPrototypesPass()); + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+ MPM.add(createConstantMergePass()); // Merge dup global constants + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 0){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 29 || subSeqNum == 30 || subSeqNum == 31)){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 31){ + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + // if (OptLevel > 2) + // MPM.add(createCallSiteSplittingPass()); + + if (customSizeLevel == 17 && subSeqNum == 1) { + addInstructionCombiningPass(MPM); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 5) { + addInstructionCombiningPass(MPM); + } + + // Level 40 only: sub-sequences 7..21 get PromoteMemoryToRegister ahead of + // the passes added by the shared 34/40 branches further down. + if (customSizeLevel == 40 && (subSeqNum > 6 && subSeqNum < 22)) { + MPM.add(createPromoteMemoryToRegisterPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 2) { + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa.
*/)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createBarrierNoopPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 23 || subSeqNum == 24 || subSeqNum == 25 || subSeqNum == 26 || subSeqNum == 27)) { + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createSROAPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 23 || subSeqNum == 24 || subSeqNum == 25)) { + MPM.add(createEarlyCSEPass()); + MPM.add(createLowerExpectIntrinsicPass()); + MPM.add(createForceFunctionAttrsLegacyPass()); + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 23) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 24) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 25) { + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 26 || subSeqNum == 27))
{ + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 26) { + MPM.add(createDeadStoreEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 23 || subSeqNum == 24 || subSeqNum == 25 || subSeqNum == 26 || subSeqNum == 27)) { + MPM.add(createBarrierNoopPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 32 || subSeqNum == 33)) { + MPM.add(createCFGSimplificationPass()); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 32) { + MPM.add(createDeadStoreEliminationPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 3) { + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + } + + if (customSizeLevel == 17 && subSeqNum == 4) { + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 6) { + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + } + + if (customSizeLevel == 17 && subSeqNum == 5) { + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 28) { + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if (customSizeLevel == 17 && subSeqNum == 6) { + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 12) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 14 || subSeqNum == 15)) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ?
0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 14) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 15) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 16) { + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if (customSizeLevel == 17 && subSeqNum == 7) { + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && ( subSeqNum == 7 || subSeqNum == 8)) { + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 8) { + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if (customSizeLevel == 17 && subSeqNum == 8) { + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 3 || subSeqNum == 4)) { + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); + } + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 3) { + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 17 && subSeqNum == 9) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 9) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 17 && subSeqNum == 10) { + addInstructionCombiningPass(MPM); + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 0 || subSeqNum == 1 || subSeqNum == 2)) { + addInstructionCombiningPass(MPM); + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createGlobalOptimizerPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 0) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 1) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 2) { + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); + } + +
if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 29) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 30) { + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 11) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 17) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if (customSizeLevel == 17 && subSeqNum == 12) { + MPM.add(createLoopLoadEliminationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 21) { + MPM.add(createLoopLoadEliminationPass()); + } + + if (customSizeLevel == 17 && subSeqNum == 13) { + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 22) { + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + } + + if (customSizeLevel == 17 && subSeqNum == 14) { + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if (customSizeLevel == 17 && subSeqNum == 15) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+ MPM.add(createConstantMergePass()); // Merge dup global constants + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && (subSeqNum == 10 || subSeqNum == 11)) { + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 10) { + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createConstantMergePass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 11) { + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 13) { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 17 && subSeqNum == 16) { + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 18) { + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 19){ + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if ((customSizeLevel == 34 || customSizeLevel == 40) && subSeqNum == 20){ + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ?
createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if (customSizeLevel == 30 && subSeqNum == 0){ + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + } + + if (customSizeLevel == 30 && subSeqNum == 1){ + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + } + + if (customSizeLevel == 30 && subSeqNum == 2){ + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + MPM.add(createBarrierNoopPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 4){ + addInstructionCombiningPass(MPM); + MPM.add(createBarrierNoopPass()); + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 5){ + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 30 && subSeqNum == 6){ + addInstructionCombiningPass(MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 7){ + addInstructionCombiningPass(MPM); + } + + if (customSizeLevel == 30 && subSeqNum == 8){ + addInstructionCombiningPass(MPM); + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + } + + if (customSizeLevel == 30 && subSeqNum == 9){ //Check (NOTE(review): same body as subSeqNum == 7) + addInstructionCombiningPass(MPM); + } + +
if (customSizeLevel == 30 && subSeqNum == 10){ + MPM.add(createLoopLoadEliminationPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 11){ + MPM.add(createLoopSinkPass()); + MPM.add(createInstSimplifyLegacyPass()); + MPM.add(createDivRemPairsPass()); + MPM.add(createCFGSimplificationPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 12){ + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + + if (customSizeLevel == 30 && subSeqNum == 13){ + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + + if (customSizeLevel == 30 && subSeqNum == 14){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 15){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 30 && subSeqNum == 16){ + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if (customSizeLevel == 30 && subSeqNum == 17){ + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + } + + if (customSizeLevel == 30 && subSeqNum == 18){ + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + } + + if (customSizeLevel == 30 && subSeqNum == 19){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLoopDistributePass()); + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + } + + if (customSizeLevel == 30 && subSeqNum == 20){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAlignmentFromAssumptionsPass()); + MPM.add(createStripDeadPrototypesPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 21){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + } + + if (customSizeLevel == 30 && subSeqNum == 22){ + MPM.add(createLoopRotatePass(SizeLevel == 2 ?
0 : -1)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + } + + if (customSizeLevel == 30 && subSeqNum == 23){ + MPM.add(createGlobalOptimizerPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 24){ + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. + MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + } + + if (customSizeLevel == 30 && subSeqNum == 25){ + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants + } + + if (customSizeLevel == 30 && subSeqNum == 26){ + MPM.add(createGlobalDCEPass()); + MPM.add(createGlobalsAAWrapperPass()); + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + } + + if (customSizeLevel == 30 && subSeqNum == 27){ + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } + + if (customSizeLevel == 30 && subSeqNum == 28){ + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + } + + if (customSizeLevel == 30 && subSeqNum == 29){ + MPM.add(createInferFunctionAttrsLegacyPass()); + MPM.add(createIPSCCPPass()); + MPM.add(createCalledValuePropagationPass()); + MPM.add(createAttributorLegacyPass()); + } + // NOTE: everything from here to the closing brace of this function is + // commented out and adds no passes. + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + //MPM.add(createGlobalsAAWrapperPass()); + + // if (OptLevel > 2) + // MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + // addFunctionSimplificationPasses(MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + // MPM.add(createBarrierNoopPass()); + + // The inliner performs some kind of dead code elimination as it goes, + // but there are cases that are not really caught by it. We might + // at some point consider teaching the inliner about them, but it + // is OK for now to run GlobalOpt + GlobalDCE in tandem as their + // benefits generally outweigh the cost, making the whole pipeline + // faster. + // if (RunInliner) { + // MPM.add(createGlobalOptimizerPass()); + // MPM.add(createGlobalDCEPass()); + // } + + + // Scheduling LoopVersioningLICM when inlining is over, because after that + // we may see more accurate aliasing. Reason to run this late is that too + // early versioning may prevent further inlining due to increase of code + // size. By placing it just after inlining other optimizations which runs + // later might get benefit of no-alias assumption in clone loop.
+ // if (UseLoopVersioningLICM) { + // MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + // MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // } + + // if (EnableMatrix) { + // MPM.add(createLowerMatrixIntrinsicsPass()); + // // CSE the pointer arithmetic of the column vectors. This allows alias + // // analysis to establish no-aliasing between loads and stores of different + // // columns of the same matrix. + // MPM.add(createEarlyCSEPass(false)); + // } + + // Re-rotate loops in all our loop nests. These may have fallout out of + // rotated form due to GVN or other transformations, and the vectorizer relies + // on the rotated form. Disable header duplication at -Oz. + // MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + + // Distribute loops to allow partial vectorization. I.e. isolate dependences + // into separate loop that would otherwise inhibit vectorization. This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + // MPM.add(createLoopDistributePass()); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + // MPM.add(createLoopLoadEliminationPass()); + + // FIXME: Because of #pragma vectorize enable, the passes below are always + // inserted in the pipeline, even when the vectorizer doesn't run (ex. when + // on -O1 and no #pragma is found). Would be good to have these two passes + // as function calls, so that we can only pass them when the vectorizer + // changed the code. + // if (OptLevel > 1 && ExtraVectorizerPasses) { + // // At higher optimization levels, try to clean up any runtime overlap and + // // alignment checks inserted by the vectorizer.
We want to track correlated + // // runtime checks for two inner loops in the same outer loop, fold any + // // common computations, hoist loop-invariant aspects out of any outer loop, + // // and unswitch the runtime checks if possible. Once hoisted, we may have + // // dead (or speculatable) control flows or more combining opportunities. + // MPM.add(createEarlyCSEPass()); + // MPM.add(createCorrelatedValuePropagationPass()); + // addInstructionCombiningPass(MPM); + // MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + // MPM.add(createCFGSimplificationPass()); + // addInstructionCombiningPass(MPM); + // } + + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + // MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + + // if (SLPVectorize) { + // MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + // if (OptLevel > 1 && ExtraVectorizerPasses) { + // MPM.add(createEarlyCSEPass()); + // } + // } + + // if (EnableUnrollAndJam && !DisableUnrollLoops) { + // // Unroll and Jam. We do this before unroll but need to be in a separate + // // loop pass manager in order for the outer loop to be processed by + // // unroll and jam before the inner loop is unrolled. + // MPM.add(createLoopUnrollAndJamPass(OptLevel)); + // } + + // Unroll small loops + // MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + // ForgetAllSCEVInLoopUnroll)); + + // if (!DisableUnrollLoops) { + // // LoopUnroll may generate some redundancy to cleanup. + // addInstructionCombiningPass(MPM); + + // // Runtime unrolling will introduce runtime check in loop prologue.
If the + // // unrolled loop is an inner loop, then the prologue will be inside the + // // outer loop. LICM pass can help to promote the runtime check out if the + // // checked value is loop invariant. + // MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + // } + + // MPM.add(createWarnMissedTransformationsPass()); + + // After vectorization and unrolling, assume intrinsics may tell us more + // about pointer alignments. + // MPM.add(createAlignmentFromAssumptionsPass()); + + // FIXME: We shouldn't bother with this anymore. + // MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + // if (OptLevel > 1) { + // MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + // MPM.add(createConstantMergePass()); // Merge dup global constants + // } + + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + // if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + // MPM.add(createHotColdSplittingPass()); + + // if (MergeFunctions) + // MPM.add(createMergeFunctionsPass()); + + // LoopSink pass sinks instructions hoisted by LICM, which serves as a + // canonicalization pass that enables other optimizations. As a result, + // LoopSink pass needs to be a very late IR pass to avoid undoing LICM + // result too early. + // MPM.add(createLoopSinkPass()); + // Get rid of LCSSA nodes. + // MPM.add(createInstSimplifyLegacyPass()); + + // This hoists/decomposes div/rem ops. It should run after other sink/hoist + // passes to avoid re-sinking, but before SimplifyCFG because it can allow + // flattening of blocks. + // MPM.add(createDivRemPairsPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ // MPM.add(createCFGSimplificationPass()); +} + +void PassManagerBuilder::populateModulePassManager( + legacy::PassManagerBase &MPM) { + // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link + // is handled separately, so just check this is not the ThinLTO post-link. + bool DefaultOrPreLinkPipeline = !PerformThinLTO; + + if (OPosetRL){ + if (check_flag == false){ + errs() << "opt level "<< OptLevel << " SizeLevel " << SizeLevel << "\n"; + MPM.add(createPosetRLPass()); + } + check_flag = true; + return; + } + + if(!RunNoPreDistributionPasses){ + if (!PGOSampleUse.empty()) { + MPM.add(createPruneEHPass()); + // In ThinLTO mode, when flattened profile is used, all the available + // profile information will be annotated in PreLink phase so there is + // no need to load the profile again in PostLink. + if (!(FlattenedProfileUsed && PerformThinLTO)) + MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); + } + + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + + // If all optimizations are disabled, just run the always-inline pass and, + // if enabled, the function merging pass. + if (OptLevel == 0) { + addPGOInstrPasses(MPM); + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + + // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly + // creates a CGSCC pass manager, but we don't want to add extensions into + // that pass manager. To prevent this we insert a no-op module pass to reset + // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 + // builds. The function merging pass is + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) + MPM.add(createBarrierNoopPass()); + + if (PerformThinLTO) { + // Drop available_externally and unreferenced globals.
This is necessary + // with ThinLTO in order to avoid leaving undefined references to dead + // globals in the object file. + MPM.add(createEliminateAvailableExternallyPass()); + MPM.add(createGlobalDCEPass()); + } + + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + + if (PrepareForLTO || PrepareForThinLTO) { + MPM.add(createCanonicalizeAliasesPass()); + // Rename anon globals to be able to export them in the summary. + // This has to be done after we add the extensions to the pass manager + // as there could be passes (e.g. Adddress sanitizer) which introduce + // new unnamed globals. + MPM.add(createNameAnonGlobalPass()); + } + return; + } + + // Add LibraryInfo if we have some. + if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + // For ThinLTO there are two passes of indirect call promotion. The + // first is during the compile phase when PerformThinLTO=false and + // intra-module indirect call targets are promoted. The second is during + // the ThinLTO backend when PerformThinLTO=true, when we promote imported + // inter-module indirect calls. For that we perform indirect call promotion + // earlier in the pass pipeline, here before globalopt. Otherwise imported + // available_externally functions look unreferenced and are removed. + if (PerformThinLTO) + MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, + !PGOSampleUse.empty())); + + // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops + // as it will change the CFG too much to make the 2nd profile annotation + // in backend more difficult. + bool PrepareForThinLTOUsingPGOSampleProfile = + PrepareForThinLTO && !PGOSampleUse.empty(); + if (PrepareForThinLTOUsingPGOSampleProfile) + DisableUnrollLoops = true; + + // Infer attributes about declarations if possible. 
+ MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + + if (OptLevel > 2) + MPM.add(createCallSiteSplittingPass()); + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createCalledValuePropagationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. + MPM.add(createAttributorLegacyPass()); + + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. + MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + // For SamplePGO in ThinLTO compile phase, we do not want to do indirect + // call promotion as it will change the CFG too much to make the 2nd + // profile annotation in backend more difficult. + // PGO instrumentation is added during the compile phase for ThinLTO, do + // not run it a second time + if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) + addPGOInstrPasses(MPM); + + // Create profile COMDAT variables. Lld linker wants to see all variables + // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. + if (!PerformThinLTO && EnablePGOCSInstrGen) + MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + + // Start of CallGraph SCC passes. 
+ MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + RunInliner = true; + } + + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); + addFunctionSimplificationPasses(MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + MPM.add(createBarrierNoopPass()); + + if (RunPartialInlining) + MPM.add(createPartialInliningPass()); + + if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + + // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass + // for LTO and ThinLTO -- The actual pass will be called after all inlines + // are performed. + // Need to do this after COMDAT variables have been eliminated, + // (i.e. after EliminateAvailableExternallyPass). 
+ if (!(PrepareForLTO || PrepareForThinLTO)) + addPGOInstrPasses(MPM, /* IsCS */ true); + + if (EnableOrderFileInstrumentation) + MPM.add(createInstrOrderFilePass()); + + MPM.add(createReversePostOrderFunctionAttrsPass()); + + // The inliner performs some kind of dead code elimination as it goes, + // but there are cases that are not really caught by it. We might + // at some point consider teaching the inliner about them, but it + // is OK for now to run GlobalOpt + GlobalDCE in tandem as their + // benefits generally outweight the cost, making the whole pipeline + // faster. + if (RunInliner) { + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + if(EnableFusion){ + MPM.add(createLoopFusePass()); + } + } + + // If we are planning to perform ThinLTO later, let's not bloat the code with + // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes + // during ThinLTO and perform the rest of the optimizations afterward. + if (PrepareForThinLTO) { + // Ensure we perform any last passes, but do so before renaming anonymous + // globals in case the passes add any. + addExtensionsToPM(EP_OptimizerLast, MPM); + MPM.add(createCanonicalizeAliasesPass()); + // Rename anon globals to be able to export them in the summary. + MPM.add(createNameAnonGlobalPass()); + return; + } + + if (PerformThinLTO) + // Optimize globals now when performing ThinLTO, this enables more + // optimizations later. + MPM.add(createGlobalOptimizerPass()); + + // Scheduling LoopVersioningLICM when inlining is over, because after that + // we may see more accurate aliasing. Reason to run this late is that too + // early versioning may prevent further inlining due to increase of code + // size. By placing it just after inlining other optimizations which runs + // later might get benefit of no-alias assumption in clone loop. 
+ if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsAAWrapperPass()); + + MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); + + if (EnableMatrix) { + MPM.add(createLowerMatrixIntrinsicsPass()); + // CSE the pointer arithmetic of the column vectors. This allows alias + // analysis to establish no-aliasing between loads and stores of different + // columns of the same matrix. + MPM.add(createEarlyCSEPass(false)); + } + + addExtensionsToPM(EP_VectorizerStart, MPM); + + // Re-rotate loops in all our loop nests. These may have fallout out of + // rotated form due to GVN or other transformations, and the vectorizer relies + // on the rotated form. Disable header duplication at -Oz. + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + } + + // Distribute loops to allow partial vectorization. I.e. 
isolate dependences + // into separate loop that would otherwise inhibit vectorization. This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + if(!Runcustom_loop_distribution && !RunNoPreDistributionPasses && !RunNoPostDistributionPasses) { + MPM.add(createLoopDistributePass()); + } + + // if(Runcustom_loop_distribution) { + // MPM.add(createcustom_loop_distributionPass()); + // } + + if(!RunNoPostDistributionPasses) { + MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + MPM.add(createLoopLoadEliminationPass()); + + // FIXME: Because of #pragma vectorize enable, the passes below are always + // inserted in the pipeline, even when the vectorizer doesn't run (ex. when + // on -O1 and no #pragma is found). Would be good to have these two passes + // as function calls, so that we can only pass them when the vectorizer + // changed the code. + addInstructionCombiningPass(MPM); + if (OptLevel > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. We want to track correllated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities. 
+ MPM.add(createEarlyCSEPass()); + MPM.add(createCorrelatedValuePropagationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createCFGSimplificationPass()); + addInstructionCombiningPass(MPM); + } + + // Cleanup after loop vectorization, etc. Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + + if (SLPVectorize) { + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (OptLevel > 1 && ExtraVectorizerPasses) { + MPM.add(createEarlyCSEPass()); + } + } + + addExtensionsToPM(EP_Peephole, MPM); + addInstructionCombiningPass(MPM); + + if (EnableUnrollAndJam && !DisableUnrollLoops) { + // Unroll and Jam. We do this before unroll but need to be in a separate + // loop pass manager in order for the outer loop to be processed by + // unroll and jam before the inner loop is unrolled. + MPM.add(createLoopUnrollAndJamPass(OptLevel)); + } + + // Unroll small loops + MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + + if (!DisableUnrollLoops) { + // LoopUnroll may generate some redundency to cleanup. + addInstructionCombiningPass(MPM); + + // Runtime unrolling will introduce runtime check in loop prologue. If the + // unrolled loop is a inner loop, then the prologue will be inside the + // outer loop. LICM pass can help to promote the runtime check out if the + // checked value is loop invariant. 
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } + + MPM.add(createWarnMissedTransformationsPass()); + + // After vectorization and unrolling, assume intrinsics may tell us more + // about pointer alignments. + MPM.add(createAlignmentFromAssumptionsPass()); + + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 1) { + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants + } + + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + MPM.add(createHotColdSplittingPass()); + + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + + // LoopSink pass sinks instructions hoisted by LICM, which serves as a + // canonicalization pass that enables other optimizations. As a result, + // LoopSink pass needs to be a very late IR pass to avoid undoing LICM + // result too early. + MPM.add(createLoopSinkPass()); + // Get rid of LCSSA nodes. + MPM.add(createInstSimplifyLegacyPass()); + + // This hoists/decomposes div/rem ops. It should run after other sink/hoist + // passes to avoid re-sinking, but before SimplifyCFG because it can allow + // flattening of blocks. + MPM.add(createDivRemPairsPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 
+ MPM.add(createCFGSimplificationPass()); + + addExtensionsToPM(EP_OptimizerLast, MPM); + + if (PrepareForLTO) { + MPM.add(createCanonicalizeAliasesPass()); + // Rename anon globals to be able to handle them in the summary + MPM.add(createNameAnonGlobalPass()); + } + } +} + +void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { + // Load sample profile before running the LTO optimization pipeline. + if (!PGOSampleUse.empty()) { + PM.add(createPruneEHPass()); + PM.add(createSampleProfileLoaderPass(PGOSampleUse)); + } + + // Remove unused virtual tables to improve the quality of code generated by + // whole-program devirtualization and bitset lowering. + PM.add(createGlobalDCEPass()); + + // Provide AliasAnalysis services for optimizations. + addInitialAliasAnalysisPasses(PM); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + + if (OptLevel > 1) { + // Split call-site with more constrained arguments. + PM.add(createCallSiteSplittingPass()); + + // Indirect call promotion. This should promote all the targets that are + // left by the earlier promotion pass that promotes intra-module targets. + // This two-step promotion is to save the compile time. For LTO, it should + // produce the same result as if we only do promotion here. + PM.add( + createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. 
+ PM.add(createCalledValuePropagationPass()); + + // Infer attributes on declarations, call sites, arguments, etc. + PM.add(createAttributorLegacyPass()); + } + + // Infer attributes about definitions. The readnone attribute in particular is + // required for virtual constant propagation. + PM.add(createPostOrderFunctionAttrsLegacyPass()); + PM.add(createReversePostOrderFunctionAttrsPass()); + + // Split globals using inrange annotations on GEP indices. This can help + // improve the quality of generated code when virtual constant propagation or + // control flow integrity are enabled. + PM.add(createGlobalSplitPass()); + + // Apply whole-program devirtualization and virtual constant propagation. + PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + + // That's all we need at opt level 1. + if (OptLevel == 1) + return; + + // Now that we internalized some globals, see if we can hack on them! + PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant. + PM.add(createConstantMergePass()); + + // Remove unused arguments from functions. + PM.add(createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + if (OptLevel > 2) + PM.add(createAggressiveInstCombinerPass()); + addInstructionCombiningPass(PM); + addExtensionsToPM(EP_Peephole, PM); + + // Inline small functions + bool RunInliner = Inliner; + if (RunInliner) { + PM.add(Inliner); + Inliner = nullptr; + } + + PM.add(createPruneEHPass()); // Remove dead EH info. + + // CSFDO instrumentation and use pass. 
+ addPGOInstrPasses(PM, /* IsCS */ true); + + // Optimize globals again if we ran the inliner. + if (RunInliner) + PM.add(createGlobalOptimizerPass()); + PM.add(createGlobalDCEPass()); // Remove dead functions. + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + PM.add(createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + addInstructionCombiningPass(PM); + addExtensionsToPM(EP_Peephole, PM); + PM.add(createJumpThreadingPass()); + + // Break up allocas + PM.add(createSROAPass()); + + // LTO provides additional opportunities for tailcall elimination due to + // link-time inlining, and visibility of nocapture attribute. + if (OptLevel > 1) + PM.add(createTailCallEliminationPass()); + + if (RunInliner) + PM.add(createLoopFusePass()); + + // Infer attributes on declarations, call sites, arguments, etc. + PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. + // Run a few AA driven optimizations here and now, to cleanup the code. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. + + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. + PM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. + + // Nuke dead stores. + PM.add(createDeadStoreEliminationPass()); + + // More loops are countable; try to optimize them. + PM.add(createIndVarSimplifyPass()); + PM.add(createLoopDeletionPass()); + if (EnableLoopInterchange) + PM.add(createLoopInterchangePass()); + + // Unroll small loops + PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + PM.add(createLoopVectorizePass(true, !LoopVectorize)); + // The vectorizer may have significantly shortened a loop body; unroll again. 
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, + ForgetAllSCEVInLoopUnroll)); + + PM.add(createWarnMissedTransformationsPass()); + + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + addInstructionCombiningPass(PM); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + addInstructionCombiningPass(PM); // Clean up again + PM.add(createBitTrackingDCEPass()); + + // More scalar chains could be vectorized due to more alias information + if (SLPVectorize) + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + // After vectorization, assume intrinsics may tell us more about pointer + // alignments. + PM.add(createAlignmentFromAssumptionsPass()); + + // Cleanup and simplify the code after the scalar optimizations. + addInstructionCombiningPass(PM); + addExtensionsToPM(EP_Peephole, PM); + + PM.add(createJumpThreadingPass()); +} + +void PassManagerBuilder::addLateLTOOptimizationPasses( + legacy::PassManagerBase &PM) { + // See comment in the new PM for justification of scheduling splitting at + // this stage (\ref buildLTODefaultPipeline). + if (EnableHotColdSplit) + PM.add(createHotColdSplittingPass()); + + // Delete basic blocks, which optimization passes may have killed. + PM.add(createCFGSimplificationPass()); + + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + + // Now that we have optimized the program, discard unreachable functions. + PM.add(createGlobalDCEPass()); + + // FIXME: this is profitable (for compiler time) to do at -O0 too, but + // currently it damages debug info. 
+ if (MergeFunctions) + PM.add(createMergeFunctionsPass()); +} + +void PassManagerBuilder::populateThinLTOPassManager( + legacy::PassManagerBase &PM) { + PerformThinLTO = true; + if (LibraryInfo) + PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (VerifyInput) + PM.add(createVerifierPass()); + + if (ImportSummary) { + // These passes import type identifier resolutions for whole-program + // devirtualization and CFI. They must run early because other passes may + // disturb the specific instruction patterns that these passes look for, + // creating dependencies on resolutions that may not appear in the summary. + // + // For example, GVN may transform the pattern assume(type.test) appearing in + // two basic blocks into assume(phi(type.test, type.test)), which would + // transform a dependency on a WPD resolution into a dependency on a type + // identifier resolution for CFI. + // + // Also, WPD has access to more precise information than ICP and can + // devirtualize more effectively, so it should operate on the IR first. + PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary)); + PM.add(createLowerTypeTestsPass(nullptr, ImportSummary)); + } + + populateModulePassManager(PM); + + if (VerifyOutput) + PM.add(createVerifierPass()); + PerformThinLTO = false; +} + +void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { + if (LibraryInfo) + PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + if (VerifyInput) + PM.add(createVerifierPass()); + + addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM); + + if (OptLevel != 0) + addLTOOptimizationPasses(PM); + else { + // The whole-program-devirt pass needs to run at -O0 because only it knows + // about the llvm.type.checked.load intrinsic: it needs to both lower the + // intrinsic itself and handle it in the summary. 
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + } + + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + + // Lower type metadata and the type.test intrinsic. This pass supports Clang's + // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at + // link time if CFI is enabled. The pass does nothing if CFI is disabled. + PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + + if (OptLevel != 0) + addLateLTOOptimizationPasses(PM); + + addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM); + + if (VerifyOutput) + PM.add(createVerifierPass()); +} + +inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { + return reinterpret_cast(P); +} + +inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { + return reinterpret_cast(P); +} + +LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { + PassManagerBuilder *PMB = new PassManagerBuilder(); + return wrap(PMB); +} + +void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { + PassManagerBuilder *Builder = unwrap(PMB); + delete Builder; +} + +void +LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->OptLevel = OptLevel; +} + +void +LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->SizeLevel = SizeLevel; +} + +void +LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + // NOTE: The DisableUnitAtATime switch has been removed. 
+} + +void +LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->DisableUnrollLoops = Value; +} + +void +LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { + // NOTE: The simplify-libcalls pass has been removed. +} + +void +LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, + unsigned Threshold) { + PassManagerBuilder *Builder = unwrap(PMB); + Builder->Inliner = createFunctionInliningPass(Threshold); +} + +void +LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + legacy::FunctionPassManager *FPM = unwrap(PM); + Builder->populateFunctionPassManager(*FPM); +} + +void +LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM) { + PassManagerBuilder *Builder = unwrap(PMB); + legacy::PassManagerBase *MPM = unwrap(PM); + Builder->populateModulePassManager(*MPM); +} + +void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, + LLVMPassManagerRef PM, + LLVMBool Internalize, + LLVMBool RunInliner) { + PassManagerBuilder *Builder = unwrap(PMB); + legacy::PassManagerBase *LPM = unwrap(PM); + + // A small backwards compatibility hack. populateLTOPassManager used to take + // an RunInliner option. 
+ if (RunInliner && !Builder->Inliner) + Builder->Inliner = createFunctionInliningPass(); + + Builder->populateLTOPassManager(*LPM); +} diff --git a/llvm/lib/Transforms/IPO/PosetRL/README.md b/llvm/lib/Transforms/IPO/PosetRL/README.md new file mode 100644 index 000000000000..a8c62c24fd2d --- /dev/null +++ b/llvm/lib/Transforms/IPO/PosetRL/README.md @@ -0,0 +1,87 @@ +# POSET-RL +POSET-RL uses a reinforcement learning approach to find a pass sequence (for optimal code size + execution time), as the search space of optimization sequences is too big to enumerate. For a compiler with `m` optimization passes, if the sequence length is fixed as `n`, then there can be potentially `m^n` combinations, allowing repetitions. + +This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). +Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. + +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. 
VenkataKeerthy and Ramakrishna Upadrasta, ISPASS 2022 + +# Environment setup +Execute the following command for environment setup: + +```bash +conda env create -f ${LLVM_DIR}/model/POSET-RL/posetrl_env.yml +# LLVM_DIR is the path to the llvm directory +``` +# Inference on Trained Models +There are three modes of communication via [MLCompilerBridge](https://compilers.cse.iith.ac.in/publications/mlcompilerbridge): +- [gRPC Model Runner](#inference-flow-using-grpc-model-runner) - gRPC based Model Runner +- [Pipe Model Runner](#inference-flow-using-pipe-model-runner) - Unix pipes based Model Runner +- [ONNX Model Runner](#inference-flow-using-onnxmodelrunner) - In-process Model Runner (Suited for stand-alone inference/deployment) + +## Inference flow using gRPC Model Runner + +### Starting the gRPC server: +The script for running the server, `inference.py`, is present in this directory: ```${LLVM_DIR}/model/POSET_RL/src``` + + +```py +python inference.py --use_grpc --server_port=<port_no> --model=<model_path> +``` +- `port_no`: The port where the server is hosted +- `model_path`: The path to the checkpoint directory, for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir + +### Invoking the POSET-RL Pass via gRPC: +```bash +# Open a new terminal +${BUILD_DIR}/bin/opt -poset-rl -ml-config-path=${LLVM_DIR}/config -server_address=127.0.0.1:<port_no> <input_file> -o <output_file> +``` +- `BUILD_DIR`: Directory where the project is built +- `port_no`: Port number for communication + +## Inference flow using Pipe Model Runner + +### Starting the Pipe Server: +The script for running the server, `inference.py`, is present in this directory: ```${LLVM_DIR}/model/POSET_RL/src``` + +```py +python inference.py --use_pipe --pipe_name=<pipe_identifier> --data_format=<format> --model=<model> +``` +- `pipe_identifier`: Name of the pipe for communication. (A pipe with this name will be created during communication) +- `format`: The data format for communication between server and client. Can either be `json` or `bytes`. 
+- `model`: The path to the checkpoint directory, for example: ${LLVM_DIR}/model/POSET-RL/checkpoint_dir + +### Invoking the POSET-RL Pass via Pipes +```bash +${BUILD_DIR}/bin/opt -poset-rl -use-pipe -pipe-name=<pipe_identifier> -data-format=<format> -ml-config-path=${LLVM_DIR}/config <input_file> -o <output_file> +``` +- `BUILD_DIR`: Directory where the project is built +- `pipe_identifier`: Name of the pipe for communication. (A pipe with this name will be created during communication) +- `format`: The data format for communication between the server and the client. Can either be `json` or `bytes`. + +Note: Both `opt` and `inference.py` should be invoked with the same `format`. + +## Inference flow using ONNX Model Runner + +### Invoking the POSET-RL pass with ONNX: +```bash +${BUILD_DIR}/bin/opt -poset-rl -use-onnx -ml-config-path=${LLVM_DIR}/config <input_file> -o <output_file> +``` +## Dumping Model files + +```bash +cd ml-llvm-project/model/POSET_RL/src + +python inference.py --test-dir=<test_dir> --use_grpc --server_address=<server_address> --model=<model_path> --export_onnx + +``` +- The `--export_onnx` option in `inference.py` is responsible for dumping the ONNX model +- The Model files will be dumped in `model/onnx-model` +- Rename the Model files as `<model_name>.onnx`. +- Move the Model files from their respective directory to the path `config/posetrl/onnx-checkpoint` +- Make sure to regenerate new ONNX models for new checkpoints. 
+ + +### Model Training: [Refer to Model Training](../../../../../model/POSET-RL/README.md) + + diff --git a/llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h b/llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h new file mode 100644 index 000000000000..f155339b6a24 --- /dev/null +++ b/llvm/lib/Transforms/IPO/PosetRL/inference/poset_rl_env.h @@ -0,0 +1,71 @@ +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +#define ActionMaskSize 34 +#define EmbeddingSize 300 + +using namespace llvm; + +typedef std::vector Embedding; +typedef std::vector ActionMask; + +using namespace MLBridge; + +class PosetRLEnv : public Environment { + unsigned Actioncount = 0; + Embedding CurrEmbedding; + ActionMask CurrActionMask; + Observation CurrObs; +public: + std::vector Sequence; +public: + PosetRLEnv(); + Observation& reset() override; + Observation& step(Action) override; + virtual Embedding getEmbeddings() = 0; + virtual void applySeq(Action) = 0; +}; + +inline Observation& PosetRLEnv::step(Action Action) { + Sequence.push_back(Action); + applySeq(Action); + + Actioncount += 1; + CurrActionMask[Action % ActionMaskSize] = 0; + CurrEmbedding = getEmbeddings(); + + if (Actioncount >= 34) + setDone(); + + CurrObs.clear(); + std::copy(CurrActionMask.begin(), CurrActionMask.end(), + std::back_inserter(CurrObs)); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + + return CurrObs; +} + +inline Observation& PosetRLEnv::reset() { + CurrEmbedding = getEmbeddings(); + CurrActionMask.assign(ActionMaskSize, 1); + + CurrObs.clear(); + std::copy(CurrActionMask.begin(), CurrActionMask.end(), + std::back_inserter(CurrObs)); + std::copy(CurrEmbedding.begin(), CurrEmbedding.end(), + std::back_inserter(CurrObs)); + + return CurrObs; +} + +inline PosetRLEnv::PosetRLEnv() { + 
CurrEmbedding.assign(EmbeddingSize, 0); + CurrActionMask.assign(ActionMaskSize, 1); + setNextAgent("agent"); +} diff --git a/llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp b/llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp new file mode 100644 index 000000000000..8212aa4e3244 --- /dev/null +++ b/llvm/lib/Transforms/IPO/PosetRL/posetRL.cpp @@ -0,0 +1,209 @@ +#include "llvm/Transforms/IPO/PosetRL/PosetRL.h" +#include "inference/poset_rl_env.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR2Vec.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include +#include "grpc/posetRL/posetRL.grpc.pb.h" +#include "grpc/posetRL/posetRL.pb.h" +#include +#include +#include +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/PipeModelRunner.h" +#include "MLModelRunner/gRPCModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "grpcpp/impl/codegen/status.h" + +using namespace llvm; +using namespace grpc; +using namespace posetRLgRPC; +// using namespace google::protobuf; + +static cl::opt training("training", cl::Hidden, + cl::desc("whether it is training or inference"), + cl::init(false)); + +static cl::opt + usePipe("use-pipe", cl::Hidden, + cl::desc("Use pipe based interation with python model"), + cl::init(false)); + +static cl::opt data_format( + "data-format", cl::Hidden, cl::init("protobuf"), + cl::desc("Data format to use for communication with python model")); +static cl::opt useONNX("use-onnx", cl::Hidden, + cl::desc("Use ONNX for inferencing model"), + cl::init(false)); + +static cl::opt server_address( + "server_address", 
cl::Hidden, + cl::desc("Starts the server in the given address, format :"), + cl::init("127.0.0.1:50051")); + +static cl::opt pipe_name("pipe-name", cl::Hidden, + cl::init("posetrl_pipe"), + cl::desc("Name for pipe file")); + +namespace { +struct PosetRL : public ModulePass, + public PosetRLEnv, + public posetRLgRPC::PosetRLService::Service { + static char ID; + PosetRL() : ModulePass(ID) {} + bool runOnModule(Module &M) override { + assert(MLConfig::mlconfig != "" && "ml-config-path required" ); + this->M = &M; + // Establish pipe communication + if (usePipe) { + // data_format can take values: protobuf, json, bytes + std::string basename = + "/tmp/" + pipe_name; + + BaseSerDes::Kind SerDesType; + if (data_format == "json") + SerDesType = BaseSerDes::Kind::Json; + else if (data_format == "protobuf") + SerDesType = BaseSerDes::Kind::Protobuf; + else if (data_format == "bytes") + SerDesType = BaseSerDes::Kind::Bitstream; + else { + errs() << "Invalid data format\n"; + exit(1); + } + + MLRunner = std::make_unique( + basename + ".out", basename + ".in", SerDesType, &M.getContext()); + posetRLgRPC::EmbeddingResponse response; + posetRLgRPC::ActionRequest request; + MLRunner->setRequest(&response); + MLRunner->setResponse(&request); + initPipeCommunication(); + } else { + if (training) { + MLRunner = std::make_unique>(server_address, this, &M.getContext()); + } else if (useONNX) { + Agent agent(MLConfig::mlconfig + + "/posetrl/posetrl_model.onnx"); + std::map agents; + agents["agent"] = &agent; + MLRunner = + std::make_unique(this, agents, &M.getContext()); + MLRunner->evaluate(); + errs() << "Sequence: "; + for (auto a : Sequence) + errs() << a << " "; + errs() << "\n"; + } else { + posetRLgRPC::EmbeddingResponse request; + posetRLgRPC::ActionRequest response; + MLRunner = std::make_unique>( + server_address, &request, &response, &M.getContext()); + MLRunner->setRequest(&request); + MLRunner->setResponse(&response); + initPipeCommunication(); + } + } + return true; + } 
+ void initPipeCommunication() { + int passSequence = 0; + while (passSequence != -1) { + std::pair> p1("embedding", + getEmbeddings()); + MLRunner->populateFeatures(p1); + int Res = MLRunner->evaluate(); + processMLAdvice(Res); + passSequence = Res; + errs() << "Sequence : " << passSequence << "\t"; + } + } + + inline void processMLAdvice(int advice) { applySeq(advice); } + + Embedding getEmbeddings() override { + auto Ir2vec = + IR2Vec::Embeddings(*M, IR2Vec::IR2VecMode::FlowAware, + MLConfig::mlconfig + "/ir2vec/seedEmbeddingVocab-300-llvm10.txt"); + auto ProgVector = Ir2vec.getProgramVector(); + Embedding Vector(ProgVector.begin(), ProgVector.end()); + return Vector; + } + + void applySeq(Action Action) override { + PassManagerBuilder Builder; + Builder.OptLevel = 2; + Builder.SizeLevel = 2; + + legacy::FunctionPassManager FPM(M); + legacy::PassManager MPM; + Builder.customPopulateFunctionPassManager(FPM, 34, Action); + Builder.customPopulateModulePassManager(MPM, 34, Action); + // run the passes + MPM.run(*M); + for (auto &F : *M) { + FPM.run(F); + } + } + + grpc::Status + applyActionGetEmbeddings(grpc::ServerContext *context, + const ::posetRLgRPC::ActionRequest *request, + ::posetRLgRPC::EmbeddingResponse *response) override { + // errs() << "Action requested: " << request->action() << "\n"; + if (request->action() == -1) { + return grpc::Status::OK; + } + if (request->action() != 0) + processMLAdvice(request->action()); + + Embedding emb = getEmbeddings(); + for (unsigned long i = 0; i < emb.size(); i++) { + response->add_embedding(emb[i]); + } + return grpc::Status::OK; + } + + grpc::Status + queryCompiler(grpc::ServerContext *context, + const ::posetRLgRPC::ActionRequest *request, + ::posetRLgRPC::EmbeddingResponse *response) { + if (request->action() == -1) { + return grpc::Status::OK; + } else if (request->action() != 0) + processMLAdvice(request->action()); + + Embedding emb = getEmbeddings(); + for (unsigned long i = 0; i < emb.size(); i++) { + 
response->add_embedding(emb[i]); + } + return grpc::Status::OK; + } + +private: + Module *M; + std::unique_ptr MLRunner; +}; +} // namespace +char PosetRL::ID = 0; +INITIALIZE_PASS_BEGIN(PosetRL, "poset-rl", "poset sequence pass", false, false) +INITIALIZE_PASS_END(PosetRL, "poset-rl", "poset sequence pass", false, false) + +ModulePass *llvm::createPosetRLPass() { return new PosetRL(); } diff --git a/llvm/lib/Transforms/PosetRL/CMakeLists.txt b/llvm/lib/Transforms/PosetRL/CMakeLists.txt new file mode 100644 index 000000000000..8a051d08343a --- /dev/null +++ b/llvm/lib/Transforms/PosetRL/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_component_library(LLVMPosetRL posetRL.cpp + +LINK_COMPONENTS +IR2Vec + +ADDITIONAL_HEADER_DIRS +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms +${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/PosetRL + +DEPENDS +intrinsics_gen +LLVMMLBridge +) +target_link_libraries(LLVMPosetRL PUBLIC LLVMMLBridge) diff --git a/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt b/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt index 10084ba343b8..3fb67ee47fac 100644 --- a/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt +++ b/llvm/lib/Transforms/demoGrpcPass/CMakeLists.txt @@ -18,8 +18,18 @@ add_llvm_library(HelloWorld MODULE BUILDTREE_ONLY DEPENDS intrinsics_gen + LLVMMLBridge + PLUGIN_TOOL opt ) +<<<<<<< HEAD +<<<<<<< HEAD # add_llvm_library(HelloWorld MODULE Hello.cpp) +======= +target_link_libraries(LLVMHelloGRPC PRIVATE LLVMMLBridge) +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp +======= +target_link_libraries(LLVMHelloGRPC PUBLIC LLVMMLBridge) +>>>>>>> 92e0943e9769... 
Fixed cmake linking issues diff --git a/llvm/tools/CMakeLists.txt b/llvm/tools/CMakeLists.txt index 2a7ca1bfa84d..8cd62085956c 100644 --- a/llvm/tools/CMakeLists.txt +++ b/llvm/tools/CMakeLists.txt @@ -55,7 +55,10 @@ add_llvm_external_project(bolt) add_llvm_implicit_projects() add_llvm_external_project(polly) +<<<<<<< HEAD add_llvm_external_project(MLCompilerBridge) +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues # Add subprojects specified using LLVM_EXTERNAL_PROJECTS foreach(p ${LLVM_EXTERNAL_PROJECTS}) diff --git a/llvm/tools/bugpoint/CMakeLists.txt b/llvm/tools/bugpoint/CMakeLists.txt index b0e71910c7cc..9485682025bd 100644 --- a/llvm/tools/bugpoint/CMakeLists.txt +++ b/llvm/tools/bugpoint/CMakeLists.txt @@ -21,6 +21,10 @@ set(LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD +======= + IR2Vec +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) add_llvm_tool(bugpoint diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt index 257d5b519f04..f1f37cccaa48 100644 --- a/llvm/tools/llc/CMakeLists.txt +++ b/llvm/tools/llc/CMakeLists.txt @@ -19,6 +19,12 @@ set(LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD +======= + IPO + IR2Vec + CollectMachineIR +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) add_llvm_tool(llc diff --git a/llvm/tools/llvm-mca/CMakeLists.txt b/llvm/tools/llvm-mca/CMakeLists.txt index 878a05c51cfb..db23b161e9a6 100644 --- a/llvm/tools/llvm-mca/CMakeLists.txt +++ b/llvm/tools/llvm-mca/CMakeLists.txt @@ -10,7 +10,11 @@ set(LLVM_LINK_COMPONENTS MC MCParser Support +<<<<<<< HEAD TargetParser +======= + IR2Vec +>>>>>>> fb9f173c59b7... 
Moved CodeSizeOpt to IPO ) add_llvm_tool(llvm-mca diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index eb71cffba6dd..85f534b2cb8d 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -225,6 +225,7 @@ static cl::opt ShowBarriers( cl::desc("Print memory barrier information in the instruction info view"), cl::cat(ViewOptions), cl::init(false)); +<<<<<<< HEAD static cl::opt DisableCustomBehaviour( "disable-cb", cl::desc( @@ -236,6 +237,11 @@ static cl::opt DisableInstrumentManager( cl::desc("Disable instrumentation manager (use the default class which " "ignores instruments.)."), cl::cat(ViewOptions), cl::init(false)); +======= +static cl::opt + loopID("lc-lID", cl::Hidden, cl::Optional, + cl::desc("ID of the loop set by RDG/loop distribution pass"), cl::init(0)); +>>>>>>> d83580ac9157... Changed the llvm-mca.cpp to get the MCA throughput changed lines 220 and 466 namespace { @@ -563,7 +569,24 @@ int main(int argc, char **argv) { if (Region->empty()) continue; +<<<<<<< HEAD IB.clear(); +======= + // Don't print the header of this region if it is the default region, and + // it doesn't have an end location. + if (Region->startLoc().isValid() || Region->endLoc().isValid()) { + StringRef Desc = Region->getDescription(); + std::string DescToMatch = funcName + "-" + std::to_string(loopID); + // errs () << "funcName : " << funcName << " loop id : " << loopID << "\n"; + // errs () << Desc << " " << DescToMatch << "\n"; + if (loopID && !Desc.equals(DescToMatch)) + continue; + TOF->os() << "\n[" << RegionIdx++ << "] Code Region"; + if (!Desc.empty()) + TOF->os() << " - " << Desc; + TOF->os() << "\n\n"; + } +>>>>>>> d83580ac9157... Changed the llvm-mca.cpp to get the MCA throughput changed lines 220 and 466 // Lower the MCInst sequence into an mca::Instruction sequence. 
ArrayRef Insts = Region->getInstructions(); diff --git a/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt b/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt index b3c1c85681d2..b2107617702c 100644 --- a/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt +++ b/llvm/tools/llvm-opt-fuzzer/CMakeLists.txt @@ -24,7 +24,13 @@ set(LLVM_LINK_COMPONENTS TargetParser TransformUtils Vectorize +<<<<<<< HEAD ) +======= + Passes + IR2Vec +) +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO add_llvm_fuzzer(llvm-opt-fuzzer llvm-opt-fuzzer.cpp diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index b23e70839f67..9aa35d71b876 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -29,6 +29,19 @@ set(LLVM_LINK_COMPONENTS TransformUtils Vectorize Passes +<<<<<<< HEAD +<<<<<<< HEAD +<<<<<<< HEAD +======= + HelloMLBridge +======= +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp + AddSizeAttr +======= +>>>>>>> 92e0943e9769... Fixed cmake linking issues + IR2Vec + CollectMachineIR +>>>>>>> fb9f173c59b7... Moved CodeSizeOpt to IPO ) add_llvm_tool(opt diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index a9248efa189f..a4aff0b82745 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -424,6 +424,11 @@ int main(int argc, char **argv) { initializeTransformUtils(Registry); initializeInstCombine(Registry); initializeTarget(Registry); +<<<<<<< HEAD +======= + initializePosetRLPass(Registry); + initializeAddSizeAttrPassPass(Registry); +>>>>>>> e4011a594dc3... Added MLConfig in Support/CommandLine.cpp // For codegen passes, only passes that do IR to IR transformation are // supported. 
initializeExpandLargeDivRemLegacyPassPass(Registry); diff --git a/mlir/lib/Transforms/HelloMLBridgePass.cpp b/mlir/lib/Transforms/HelloMLBridgePass.cpp new file mode 100644 index 000000000000..27da7916267e --- /dev/null +++ b/mlir/lib/Transforms/HelloMLBridgePass.cpp @@ -0,0 +1,491 @@ +#include "MLModelRunner/MLModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/ONNXModelRunner.h" +#include "MLModelRunner/ONNXModelRunner/environment.h" +#include "MLModelRunner/ONNXModelRunner/utils.h" +#include "MLModelRunner/PipeModelRunner.h" +#include "MLModelRunner/TFModelRunner.h" +#include "MLModelRunner/Utils/MLConfig.h" +#include "MLModelRunner/gRPCModelRunner.h" +#include "grpc/helloMLBridge/helloMLBridge.grpc.pb.h" +#include "grpc/helloMLBridge/helloMLBridge.pb.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + +#include "tf_models/LinearModel1000.h" +#include "tf_models/LinearModel10000.h" +#include "tf_models/LinearModel10500.h" +#include "tf_models/LinearModel11000.h" +#include "tf_models/LinearModel11500.h" +#include "tf_models/LinearModel12000.h" +#include "tf_models/LinearModel12500.h" +#include "tf_models/LinearModel13000.h" +#include "tf_models/LinearModel13500.h" +#include "tf_models/LinearModel14000.h" +#include "tf_models/LinearModel14500.h" +#include "tf_models/LinearModel1500.h" +#include "tf_models/LinearModel15000.h" +#include "tf_models/LinearModel15500.h" +#include "tf_models/LinearModel16000.h" +#include "tf_models/LinearModel16500.h" +#include "tf_models/LinearModel17000.h" +#include "tf_models/LinearModel17500.h" +#include "tf_models/LinearModel18000.h" +#include "tf_models/LinearModel18500.h" +#include 
"tf_models/LinearModel19000.h" +#include "tf_models/LinearModel19500.h" +#include "tf_models/LinearModel2000.h" +#include "tf_models/LinearModel20000.h" +#include "tf_models/LinearModel20500.h" +#include "tf_models/LinearModel21000.h" +#include "tf_models/LinearModel21500.h" +#include "tf_models/LinearModel22000.h" +#include "tf_models/LinearModel22500.h" +#include "tf_models/LinearModel23000.h" +#include "tf_models/LinearModel23500.h" +#include "tf_models/LinearModel24000.h" +#include "tf_models/LinearModel24500.h" +#include "tf_models/LinearModel2500.h" +#include "tf_models/LinearModel25000.h" +#include "tf_models/LinearModel25500.h" +#include "tf_models/LinearModel26000.h" +#include "tf_models/LinearModel26500.h" +#include "tf_models/LinearModel27000.h" +#include "tf_models/LinearModel27500.h" +#include "tf_models/LinearModel28000.h" +#include "tf_models/LinearModel28500.h" +#include "tf_models/LinearModel29000.h" +#include "tf_models/LinearModel29500.h" +#include "tf_models/LinearModel3000.h" +#include "tf_models/LinearModel30000.h" +#include "tf_models/LinearModel30500.h" +#include "tf_models/LinearModel31000.h" +#include "tf_models/LinearModel31500.h" +#include "tf_models/LinearModel32000.h" +#include "tf_models/LinearModel32500.h" +#include "tf_models/LinearModel33000.h" +#include "tf_models/LinearModel33500.h" +#include "tf_models/LinearModel34000.h" +#include "tf_models/LinearModel34500.h" +#include "tf_models/LinearModel3500.h" +#include "tf_models/LinearModel35000.h" +#include "tf_models/LinearModel35500.h" +#include "tf_models/LinearModel36000.h" +#include "tf_models/LinearModel36500.h" +#include "tf_models/LinearModel37000.h" +#include "tf_models/LinearModel37500.h" +#include "tf_models/LinearModel38000.h" +#include "tf_models/LinearModel38500.h" +#include "tf_models/LinearModel39000.h" +#include "tf_models/LinearModel39500.h" +#include "tf_models/LinearModel4000.h" +#include "tf_models/LinearModel40000.h" +#include "tf_models/LinearModel40500.h" 
+#include "tf_models/LinearModel41000.h" +#include "tf_models/LinearModel41500.h" +#include "tf_models/LinearModel42000.h" +#include "tf_models/LinearModel42500.h" +#include "tf_models/LinearModel43000.h" +#include "tf_models/LinearModel43500.h" +#include "tf_models/LinearModel44000.h" +#include "tf_models/LinearModel44500.h" +#include "tf_models/LinearModel4500.h" +#include "tf_models/LinearModel45000.h" +#include "tf_models/LinearModel45500.h" +#include "tf_models/LinearModel46000.h" +#include "tf_models/LinearModel46500.h" +#include "tf_models/LinearModel47000.h" +#include "tf_models/LinearModel47500.h" +#include "tf_models/LinearModel48000.h" +#include "tf_models/LinearModel48500.h" +#include "tf_models/LinearModel49000.h" +#include "tf_models/LinearModel49500.h" +#include "tf_models/LinearModel500.h" +#include "tf_models/LinearModel5000.h" +#include "tf_models/LinearModel50000.h" +#include "tf_models/LinearModel5500.h" +#include "tf_models/LinearModel6000.h" +#include "tf_models/LinearModel6500.h" +#include "tf_models/LinearModel7000.h" +#include "tf_models/LinearModel7500.h" +#include "tf_models/LinearModel8000.h" +#include "tf_models/LinearModel8500.h" +#include "tf_models/LinearModel9000.h" +#include "tf_models/LinearModel9500.h" + +#define MODELS(M) \ + M(500) \ + M(1000) \ + M(1500) \ + M(2000) \ + M(2500) \ + M(3000) \ + M(3500) \ + M(4000) \ + M(4500) \ + M(5000) \ + M(5500) \ + M(6000) \ + M(6500) \ + M(7000) \ + M(7500) \ + M(8000) \ + M(8500) \ + M(9000) \ + M(9500) \ + M(10000) \ + M(10500) \ + M(11000) \ + M(11500) \ + M(12000) \ + M(12500) \ + M(13000) \ + M(13500) \ + M(14000) \ + M(14500) \ + M(15000) \ + M(15500) \ + M(16000) \ + M(16500) \ + M(17000) \ + M(17500) \ + M(18000) \ + M(18500) \ + M(19000) \ + M(19500) \ + M(20000) \ + M(20500) \ + M(21000) \ + M(21500) \ + M(22000) \ + M(22500) \ + M(23000) \ + M(23500) \ + M(24000) \ + M(24500) \ + M(25000) \ + M(25500) \ + M(26000) \ + M(26500) \ + M(27000) \ + M(27500) \ + M(28000) \ + M(28500) 
\ + M(29000) \ + M(29500) \ + M(30000) \ + M(30500) \ + M(31000) \ + M(31500) \ + M(32000) \ + M(32500) \ + M(33000) \ + M(33500) \ + M(34000) \ + M(34500) \ + M(35000) \ + M(35500) \ + M(36000) \ + M(36500) \ + M(37000) \ + M(37500) \ + M(38000) \ + M(38500) \ + M(39000) \ + M(39500) \ + M(40000) \ + M(40500) \ + M(41000) \ + M(41500) \ + M(42000) \ + M(42500) \ + M(43000) \ + M(43500) \ + M(44000) \ + M(44500) \ + M(45000) \ + M(45500) \ + M(46000) \ + M(46500) \ + M(47000) \ + M(47500) \ + M(48000) \ + M(48500) \ + M(49000) \ + M(49500) \ + M(50000) + +static llvm::cl::opt + training("mlir-hello-training", llvm::cl::Hidden, + llvm::cl::desc("whether it is training or inference"), + llvm::cl::init(false)); +static llvm::cl::opt server_address( + "mlir-hello-server-address", llvm::cl::Hidden, + llvm::cl::desc( + "Starts the server in the given address, format :"), + llvm::cl::init("localhost:5050")); + +static llvm::cl::opt data_format( + "mlir-hello-data-format", llvm::cl::Hidden, llvm::cl::init("json"), + llvm::cl::desc("Data format to use for communication with python model")); + +static llvm::cl::opt + useONNX("mlir-hello-use-onnx", llvm::cl::Hidden, + llvm::cl::desc("Use ONNX for inferencing model"), + llvm::cl::init(false)); + +static llvm::cl::opt + usePipe("mlir-hello-use-pipe", llvm::cl::Hidden, + llvm::cl::desc("Use pipe based interation with python model"), + llvm::cl::init(false)); + +static llvm::cl::opt + pipe_name("mlir-hello-pipe-name", llvm::cl::Hidden, llvm::cl::init("dummy"), + llvm::cl::desc("Name for pipe file")); +static llvm::cl::opt n("mlir-hello-data-size", llvm::cl::Hidden, + llvm::cl::init(1000), + llvm::cl::desc("Size of input vector")); + +static llvm::cl::opt + useTF("mlir-hello-use-tf", llvm::cl::Hidden, + llvm::cl::desc("Use TF AOT for inferencing model"), + llvm::cl::init(false)); + +using namespace mlir; +using namespace grpc; +using namespace MLBridge; +using namespace helloMLBridgegRPC; + +namespace { + +std::random_device rd; 
+std::mt19937 gen(5); +std::uniform_real_distribution dis(0.0, 1.0); + +class HelloMLBridgeEnv : public Environment { + Observation CurrObs; + +public: + HelloMLBridgeEnv() { setNextAgent("agent"); }; + Observation &reset() override; + Observation &step(Action) override; + +protected: + std::vector FeatureVector; +}; + +Observation &HelloMLBridgeEnv::step(Action Action) { + CurrObs.clear(); + std::copy(FeatureVector.begin(), FeatureVector.end(), + std::back_inserter(CurrObs)); + setDone(); + return CurrObs; +} + +Observation &HelloMLBridgeEnv::reset() { + std::copy(FeatureVector.begin(), FeatureVector.end(), + std::back_inserter(CurrObs)); + return CurrObs; +} + +struct MLIRHelloMLBridge : public OperationPass, + public HelloMLBridgeEnv { +public: + MLIRHelloMLBridge() {} + + struct HelloMLIRTraining + : public helloMLBridgegRPC::HelloMLBridgeService::Service { + private: + std::vector FeatureVector; + + public: + grpc::Status + getTensor(grpc::ServerContext *context, + const ::helloMLBridgegRPC::ActionRequest *request, + ::helloMLBridgegRPC::TensorResponse *response) override { + if (request->action() == -1) { + return grpc::Status::OK; + } + if (request->action()) { + populateFeatureVector(FeatureVector); + for (int I = 0, E = FeatureVector.size(); I < E; I++) { + response->add_tensor(FeatureVector[I]); + } + } + return grpc::Status::OK; + } + }; + + void setTFModelRunner(int n) { + switch (n) { +#define M(x) \ + case x: \ + MLRunner = new TFModelRunner("output"); \ + break; + MODELS(M) +#undef M + } + // MLRunner = new TFModelRunner("output"); + } + + void TFinitCommunication() { + auto StartTime = std::chrono::high_resolution_clock::now(); + + std::pair> p1("x", FeatureVector); + + setTFModelRunner(n); + MLRunner->populateFeatures(p1); + double Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + 
outputFile.open("tf-inference.csv", std::ios_base::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + + void runOnOperation() override { + assert(MLConfig::mlconfig != "" && "ml-config-path required"); + // Get the current operation being operated on. + Operation *op = getOperation(); + // llvm::errs() << "Hello World pass\n"; + // bool use_pipe = false; + // bool useONNX = false; + if (useTF) { + populateFeatureVector(FeatureVector); + TFinitCommunication(); + return; + } + if (usePipe) { + populateFeatureVector(FeatureVector); + initCommunication(); + } else { + if (training) { + HelloMLIRTraining *gRPCTrainer = new HelloMLIRTraining(); + MLRunner = new gRPCModelRunner< + helloMLBridgegRPC::HelloMLBridgeService::Service, + helloMLBridgegRPC::HelloMLBridgeService::Stub, + helloMLBridgegRPC::TensorResponse, + helloMLBridgegRPC::ActionRequest>(server_address, gRPCTrainer); + } else if (useONNX) { + std::ofstream outputFile; + outputFile.open("onnx-inference.csv", std::ios::app); + Agent *agent = new Agent(MLConfig::mlconfig + + "/hellopass/onnx_test_dir/dummy-torch-model-" + + std::to_string(n) + ".onnx"); + std::map agents; + agents["agent"] = agent; + auto StartTime = std::chrono::high_resolution_clock::now(); + Env = new HelloMLBridgeEnv(); + MLRunner = new ONNXModelRunner(this, agents, nullptr); + populateFeatureVector(FeatureVector); + int Out = MLRunner->evaluate(); + auto EndTime = std::chrono::high_resolution_clock::now(); + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } else { + // llvm::errs() << "Using 2nd gRPC flow...\n"; + std::ofstream outputFile; + outputFile.open("grpc-inference.csv", std::ios::app); + auto StartTime = std::chrono::high_resolution_clock::now(); + + helloMLBridgegRPC::TensorResponse request; + helloMLBridgegRPC::ActionRequest response; + MLRunner = + new gRPCModelRunner( + server_address, &request, 
&response); + + MLRunner->setRequest(&request); + MLRunner->setResponse(&response); + populateFeatureVector(FeatureVector); + std::pair> p1("tensor", FeatureVector); + MLRunner->populateFeatures(p1); + int Out = MLRunner->evaluate(); + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + // outs() << n << " " << Duration.count() << "\n"; + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); + } + } + } + +private: + BaseSerDes::Kind SerDesType; + HelloMLBridgeEnv *Env; + std::string basename = "/tmp/" + pipe_name; + MLModelRunner *MLRunner; + static void populateFeatureVector(std::vector &FeatureVector); + void initCommunication(); + void setModelRunner(int n); +}; + +void MLIRHelloMLBridge::initCommunication() { + if (data_format == "bytes") { + SerDesType = BaseSerDes::Kind::Bitstream; + } else if (data_format == "json") { + SerDesType = BaseSerDes::Kind::Json; + } + basename = "/tmp/" + pipe_name; + auto StartTime = std::chrono::high_resolution_clock::now(); + MLRunner = + new PipeModelRunner(basename + ".out", basename + ".in", SerDesType); + + std::pair> p1("tensor", FeatureVector); + MLRunner->populateFeatures(p1); + int Out = MLRunner->evaluate(); + + // llvm::outs() << "Returned value: " << Out << "\n"; + + auto EndTime = std::chrono::high_resolution_clock::now(); + + auto Duration = std::chrono::duration_cast( + EndTime - StartTime); + std::ofstream outputFile; + outputFile.open("pipe-" + data_format + "-inference.csv", std::ios::app); + outputFile << n << "," << Duration.count() << "\n"; + outputFile.close(); +} + +void MLIRHelloMLBridge::populateFeatureVector( + std::vector &FeatureVector) { + FeatureVector.resize(n); + for (int i = 0; i < n; i++) { + FeatureVector[i] = dis(gen); + } +} + +void MLIRHelloMLBridge::setModelRunner(int n) { MLRunner = nullptr; } + +} // end anonymous namespace + +std::unique_ptr mlir::createMLIRHelloMLBridgePass() { + 
return std::make_unique(); +} + +static PassRegistration pass("mlir-hello-mlbridge", + "MLIR Hello MLBridge"); diff --git a/mlopt.yml b/mlopt.yml new file mode 100644 index 000000000000..418e1f656a9d --- /dev/null +++ b/mlopt.yml @@ -0,0 +1,480 @@ +name: mlopt +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - abseil-cpp=20211102.0=hd4dd3e8_0 + - aiobotocore=2.5.0=py310h06a4308_0 + - aiofiles=22.1.0=py310h06a4308_0 + - aiohttp=3.8.3=py310h5eee18b_0 + - aioitertools=0.7.1=pyhd3eb1b0_0 + - aiosignal=1.2.0=pyhd3eb1b0_0 + - aiosqlite=0.18.0=py310h06a4308_0 + - alabaster=0.7.12=pyhd3eb1b0_0 + - anaconda=2023.07=py310_1 + - anyio=3.5.0=py310h06a4308_0 + - appdirs=1.4.4=pyhd3eb1b0_0 + - argon2-cffi=21.3.0=pyhd3eb1b0_0 + - argon2-cffi-bindings=21.2.0=py310h7f8727e_0 + - arrow=1.2.3=py310h06a4308_1 + - arrow-cpp=11.0.0=py310h7516544_0 + - astroid=2.14.2=py310h06a4308_0 + - astropy=5.1=py310ha9d4c09_0 + - asttokens=2.0.5=pyhd3eb1b0_0 + - async-timeout=4.0.2=py310h06a4308_0 + - atomicwrites=1.4.0=py_0 + - attrs=22.1.0=py310h06a4308_0 + - automat=20.2.0=py_0 + - autopep8=1.6.0=pyhd3eb1b0_1 + - aws-c-common=0.4.57=he6710b0_1 + - aws-c-event-stream=0.1.6=h2531618_5 + - aws-checksums=0.1.9=he6710b0_0 + - aws-sdk-cpp=1.8.185=hce553d0_0 + - babel=2.11.0=py310h06a4308_0 + - backcall=0.2.0=pyhd3eb1b0_0 + - bcrypt=3.2.0=py310h5eee18b_1 + - beautifulsoup4=4.12.2=py310h06a4308_0 + - binaryornot=0.4.4=pyhd3eb1b0_1 + - black=23.3.0=py310h06a4308_0 + - blas=1.0=mkl + - bleach=4.1.0=pyhd3eb1b0_0 + - blosc=1.21.3=h6a678d5_0 + - bokeh=3.2.1=py310h2f386ee_0 + - boost-cpp=1.73.0=h7f8727e_12 + - botocore=1.29.76=py310h06a4308_0 + - bottleneck=1.3.5=py310ha9d4c09_0 + - brotli=1.0.9=h5eee18b_7 + - brotli-bin=1.0.9=h5eee18b_7 + - brotlipy=0.7.0=py310h7f8727e_1002 + - brunsli=0.1=h2531618_0 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.0=h5eee18b_0 + - c-blosc2=2.8.0=h6a678d5_0 + - ca-certificates=2023.05.30=h06a4308_0 + - certifi=2023.7.22=py310h06a4308_0 
+ - cffi=1.15.1=py310h5eee18b_3 + - cfitsio=3.470=h5893167_7 + - chardet=4.0.0=py310h06a4308_1003 + - charls=2.2.0=h2531618_0 + - charset-normalizer=2.0.4=pyhd3eb1b0_0 + - click=8.0.4=py310h06a4308_0 + - cloudpickle=2.2.1=py310h06a4308_0 + - colorama=0.4.6=py310h06a4308_0 + - colorcet=3.0.1=py310h06a4308_0 + - comm=0.1.2=py310h06a4308_0 + - constantly=15.1.0=py310h06a4308_0 + - contourpy=1.0.5=py310hdb19cb5_0 + - cookiecutter=1.7.3=pyhd3eb1b0_0 + - cryptography=41.0.2=py310h774aba0_0 + - cssselect=1.1.0=pyhd3eb1b0_0 + - curl=8.1.1=h37d81fd_2 + - cycler=0.11.0=pyhd3eb1b0_0 + - cytoolz=0.12.0=py310h5eee18b_0 + - daal4py=2023.1.1=py310h3c18c91_0 + - dal=2023.1.1=hdb19cb5_48679 + - dask=2023.6.0=py310h06a4308_0 + - dask-core=2023.6.0=py310h06a4308_0 + - datasets=2.12.0=py310h06a4308_0 + - datashader=0.15.1=py310h06a4308_0 + - datashape=0.5.4=py310h06a4308_1 + - dbus=1.13.18=hb2f20db_0 + - debugpy=1.6.7=py310h6a678d5_0 + - decorator=5.1.1=pyhd3eb1b0_0 + - defusedxml=0.7.1=pyhd3eb1b0_0 + - diff-match-patch=20200713=pyhd3eb1b0_0 + - dill=0.3.6=py310h06a4308_0 + - distributed=2023.6.0=py310h06a4308_0 + - docstring-to-markdown=0.11=py310h06a4308_0 + - docutils=0.18.1=py310h06a4308_3 + - entrypoints=0.4=py310h06a4308_0 + - et_xmlfile=1.1.0=py310h06a4308_0 + - exceptiongroup=1.0.4=py310h06a4308_0 + - executing=0.8.3=pyhd3eb1b0_0 + - expat=2.4.9=h6a678d5_0 + - filelock=3.9.0=py310h06a4308_0 + - flake8=6.0.0=py310h06a4308_0 + - flask=2.2.2=py310h06a4308_0 + - fontconfig=2.14.1=h52c9d5c_1 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=h4a9f257_0 + - frozenlist=1.3.3=py310h5eee18b_0 + - fsspec=2023.4.0=py310h06a4308_0 + - gensim=4.3.0=py310h1128e8f_0 + - gflags=2.2.2=he6710b0_0 + - giflib=5.2.1=h5eee18b_3 + - glib=2.69.1=he621ea3_2 + - glog=0.5.0=h2531618_0 + - gmp=6.2.1=h295c915_3 + - gmpy2=2.1.2=py310heeb90bb_0 + - greenlet=2.0.1=py310h6a678d5_0 + - grpc-cpp=1.46.1=h33aed49_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - gstreamer=1.14.1=h5eee18b_1 + - 
h5py=3.7.0=py310he06866b_0 + - hdf5=1.10.6=h3ffc7dd_1 + - heapdict=1.0.1=pyhd3eb1b0_0 + - holoviews=1.17.0=py310h06a4308_0 + - huggingface_hub=0.15.1=py310h06a4308_0 + - hvplot=0.8.4=py310h06a4308_0 + - hyperlink=21.0.0=pyhd3eb1b0_0 + - icu=58.2=he6710b0_3 + - idna=3.4=py310h06a4308_0 + - imagecodecs=2021.8.26=py310h46e8fbd_2 + - imageio=2.31.1=py310h06a4308_0 + - imagesize=1.4.1=py310h06a4308_0 + - imbalanced-learn=0.10.1=py310h06a4308_1 + - importlib-metadata=6.0.0=py310h06a4308_0 + - importlib_metadata=6.0.0=hd3eb1b0_0 + - incremental=21.3.0=pyhd3eb1b0_0 + - inflection=0.5.1=py310h06a4308_0 + - iniconfig=1.1.1=pyhd3eb1b0_0 + - intake=0.6.8=py310h06a4308_0 + - intel-openmp=2023.1.0=hdb19cb5_46305 + - intervaltree=3.1.0=pyhd3eb1b0_0 + - ipykernel=6.19.2=py310h2f386ee_0 + - ipython=8.12.0=py310h06a4308_0 + - ipython_genutils=0.2.0=pyhd3eb1b0_1 + - ipywidgets=8.0.4=py310h06a4308_0 + - isort=5.9.3=pyhd3eb1b0_0 + - itemadapter=0.3.0=pyhd3eb1b0_0 + - itemloaders=1.0.4=pyhd3eb1b0_1 + - itsdangerous=2.0.1=pyhd3eb1b0_0 + - jaraco.classes=3.2.1=pyhd3eb1b0_0 + - jedi=0.18.1=py310h06a4308_1 + - jeepney=0.7.1=pyhd3eb1b0_0 + - jellyfish=0.9.0=py310h7f8727e_0 + - jinja2=3.1.2=py310h06a4308_0 + - jinja2-time=0.2.0=pyhd3eb1b0_3 + - jmespath=0.10.0=pyhd3eb1b0_0 + - joblib=1.2.0=py310h06a4308_0 + - jpeg=9e=h5eee18b_1 + - jq=1.6=h27cfd23_1000 + - json5=0.9.6=pyhd3eb1b0_0 + - jsonschema=4.17.3=py310h06a4308_0 + - jupyter=1.0.0=py310h06a4308_8 + - jupyter_client=7.4.9=py310h06a4308_0 + - jupyter_console=6.6.3=py310h06a4308_0 + - jupyter_core=5.3.0=py310h06a4308_0 + - jupyter_events=0.6.3=py310h06a4308_0 + - jupyter_server=1.23.4=py310h06a4308_0 + - jupyter_server_fileid=0.9.0=py310h06a4308_0 + - jupyter_server_ydoc=0.8.0=py310h06a4308_1 + - jupyter_ydoc=0.2.4=py310h06a4308_0 + - jupyterlab=3.6.3=py310h06a4308_0 + - jupyterlab_pygments=0.1.2=py_0 + - jupyterlab_server=2.22.0=py310h06a4308_0 + - jupyterlab_widgets=3.0.5=py310h06a4308_0 + - jxrlib=1.1=h7b6447c_2 + - 
keyring=23.13.1=py310h06a4308_0 + - kiwisolver=1.4.4=py310h6a678d5_0 + - krb5=1.20.1=h568e23c_1 + - lazy-object-proxy=1.6.0=py310h7f8727e_0 + - lazy_loader=0.2=py310h06a4308_0 + - lcms2=2.12=h3be6417_0 + - ld_impl_linux-64=2.38=h1181459_1 + - lerc=3.0=h295c915_0 + - libaec=1.0.4=he6710b0_1 + - libboost=1.73.0=h28710b8_12 + - libbrotlicommon=1.0.9=h5eee18b_7 + - libbrotlidec=1.0.9=h5eee18b_7 + - libbrotlienc=1.0.9=h5eee18b_7 + - libcurl=8.1.1=h91b91d3_2 + - libdeflate=1.17=h5eee18b_0 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h7f8727e_1 + - libevent=2.1.12=h8f2d780_0 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran-ng=11.2.0=h00389a5_1 + - libgfortran5=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libllvm10=10.0.1=hbcb73fb_5 + - libllvm14=14.0.6=hdb19cb5_3 + - libnghttp2=1.52.0=ha637b67_1 + - libpng=1.6.39=h5eee18b_0 + - libpq=12.15=h37d81fd_1 + - libprotobuf=3.20.3=he621ea3_0 + - libsodium=1.0.18=h7b6447c_0 + - libspatialindex=1.9.3=h2531618_0 + - libssh2=1.10.0=h37d81fd_2 + - libstdcxx-ng=11.2.0=h1234567_1 + - libthrift=0.15.0=h0d84882_2 + - libtiff=4.5.0=h6a678d5_2 + - libuuid=1.41.5=h5eee18b_0 + - libwebp=1.2.4=h11a3e52_1 + - libwebp-base=1.2.4=h5eee18b_1 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.1=hfa300c1_0 + - libxml2=2.9.14=h74e7548_0 + - libxslt=1.1.35=h4e12654_0 + - libzopfli=1.0.3=he6710b0_0 + - linkify-it-py=2.0.0=py310h06a4308_0 + - llvmlite=0.40.0=py310he621ea3_0 + - locket=1.0.0=py310h06a4308_0 + - lxml=4.9.1=py310h1edc446_0 + - lz4=4.3.2=py310h5eee18b_0 + - lz4-c=1.9.4=h6a678d5_0 + - lzo=2.10=h7b6447c_2 + - markdown=3.4.1=py310h06a4308_0 + - markdown-it-py=2.2.0=py310h06a4308_1 + - markupsafe=2.1.1=py310h7f8727e_0 + - matplotlib=3.7.1=py310h06a4308_1 + - matplotlib-base=3.7.1=py310h1128e8f_1 + - matplotlib-inline=0.1.6=py310h06a4308_0 + - mccabe=0.7.0=pyhd3eb1b0_0 + - mdit-py-plugins=0.3.0=py310h06a4308_0 + - mdurl=0.1.0=py310h06a4308_0 + - mistune=0.8.4=py310h7f8727e_1000 + - mkl=2023.1.0=h6d00ec8_46342 
+ - mkl-service=2.4.0=py310h5eee18b_1 + - mkl_fft=1.3.6=py310h1128e8f_1 + - mkl_random=1.2.2=py310h1128e8f_1 + - more-itertools=8.12.0=pyhd3eb1b0_0 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.2=hb69a4c5_1 + - mpi=1.0=mpich + - mpich=4.1.1=hbae89fd_0 + - mpmath=1.3.0=py310h06a4308_0 + - msgpack-python=1.0.3=py310hd09550d_0 + - multidict=6.0.2=py310h5eee18b_0 + - multipledispatch=0.6.0=py310h06a4308_0 + - multiprocess=0.70.14=py310h06a4308_0 + - munkres=1.1.4=py_0 + - mypy_extensions=0.4.3=py310h06a4308_0 + - nbclassic=0.5.5=py310h06a4308_0 + - nbclient=0.5.13=py310h06a4308_0 + - nbconvert=6.5.4=py310h06a4308_0 + - nbformat=5.7.0=py310h06a4308_0 + - ncurses=6.4=h6a678d5_0 + - nest-asyncio=1.5.6=py310h06a4308_0 + - networkx=3.1=py310h06a4308_0 + - ninja=1.10.2=h06a4308_5 + - ninja-base=1.10.2=hd09550d_5 + - nltk=3.8.1=py310h06a4308_0 + - notebook=6.5.4=py310h06a4308_1 + - notebook-shim=0.2.2=py310h06a4308_0 + - nspr=4.35=h6a678d5_0 + - nss=3.89.1=h6a678d5_0 + - numba=0.57.0=py310h1128e8f_0 + - numexpr=2.8.4=py310h85018f9_1 + - numpy=1.24.3=py310h5f9d8c6_1 + - numpy-base=1.24.3=py310hb5e798b_1 + - numpydoc=1.5.0=py310h06a4308_0 + - oniguruma=6.9.7.1=h27cfd23_0 + - openjpeg=2.4.0=h3ad879b_0 + - openpyxl=3.0.10=py310h5eee18b_0 + - openssl=1.1.1u=h7f8727e_0 + - orc=1.7.4=hb3bc3d3_1 + - packaging=23.0=py310h06a4308_0 + - pandas=1.5.3=py310h1128e8f_0 + - pandocfilters=1.5.0=pyhd3eb1b0_0 + - panel=1.2.1=py310h06a4308_0 + - param=1.13.0=py310h06a4308_0 + - parsel=1.6.0=py310h06a4308_0 + - parso=0.8.3=pyhd3eb1b0_0 + - partd=1.2.0=pyhd3eb1b0_1 + - pathspec=0.10.3=py310h06a4308_0 + - patsy=0.5.3=py310h06a4308_0 + - pcre=8.45=h295c915_0 + - pep8=1.7.1=py310h06a4308_1 + - pexpect=4.8.0=pyhd3eb1b0_3 + - pickleshare=0.7.5=pyhd3eb1b0_1003 + - pillow=9.4.0=py310h6a678d5_0 + - pip=23.2.1=py310h06a4308_0 + - platformdirs=2.5.2=py310h06a4308_0 + - plotly=5.9.0=py310h06a4308_0 + - pluggy=1.0.0=py310h06a4308_1 + - ply=3.11=py310h06a4308_0 + - pooch=1.4.0=pyhd3eb1b0_0 + - poyo=0.5.0=pyhd3eb1b0_0 
+ - prometheus_client=0.14.1=py310h06a4308_0 + - prompt-toolkit=3.0.36=py310h06a4308_0 + - prompt_toolkit=3.0.36=hd3eb1b0_0 + - protego=0.1.16=py_0 + - psutil=5.9.0=py310h5eee18b_0 + - ptyprocess=0.7.0=pyhd3eb1b0_2 + - pure_eval=0.2.2=pyhd3eb1b0_0 + - py-cpuinfo=8.0.0=pyhd3eb1b0_1 + - pyarrow=11.0.0=py310h468efa6_0 + - pyasn1=0.4.8=pyhd3eb1b0_0 + - pyasn1-modules=0.2.8=py_0 + - pycodestyle=2.10.0=py310h06a4308_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pyct=0.5.0=py310h06a4308_0 + - pycurl=7.45.2=py310h37d81fd_0 + - pydispatcher=2.0.5=py310h06a4308_2 + - pydocstyle=6.3.0=py310h06a4308_0 + - pyerfa=2.0.0=py310h7f8727e_0 + - pyflakes=3.0.1=py310h06a4308_0 + - pygments=2.15.1=py310h06a4308_1 + - pylint=2.16.2=py310h06a4308_0 + - pylint-venv=2.3.0=py310h06a4308_0 + - pyls-spyder=0.4.0=pyhd3eb1b0_0 + - pyodbc=4.0.34=py310h6a678d5_0 + - pyopenssl=23.2.0=py310h06a4308_0 + - pyparsing=3.0.9=py310h06a4308_0 + - pyqt=5.15.7=py310h6a678d5_1 + - pyqtwebengine=5.15.7=py310h6a678d5_1 + - pyrsistent=0.18.0=py310h7f8727e_0 + - pysocks=1.7.1=py310h06a4308_0 + - pytables=3.8.0=py310h43249b6_2 + - pytest=7.4.0=py310h06a4308_0 + - python=3.10.12=h7a1cb2a_0 + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - python-fastjsonschema=2.16.2=py310h06a4308_0 + - python-json-logger=2.0.7=py310h06a4308_0 + - python-lmdb=1.4.1=py310h6a678d5_0 + - python-lsp-black=1.2.1=py310h06a4308_0 + - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 + - python-lsp-server=1.7.2=py310h06a4308_0 + - python-slugify=5.0.2=pyhd3eb1b0_0 + - python-snappy=0.6.1=py310h6a678d5_0 + - python-xxhash=2.0.2=py310h5eee18b_1 + - pytoolconfig=1.2.5=py310h06a4308_1 + - pytorch=2.0.1=cpu_py310hdc00b08_0 + - pytz=2022.7=py310h06a4308_0 + - pyviz_comms=2.3.0=py310h06a4308_0 + - pywavelets=1.4.1=py310h5eee18b_0 + - pyxdg=0.27=pyhd3eb1b0_0 + - pyyaml=6.0=py310h5eee18b_1 + - pyzmq=23.2.0=py310h6a678d5_0 + - qdarkstyle=3.0.2=pyhd3eb1b0_0 + - qstylizer=0.2.2=py310h06a4308_0 + - qt-main=5.15.2=h327a75a_7 + - qt-webengine=5.15.9=hd2b0992_4 + - 
qtawesome=1.2.2=py310h06a4308_0 + - qtconsole=5.4.2=py310h06a4308_0 + - qtpy=2.2.0=py310h06a4308_0 + - qtwebkit=5.212=h4eab89a_4 + - queuelib=1.5.0=py310h06a4308_0 + - re2=2022.04.01=h295c915_0 + - readline=8.2=h5eee18b_0 + - regex=2022.7.9=py310h5eee18b_0 + - requests=2.31.0=py310h06a4308_0 + - requests-file=1.5.1=pyhd3eb1b0_0 + - responses=0.13.3=pyhd3eb1b0_0 + - rfc3339-validator=0.1.4=py310h06a4308_0 + - rfc3986-validator=0.1.1=py310h06a4308_0 + - rope=1.7.0=py310h06a4308_0 + - rtree=1.0.1=py310h06a4308_0 + - s3fs=2023.4.0=py310h06a4308_0 + - sacremoses=0.0.43=pyhd3eb1b0_0 + - scikit-image=0.20.0=py310h6a678d5_0 + - scikit-learn=1.3.0=py310h1128e8f_0 + - scikit-learn-intelex=2023.1.1=py310h06a4308_0 + - scipy=1.10.1=py310h5f9d8c6_1 + - scrapy=2.8.0=py310h06a4308_0 + - seaborn=0.12.2=py310h06a4308_0 + - secretstorage=3.3.1=py310h06a4308_1 + - send2trash=1.8.0=pyhd3eb1b0_1 + - service_identity=18.1.0=pyhd3eb1b0_1 + - setuptools=68.0.0=py310h06a4308_0 + - sip=6.6.2=py310h6a678d5_0 + - six=1.16.0=pyhd3eb1b0_1 + - smart_open=5.2.1=py310h06a4308_0 + - snappy=1.1.9=h295c915_0 + - sniffio=1.2.0=py310h06a4308_1 + - snowballstemmer=2.2.0=pyhd3eb1b0_0 + - sortedcontainers=2.4.0=pyhd3eb1b0_0 + - soupsieve=2.4=py310h06a4308_0 + - sphinx=5.0.2=py310h06a4308_0 + - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 + - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 + - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 + - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 + - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 + - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 + - spyder=5.4.3=py310h06a4308_1 + - spyder-kernels=2.4.3=py310h06a4308_0 + - sqlalchemy=1.4.39=py310h5eee18b_0 + - sqlite=3.41.2=h5eee18b_0 + - stack_data=0.2.0=pyhd3eb1b0_0 + - statsmodels=0.14.0=py310ha9d4c09_0 + - sympy=1.11.1=py310h06a4308_0 + - tabulate=0.8.10=py310h06a4308_0 + - tbb=2021.8.0=hdb19cb5_0 + - tbb4py=2021.8.0=py310hdb19cb5_0 + - tblib=1.7.0=pyhd3eb1b0_0 + - tenacity=8.2.2=py310h06a4308_0 + - terminado=0.17.1=py310h06a4308_0 + 
- text-unidecode=1.3=pyhd3eb1b0_0 + - textdistance=4.2.1=pyhd3eb1b0_0 + - threadpoolctl=2.2.0=pyh0d69192_0 + - three-merge=0.1.1=pyhd3eb1b0_0 + - tifffile=2021.7.2=pyhd3eb1b0_2 + - tinycss2=1.2.1=py310h06a4308_0 + - tk=8.6.12=h1ccaba5_0 + - tldextract=3.2.0=pyhd3eb1b0_0 + - tokenizers=0.13.2=py310he7d60b5_1 + - toml=0.10.2=pyhd3eb1b0_0 + - tomli=2.0.1=py310h06a4308_0 + - tomlkit=0.11.1=py310h06a4308_0 + - toolz=0.12.0=py310h06a4308_0 + - tornado=6.3.2=py310h5eee18b_0 + - tqdm=4.65.0=py310h2f386ee_0 + - traitlets=5.7.1=py310h06a4308_0 + - transformers=4.29.2=py310h06a4308_0 + - twisted=22.10.0=py310h5eee18b_0 + - tzdata=2023c=h04d1e81_0 + - uc-micro-py=1.0.1=py310h06a4308_0 + - ujson=5.4.0=py310h6a678d5_0 + - unidecode=1.2.0=pyhd3eb1b0_0 + - unixodbc=2.3.11=h5eee18b_0 + - urllib3=1.26.16=py310h06a4308_0 + - utf8proc=2.6.1=h27cfd23_0 + - w3lib=1.21.0=pyhd3eb1b0_0 + - watchdog=2.1.6=py310h06a4308_0 + - wcwidth=0.2.5=pyhd3eb1b0_0 + - webencodings=0.5.1=py310h06a4308_1 + - websocket-client=0.58.0=py310h06a4308_4 + - werkzeug=2.2.3=py310h06a4308_0 + - whatthepatch=1.0.2=py310h06a4308_0 + - wheel=0.38.4=py310h06a4308_0 + - widgetsnbextension=4.0.5=py310h06a4308_0 + - wrapt=1.14.1=py310h5eee18b_0 + - wurlitzer=3.0.2=py310h06a4308_0 + - xarray=2023.6.0=py310h06a4308_0 + - xxhash=0.8.0=h7f8727e_3 + - xyzservices=2022.9.0=py310h06a4308_1 + - xz=5.4.2=h5eee18b_0 + - y-py=0.5.9=py310h52d8a92_0 + - yaml=0.2.5=h7b6447c_0 + - yapf=0.31.0=pyhd3eb1b0_0 + - yarl=1.8.1=py310h5eee18b_0 + - ypy-websocket=0.8.2=py310h06a4308_0 + - zeromq=4.3.4=h2531618_0 + - zfp=0.5.5=h295c915_6 + - zict=2.2.0=py310h06a4308_0 + - zipp=3.11.0=py310h06a4308_0 + - zlib=1.2.13=h5eee18b_0 + - zlib-ng=2.0.7=h5eee18b_0 + - zope=1.0=py310h06a4308_1 + - zope.interface=5.4.0=py310h7f8727e_0 + - zstd=1.5.5=hc292b87_0 + - pip: + - absl-py==1.4.0 + - astunparse==1.6.3 + - cachetools==5.3.1 + - flatbuffers==23.5.26 + - gast==0.4.0 + - google-auth==2.22.0 + - google-auth-oauthlib==1.0.0 + - google-pasta==0.2.0 + - 
grpcio==1.57.0 + - keras==2.13.1 + - libclang==16.0.6 + - oauthlib==3.2.2 + - opt-einsum==3.3.0 + - protobuf==4.24.2 + - pyqt5-sip==12.11.0 + - requests-oauthlib==1.3.1 + - rsa==4.9 + - tensorboard==2.13.0 + - tensorboard-data-server==0.7.1 + - tensorflow==2.13.0 + - tensorflow-estimator==2.13.0 + - tensorflow-io-gcs-filesystem==0.33.0 + - termcolor==2.3.0 + - typing-extensions==4.5.0 + - python-decouple diff --git a/model/LoopDistribution/src/inference.py b/model/LoopDistribution/src/inference.py new file mode 100644 index 000000000000..572515b5c18a --- /dev/null +++ b/model/LoopDistribution/src/inference.py @@ -0,0 +1,437 @@ +import argparse + +# import collections +from argparse import Namespace +from atexit import register +from distutils.command.config import config +from itertools import count + +# from email import parser +import grpc +from concurrent import futures +from tqdm import tqdm +import os +import json +import glob +from ld_config import MODEL_PATH, TEST_DIR, BUILD_DIR, MODEL_DIR +import traceback +import sys + + +sys.path.extend( + [ + f"{BUILD_DIR}/MLCompilerBridge/MLModelRunner/gRPCModelRunner/Python-Utilities", + f"{MODEL_DIR}", + # f"{REPO_DIR}/llvm/lib/Transforms/models" + ] +) +import LoopDistribution_pb2, LoopDistribution_pb2_grpc +import ray +from ray import tune +from ray.rllib.agents import ppo + +from simple_q import SimpleQTrainer, DEFAULT_CONFIG +from multiagentEnv import DistributeLoopEnv +# from register_action_space import RegisterActionSpace +from ray.rllib.models import ModelCatalog +from model import SelectNodeNetwork, DistributionTask +import logging +import SerDes + +from gym.spaces import Discrete, Box, Dict +import numpy as np +from ray.tune import function +from ray.rllib.utils.torch_ops import FLOAT_MIN, FLOAT_MAX + +logger = logging.getLogger(__file__) +logging.basicConfig( + filename="inference.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, +) + +import networkx +import json + +# from 
dqn_agent import Agent +import torch +from argparse import Namespace +import pydot +from networkx.readwrite import json_graph + +from typing import Callable, List, Union, Optional +import io +import math +import ctypes +import log_reader +from log_reader import TensorSpec +from functools import reduce +import operator + + +parser = argparse.ArgumentParser() +parser.add_argument( + "--use_pipe", + action="store_true", + help="Use pipe communication", + required=False, + default=False, +) +parser.add_argument( + "--use_grpc", + action="store_true", + help="Use grpc communication", + required=False, + default=False, +) +parser.add_argument( + "--data_format", + type=str, + choices=["json", "protobuf", "bytes"], + help="Data format to use for communication", +) +parser.add_argument("--pipe_name", type=str, help="Pipe name to use for communication", default="loopdistppipe") +parser.add_argument("--server_port", type=str, help="Server port") + +class DistributionInference: + def __init__(self, model_path, use_pipe=False, data_format=None): + logdir = "/tmp" + logger = logging.getLogger(__file__) + logging.basicConfig( + filename="running.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, + ) + + logger = logging.getLogger(__file__) + logging.basicConfig( + filename=os.path.join(logdir, "loop-distribution.log"), + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, + ) + + config = DEFAULT_CONFIG.copy() + config["num_workers"] = 0 + config["explore"] = False + + from ray.tune.registry import register_env + + config["env_config"]["target"] = "X86" + config["env_config"]["state_size"] = 300 + + config["env_config"]["mode"] = "inference" + config["env_config"]["dump_type"] = "One" + config["env_config"]["intermediate_data"] = "./temp" + config["env_config"]["use_pipe"] = use_pipe + config["env_config"]["data_format"] = data_format + + ModelCatalog.register_custom_model("select_node_model", SelectNodeNetwork) + 
ModelCatalog.register_custom_model("distribution_model", DistributionTask) + + box_obs = Box( + FLOAT_MIN, + FLOAT_MAX, + shape=(config["env_config"]["state_size"],), + dtype=np.float32, + ) + box_obs_select_node = Box( + FLOAT_MIN, + FLOAT_MAX, + shape=( + config["env_config"]["max_number_nodes"], + config["env_config"]["state_size"], + ), + dtype=np.float32, + ) + + obs_select_node = Dict( + { + "action_mask": Box( + 0, 1, shape=(config["env_config"]["max_number_nodes"],) + ), + "state": box_obs_select_node, + } + ) + + obs_distribute_node = Dict( + { + "prev_Node": box_obs, + "curr_Node": box_obs, + "dist_flag": Box(0, 1, shape=(1,)), + "action_mask": Box(0, 1, shape=(2,)), + # "state": box_obs + } + ) + + def policy_mapping_fn(agent_id, episode=None, **kwargs): + if agent_id.startswith("select_node_agent"): + return "select_node_policy" + elif agent_id.startswith("distribution_agent"): + return "distribution_policy" + + policies = { + "select_node_policy": ( + None, + obs_select_node, + Discrete(config["env_config"]["max_number_nodes"]), + { + "gamma": 0.9, + "model": { + "custom_model": "select_node_model", + "custom_model_config": { + "state_size": config["env_config"]["state_size"], + "fc1_units": 64, + "fc2_units": 64, + }, + }, + }, + ), + "distribution_policy": ( + None, + obs_distribute_node, + Discrete(2), + { + "gamma": 0.9, + "model": { + "custom_model": "distribution_model", + "custom_model_config": { + "state_size": config["env_config"]["state_size"], + "fc1_units": 64, + "fc2_units": 64, + }, + }, + }, + ), + # "vectorization_policy": (None, obs_vectorization_node, + # ) + } + + config["multiagent"] = { + "policies": policies, + "policy_mapping_fn": function(policy_mapping_fn), + } + + # def env_creator(env_config): + # return DistributeLoopEnv(env_config) + # register_env("Environment", env_creator) + + self.trained_agent = SimpleQTrainer(env=DistributeLoopEnv, config=config) + # self.train_agent = DistributionInference(model_path, test_dir) + # 
logging.info("{} {}".format(self.trained_agent, type(self.trained_agent))) + checkpoint = model_path + self.trained_agent.restore(checkpoint) + + self.config = config + + self.temp_rootname = "/tmp/loopdistppipe" + self.tc = None + self.fc = None + self.tensor_specs = None + self.advice_spec = None + + # config = config["env_config"] + # self.env = DistributeLoopEnv(env_config) + + def dot_to_json(self, dot_): + py_dot_graph = pydot.graph_from_dot_data(dot_)[0] + graph_netx = networkx.drawing.nx_pydot.from_pydot(py_dot_graph) + graph_json = json_graph.adjacency_data(graph_netx) + return graph_json + + def run_predict(self, test_file): + env = DistributeLoopEnv(self.config["env_config"]) + + # Use for running with custom_loop_distribution + graph = self.dot_to_json(test_file) + obs = env.reset(graph) + + env.advice_spec = self.advice_spec + env.tc = self.tc + env.fc = self.fc + env.temp_rootname = self.temp_rootname + # Use for running directly inference.py + # obs = env.reset(test_file) + + score = 0 + while True: + logging.debug("-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-") + + # return the color index for a node + # print("state {}".format(obs)) + action = {} + for agent_id, agent_obs in obs.items(): + # print("agent_id: {}".format(agent_id)) + # print("agent_obs: {}".format(agent_obs)) + policy_id = self.config["multiagent"]["policy_mapping_fn"](agent_id) + action[agent_id] = self.trained_agent.compute_action( + agent_obs, policy_id=policy_id + ) + print("action: {}".format(action[agent_id])) + + obs, reward, done, response = env.step(action) + done = done["__all__"] + # sum up reward for all agents + # episode_reward += sum(reward.values()) + + # action = self.trained_agent.compute_action(state) + + # next_state, reward, done, response = env.step(action) + logging.debug("reward : {}".format(reward)) + + # state = next_state + if done: + with open("actionlist.txt", "a") as actionfile: + actionfile.write(str(test_file) + "\n") + assert response is not 
None, "Allocation is not preset." + break + response = env.partition_seq + print("response: {}".format(response)) + return reward, response + + def run_predict_multiple_loops(self, rdgs): + # Load the envroinment + # env = DistributeLoopEnv(config) + # seqs = [] + dist_seq = [] + # vf_seq = [] + for rdg in rdgs: + # reward, seqs = self.run_predict(rdg) + reward, seqs = self.run_predict(rdg) + print("seqs: {}".format(seqs)) + dist_seq.append(seqs) + # vf_seq.append(seqs[1]) + + count = 0 + + select_node_agent = "select_node_agent_{}".format(count) + distribution_agent = "distribution_agent_{}".format(count) + + return [dist_seq] + + +def predict_loop_distribution(rdgs: list, trained_dist_model: str): + print("trained_dist_model: {}".format(trained_dist_model)) + sys.argv.append("") + ray.init() + + inference_obj = DistributionInference(trained_dist_model) + # agent.distribution_task.net_local.load_state_dict(torch.load(trained_dist_model, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"))) + # agent.vectorization_task.net_local.load_state_dict(torch.load(trained_vec_model, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"))) + print("Start the inference....") + logging.info("Start the inference....") + seqs = inference_obj.run_predict_multiple_loops(rdgs) + logging.info("Distrubuted seqs : {}".format(seqs)) + ray.shutdown() + + return seqs + +def run_pipe_communication(data_format, pipe_name): + def parseObservation(obs): + if data_format == "json": + if "Exit" in obs.keys(): + return "Exit" + return obs["RDG"] + elif data_format == "bytes": + if obs[0].spec().name == "Exit": + return "Exit" + rdg = "".join(chr(int(x)) for x in obs[0]) + return rdg + elif data_format == "protobuf": + pass + + ray.init() + inference_obj = DistributionInference(MODEL_PATH, data_format=data_format) + inference_obj.use_pipe = True + print("Inference model created, using pipe:", pipe_name) + serdes = SerDes.SerDes(data_format, "/tmp/" + 
pipe_name) + print("Serdes init...") + serdes.init() + + with open(f'{data_format}_seq_output.log', 'w') as f: + while True: + try: + print("Entered while loop...") + msg = serdes.readObservation() + msg = parseObservation(msg) + if msg == "Exit": + out = 1 + serdes.sendData(out) + continue + _, seq = inference_obj.run_predict(msg) + f.write(str(seq) + "\n") + serdes.sendData(seq) + except Exception as e: + print("*****Exception occured*******: ", e) + serdes.init() + +class service_server(LoopDistribution_pb2_grpc.LoopDistribution): + def __init__(self, inference_obj) -> None: + self.inference_obj = inference_obj + + def getAdvice(self, request, context): + try: + done = False + while not done: + msg = request + if msg == "Exit": + out = 1 + continue + _, seq = self.inference_obj.run_predict(msg) + return seq + except Exception as e: + print('Error') + traceback.print_exc() + reply = LoopDistribution_pb2.Advice(action=[]) + return reply + +if __name__ == "__main__": + args = parser.parse_args() + use_pipe = args.use_pipe + use_grpc = args.use_grpc + if not use_pipe and not use_grpc: + model_path = MODEL_PATH + test_dir = TEST_DIR + args = { + "no_render": True, + "checkpoint": model_path, + "run": "SimpleQ", + "env": "", + "config": {}, + "video_dir": "", + "steps": 0, + "episodes": 0, + "arch": "X86", + } + args = Namespace(**args) + + rdgs = [] + for path in glob.glob(os.path.join(test_dir, "*.json")): + with open(path) as f: + # print(json.dumps(json.load(f))) + rdgs.append(json.load(f)) + # rdgs.append(json.dumps(json.load(f))) + + predict_loop_distribution(rdgs, model_path) + + # for file in os.listdir(test_dir): + # reward, count = inference_obj.run_predict(file) + # # action, count = inference_obj.compute_action(file) + + select_node_agent = "select_node_agent_{}".format(count) + distribution_agent = "distribution_agent_{}".format(count) + + if use_pipe: + run_pipe_communication(args.data_format, args.pipe_name) + elif use_grpc: + server = 
grpc.server(futures.ThreadPoolExecutor(max_workers=10), options = [ + ('grpc.max_send_message_length', 200*1024*1024), #50MB + ('grpc.max_receive_message_length', 200*1024*1024) #50MB + ]) + ray.init() + inference_obj = DistributionInference(MODEL_PATH) + inference_obj.use_pipe = False + LoopDistribution_pb2_grpc.add_LoopDistributionServicer_to_server(service_server(inference_obj), server) + server.add_insecure_port('localhost:' + args.server_port) + server.start() + print("Server running at port: " + args.server_port) + server.wait_for_termination() \ No newline at end of file diff --git a/model/POSET-RL/Environment_pipe.py b/model/POSET-RL/Environment_pipe.py new file mode 100755 index 000000000000..43ef63491379 --- /dev/null +++ b/model/POSET-RL/Environment_pipe.py @@ -0,0 +1,614 @@ +# Defines environment for the RL model + +import os +import gym +import subprocess +import sys +import numpy as np +from gym.spaces import Discrete, Box, Dict +from Filesystem import * +import tempfile +import time +from ray.rllib.utils.torch_ops import FLOAT_MIN, FLOAT_MAX +from tqdm import tqdm +import logging +from google.protobuf.json_format import MessageToJson +import json +from po_config import BUILD_DIR, CONFIG_DIR +import grpc +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/Python-Utilities") +import posetRL_pb2_grpc, posetRL_pb2 +from google.protobuf.empty_pb2 import Empty +from typing import Union +import signal +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/CompilerInterface/") +from PipeCompilerInterface import PipeCompilerInterface +from GrpcCompilerInterface import GrpcCompilerInterface + +#import pipeCompilerInterface +empty_message = Empty() + + +class PhaseOrder(gym.Env): + def __init__(self, config): + self.ENV_Dir = None + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = None + self.baseBinarySize = None + self.lastMcaThroughtput = None + self.OzMcaThroughtput = None + self.doneList = 
[] + self.StateIndex = 0 + self.embedding = None + self.iteration_counter = 0 + self.rename_Dir = False + self.FileSys_Obj = fsystem(config["llvm_dir"], f"{CONFIG_DIR}/ir2vec") + self.FileSys_Obj.createFolder("env") + self.temporaryDirectory = tempfile.gettempdir() + + self.clang_arch_flag = "-mcpu=cortex-a72" if config["target"] == "AArch64" else "" + self.opt_arch_flag = "--mcpu=cortex-a72" if config["target"] == "AArch64" else "" + + self.alpha = config["alpha"] + self.beta = config["beta"] + self.size_reward_thresh = config["size_reward_thresh"] + self.mca_reward_thresh = config["mca_reward_thresh"] + + # Action space size with optimization sub-sequences obtained from ODG + self.action_space_size = config["action_space_size"] + self.action_space = Discrete(self.action_space_size) + self.action_count = 0 + self.cur_action_seq = [] + self.cur_action_mask = [1] * self.action_space_size + self.mode = "train" + self.Obs = None + obs_space = Box(FLOAT_MIN, FLOAT_MAX, + shape=(config["state_size"], ), dtype=np.float32) + self.observation_space = Dict({"action_mask": Box( + 0, 1, shape=(self.action_space_size,)), "state": obs_space}) + + self.mode = config["mode"] + self.grpc_rtt = 0 + if "worker_index" in config.keys(): + self.worker_index = config.worker_index + else: + self.worker_index = 0 + + if self.mode != 'inference': + self.FileSys_Obj.createFolder("env") + self.make(os.path.abspath(config["train_dir"])) + self.train_Dir = os.path.abspath(config["train_dir"]) + + else: + self.FileSys_Obj.createFolder("inference") + self.FileSys_Obj.TrainingDataPath = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "inference") + self.test_Benchmark = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "") + + self.assembly_file_path = f"{self.temporaryDirectory}/assemblyfile_{self.worker_index}.s" + + logger = logging.getLogger("__file__") + log_level = logging.DEBUG + if os.path.exists("env.log"): + os.remove("env.log") + logging.basicConfig( + filename='env.log', 
format='%(levelname)s - %(filename)s - %(message)s', level=log_level) + + # pipes opening + self.data_format = config["data_format"] + self.use_pipe = config["use_pipe"] + self.tensor_specs = None + self.advice_spec = None + + self.temp_rootname = "/tmp/" + config["pipe_name"] + if self.use_pipe: + # self.temp_rootname = "/tmp/" + config["pipe_name"] + self.compiler_interface = PipeCompilerInterface(self.data_format, self.temp_rootname) + self.use_grpc = config["use_grpc"] + if self.use_grpc: + self.compiler_interface = None + self.is_init = True + + self.server_port = config["server_port"] + + def make(self, TrainingPath): + self.FileSys_Obj.generateTrainingData(TrainingPath) + self.Obs = self.FileSys_Obj.LLFileList + + # def getEmbedding(self, fileName) : + # EmbFile = self.Curr_Dir + "/" + str(self.StateIndex) + # # Get IR2Vec FlowAware embeddings + # command = self.FileSys_Obj.IR2VecBin + " -fa -vocab " + \ + # self.FileSys_Obj.SeedEmbeddingPath + " -o " + EmbFile + " -level p " + fileName + # os.system(command) + # emb = np.loadtxt(EmbFile) + # # Threshold for embedding values + # emb[emb > 100000.0] = 100000.0 + # emb[emb < -100000.0] = -100000.0 + # return emb + + def createEnv(self, fileName): + # env folder will contain folders for separate files with ll and executables + if self.mode != 'inference': + self.ENV_Dir = os.path.join(self.FileSys_Obj.PhaseOrderDir, "env") + else: + self.ENV_Dir = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "inference") + + # setting current directory to point to the folder for the chosen file + self.Curr_Dir = self.ENV_Dir + "/" + os.path.splitext(fileName)[0] + logging.info("Curr_Dir {}".format(self.Curr_Dir)) + + # Creating the folder for the chosen file + self.FileSys_Obj.createFolder(self.Curr_Dir, True) + + # Copying the LL file from training folder to newly created folder + if self.mode != 'inference': + self.FileSys_Obj.copyFile(os.path.join( + self.FileSys_Obj.TrainingDataPath, fileName), self.Curr_Dir) + else: + # 
quiet# print("test_Benchmark {}".format(self.test_Benchmark)) + logging.info("test_Benchmark {}".format(self.test_Benchmark)) + self.FileSys_Obj.copyFile(os.path.join( + self.test_Benchmark, fileName), self.Curr_Dir) + + # Setting up different Paths and Minimum Size + self.BaseIR = os.path.join(self.Curr_Dir, fileName) + self.baseBinarySize, self.minBinarySize = self.getBinarySize( + self.BaseIR, True) + self.lastBinarySize = self.baseBinarySize + + self.CurrIR = os.path.join(self.Curr_Dir, fileName) + self.prev_action = None + + def reset(self, test_file=None, embedding=None): + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = 0 + self.baseBinarySize = None + self.embedding = None + self.stub = None + self.StateIndex = 0 + self.cur_action_mask = [1] * self.action_space_size + + if self.mode != 'inference': + logging.info("Number of files {}".format(len(self.Obs))) + if (len(self.Obs) >= 1): + + index = np.random.random_integers(0, len(self.Obs) - 1) + + self.serverId = self.startServer( + self.Obs[index], "127.0.0.1:" + str(self.server_port)) + # print("Server started at pid:", self.serverId) + + if self.use_grpc and self.compiler_interface is None: + self.compiler_interface = GrpcCompilerInterface(mode='client', stub_class=posetRL_pb2_grpc.PosetRLServiceStub, hostip='127.0.0.1', hostport= self.server_port) + + self.createEnv(self.Obs[index]) + self.doneList.append(self.Obs[index]) + self.Obs.remove(self.Obs[index]) + if (len(self.Obs) == 0): + self.Obs = self.doneList.copy() + self.doneList.clear() + self.iteration_counter += 1 + self.rename_Dir = True + + else: + if not self.use_pipe and not self.use_grpc: + self.Obs = test_file + logging.info("test_file {}".format(test_file)) + index = np.random.random_integers(0, len(self.Obs) - 1) + logging.info("Obs {}".format(index)) + self.createEnv(test_file) + + + # Opening pipe files + if self.use_pipe: + if self.is_init: + 
def readObservation(self, state_size=300):
    """Fetch the next observation from the compiler as a fixed-size vector.

    Calls ``self.compiler_interface.evaluate()`` and extracts the embedding
    according to ``self.data_format``:

    * ``"bytes"`` - the embedding is the first element of the reply;
    * ``"json"``  - the embedding is stored under the ``"embedding"`` key.

    Args:
        state_size: Length of the returned vector (defaults to 300, the
            size this method always used).

    Returns:
        A float ``numpy`` array of length ``state_size``.  Positions past
        the number of received values are zero.

    Raises:
        ValueError: If ``self.data_format`` is neither ``"bytes"`` nor
            ``"json"``.
        IndexError: If the compiler sends more than ``state_size`` values.
    """
    features = self.compiler_interface.evaluate()

    if self.data_format == "bytes":
        values = features[0]
    elif self.data_format == "json":
        values = features["embedding"]
    else:
        # Previously this path returned an uninitialised np.empty buffer,
        # i.e. arbitrary memory contents presented as an embedding.
        raise ValueError(
            "unsupported data_format for observations: {!r}".format(
                self.data_format))

    count = len(values)
    if count > state_size:
        raise IndexError(
            "received {} embedding values, expected at most {}".format(
                count, state_size))

    # Zero-filled (not np.empty) so a short reply never leaks garbage
    # into the tail of the state vector.
    embedding = np.zeros(state_size)
    embedding[:count] = list(values)
    return embedding
os.system(command) + command = self.FileSys_Obj.ClangPath + " " + self.clang_arch_flag + " -c " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll -o " + \ + self.Curr_Dir + "/" + "Oz_binary.o" + os.system(command) + minBinarySize = os.path.getsize(self.Curr_Dir + "/Oz_binary.o") + + # Get Oz MCA Throughput + self.OzMcaThroughtput = self.getMCACost( + self.Curr_Dir + "/" + fileName + "_Oz") + logging.info("base {}".format(self.OzMcaThroughtput)) + + return baseBinarySize, minBinarySize + + # Get next action (sub-sequence) to be applied on the LLVM IR + def step(self, action_index): + prev_embedding = self.embedding + + Reward = 0 + done = False + # Get embedding for New IR + # here we can use gRPC server to get the new embeddings + # self.embedding = self.applyActionGetEmbeddings(action=action_index) + + # make call to compiler to get the updated embedding + if self.mode == 'inference' and self.use_grpc: + pass + else: + # if self.use_pipe or self.use_grpc: + # result = self.compiler_interface.evaluate() + if self.use_pipe: + self.sendResponse(action_index) + result = self.readObservation() + elif self.use_grpc: + result = self.stable_grpc("Action", action_index) # LLVMgRPC way + # else: + # Reward, NextStateIR = self.getLocalReward(action_index) + # result = self.getEmbedding(NextStateIR) + # self.CurrIR = NextStateIR + if result is None: + raise Exception("result is None") + else: + self.embedding = result + + self.cur_action_mask[action_index] = 0 + self.action_count += 1 + self.cur_action_seq.append(action_index) + next_observation = {'action_mask': np.array( + self.cur_action_mask), 'state': self.embedding} + self.cur_obs = next_observation + + # Max number of actions (optimaztions sub-sequences) to be applied + if self.action_count >= 34: + done = True + logging.info(self.cur_action_seq) + if self.mode == 'inference': + # Write pass sequence to actionfile + with open('actionlist.txt', 'a') as actionfile: + act_flag = 0 + actionfile.write('[') + for act_idx in 
# Get llvm-mca Block RThroughput for the IR
def getMCACost(self, new_file):
    """Compile ``new_file``.ll with llc and return its llvm-mca throughput.

    Runs ``llc`` to produce ``new_file``.s, runs ``llvm-mca`` on that
    assembly, and scans the output for ``Block RThroughput: <value>`` lines.
    The value of the last such line is returned.

    Args:
        new_file: Path of the IR file *without* the ``.ll`` extension.

    Returns:
        The parsed Block RThroughput as a float, or ``0`` when llvm-mca
        printed no such line.
    """
    llc_cmd = self.FileSys_Obj.LlcPath + " " + self.opt_arch_flag + \
        " " + new_file + ".ll" + " -o " + new_file + ".s"
    os.system(llc_cmd)

    mca_cmd = self.FileSys_Obj.MCAPath + " " + \
        self.opt_arch_flag + " " + new_file + ".s"

    # Default for every mode: the original only initialised this when
    # use_pipe was set, so other modes hit UnboundLocalError on empty output.
    currMcaThroughtput = 0
    found = False

    # The context manager closes the pipe and waits for the child process.
    with subprocess.Popen(mca_cmd, executable='/bin/bash', shell=True,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                          encoding='utf8') as pro:
        for line in pro.stdout:
            pair = line.split(':')
            if len(pair) > 1 and pair[0] == 'Block RThroughput':
                currMcaThroughtput = float(pair[1].strip(' '))
                found = True

    # stderr is merged into stdout, so failure is detected via the exit
    # status rather than via pro.stderr (which is always None here).
    if pro.returncode != 0:
        logging.critical(
            'Error : llvm-mca exited with status {}'.format(pro.returncode))
    if not found:
        logging.warning(
            "No 'Block RThroughput' line in llvm-mca output; using 0")

    logging.info("LLVM-MCA command: {}".format(mca_cmd))

    return currMcaThroughtput
{}".format(fileName)) + # logging.info("StateIndex {}".format(self.StateIndex)) + # logging.info("BaseIR {}".format(self.CurrIR)) + + # # Modified IR path + # new_IR = self.Curr_Dir + "/" + fileName + \ + # "_" + str(self.StateIndex) + ".ll" + # new_file = self.Curr_Dir + "/" + fileName + "_" + str(self.StateIndex) + + # # Applying the action and saving the IR file as _ + # # Here we can use gRPC server to apply the action + # command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + \ + # " -S -O34 -SubNum=" + str(action) + " " + \ + # self.CurrIR + " -o " + new_IR + # os.system(command) + # command = self.FileSys_Obj.ClangPath + " " + \ + # self.clang_arch_flag + " -c " + new_IR + " -o " + new_file + ".o" + # os.system(command) + # # Size reward + # currBinarySize = os.path.getsize(new_file + ".o") + + # logging.info("lastBinarySize {}".format(self.lastBinarySize)) + # logging.info("currBinarySize {}".format(currBinarySize)) + + # if ((self.baseBinarySize - self.minBinarySize) > 0): + # reward_binarySize = (self.lastBinarySize - currBinarySize) / \ + # (self.baseBinarySize - self.minBinarySize) + # else: + # reward_binarySize = (self.lastBinarySize - + # currBinarySize) / self.baseBinarySize + + # self.lastBinarySize = currBinarySize + + # # Throughput reward + # currMcaThroughtput = self.getMCACost(new_file) + # logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + # logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + # logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + # if self.lastMcaThroughtput is None: + # mca_cost = (self.OzMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + # else: + # mca_cost = (self.lastMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + + # self.lastMcaThroughtput = currMcaThroughtput + + # logging.info("Thr-debug:{}".format(mca_cost)) + # logging.info("Size-debug:{}".format(reward_binarySize)) + + # # Reward thresholds + # if mca_cost > 
def getReward(self, AssemblyFilePath):
    """Compute the combined size/throughput reward for an assembly file.

    The assembly is compiled to an object file with clang to measure its
    size, and analysed with llvm-mca to obtain its Block RThroughput.  Both
    improvements are normalised, saturated at the configured thresholds and
    combined as ``alpha * size_reward + beta * throughput_reward``.

    Side effects: updates ``self.lastBinarySize`` and
    ``self.lastMcaThroughtput`` with the values measured here.

    Args:
        AssemblyFilePath: Path to the ``.s`` file produced by the compiler.

    Returns:
        The weighted reward as a float.
    """
    def _clamp(value, limit):
        # Saturate ``value`` to the interval [-limit, limit].
        if value > limit:
            return limit
        if value < -limit:
            return -limit
        return value

    # ---- object size reward -------------------------------------------
    objectFilePath = f"{self.temporaryDirectory}/objectfile_{self.worker_index}.o"
    objectFileGenerationCommand = self.FileSys_Obj.ClangPath + " -c " + \
        self.clang_arch_flag + " " + AssemblyFilePath + " -o " + objectFilePath
    os.system(objectFileGenerationCommand)
    currentBinarySize = os.path.getsize(objectFilePath)

    size_delta = self.lastBinarySize - currentBinarySize
    size_span = self.baseBinarySize - self.minBinarySize
    if size_span > 0:
        reward_binarySize = size_delta / size_span
    elif self.baseBinarySize:
        reward_binarySize = size_delta / self.baseBinarySize
    else:
        # No usable normaliser (base size is 0): avoid ZeroDivisionError.
        reward_binarySize = 0.0

    self.lastBinarySize = currentBinarySize

    # ---- throughput reward --------------------------------------------
    llvmMcaCommand = f"{self.FileSys_Obj.MCAPath} {self.opt_arch_flag} {AssemblyFilePath}"
    currMcaThroughtput = None
    # The context manager closes the pipe and waits for llvm-mca to exit.
    with subprocess.Popen(llvmMcaCommand, executable='/bin/bash', shell=True,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                          encoding='utf8') as pro:
        for line in pro.stdout:
            pair = line.split(':')
            if len(pair) > 1 and pair[0] == 'Block RThroughput':
                currMcaThroughtput = float(pair[1].strip(' '))

    # stderr is merged into stdout, so check the exit status instead.
    if pro.returncode != 0:
        logging.critical(
            'Error : llvm-mca exited with status {}'.format(pro.returncode))

    if currMcaThroughtput is None:
        # llvm-mca reported nothing: reuse the previous reference value so
        # the throughput term is neutral instead of raising
        # UnboundLocalError as before.
        logging.warning("No 'Block RThroughput' line in llvm-mca output")
        if self.lastMcaThroughtput is not None:
            currMcaThroughtput = self.lastMcaThroughtput
        else:
            currMcaThroughtput = self.OzMcaThroughtput

    logging.info("currMcaThroughtput: {}".format(currMcaThroughtput))
    logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput))
    logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput))

    if self.lastMcaThroughtput is None:
        reference = self.OzMcaThroughtput
    else:
        reference = self.lastMcaThroughtput

    if self.OzMcaThroughtput:
        mca_cost = (reference - currMcaThroughtput) / self.OzMcaThroughtput
    else:
        # Oz throughput of 0 (or unset) cannot be used as a normaliser.
        mca_cost = 0.0

    self.lastMcaThroughtput = currMcaThroughtput

    logging.info("Thr-debug:{}".format(mca_cost))
    logging.info("Size-debug:{}".format(reward_binarySize))

    # Reward thresholds
    mca_cost = _clamp(mca_cost, self.mca_reward_thresh)
    reward_binarySize = _clamp(reward_binarySize, self.size_reward_thresh)

    # Cumulative reward with alpha and beta hyperparameters
    reward = self.alpha*reward_binarySize + self.beta*mca_cost

    return reward
def stable_grpc(self, op, action):
    """Issue a gRPC operation, retrying while the server is unavailable.

    ``op == "Exit"`` stops the compiler server via ``stopServer(SIGTERM)``;
    any other ``op`` sends ``action`` through ``applyActionGetEmbeddings``.
    The time spent in a successful call is added to ``self.grpc_rtt``.

    ``UNAVAILABLE`` errors are retried up to five times, starting with a
    0.1 s pause that grows by a factor of 1.5 per retry.  Other RPC errors
    return ``None`` outside inference mode and are re-raised in inference
    mode.

    Returns:
        The value returned by the underlying call, or ``None`` when the
        retries are exhausted or an unknown error occurs while training.
    """
    failures = 0
    retry_limit = 5
    pause = 0.1
    backoff = 1.5

    while True:
        try:
            started = time.time()
            if op == "Exit":
                outcome = self.stopServer(signal.SIGTERM)
            else:
                outcome = self.applyActionGetEmbeddings(action=action)
            self.grpc_rtt += time.time() - started
        except grpc.RpcError as err:
            if err.code() != grpc.StatusCode.UNAVAILABLE:
                # Not a transient connectivity problem.
                if self.mode == 'inference':
                    raise
                print("Unknown error", err.code())
                return None

            failures += 1
            if failures > retry_limit:
                print("Maximum attempts completed")
                return None
            time.sleep(pause)
            pause *= backoff
        else:
            return outcome
+ +This repo contains the source code and relevant information described in the [paper](https://ieeexplore.ieee.org/abstract/document/9804673) ([arXiv](https://arxiv.org/abs/2208.04238), [slides](https://llvm.org/devmtg/2022-04-03/slides/POSET-RL.Phase.ordering.for.Optimizing.Size.and.Execution.Time.using.Reinforcement.Learning.pdf)). +Please see [here](https://compilers.cse.iith.ac.in/projects/posetrl) for more details. + +> POSET-RL: Phase ordering for Optimizing Size and Execution Time using Reinforcement Learning: Shalini Jain, Yashas Andaluri, S. VenkataKeerthy and Ramakrishna Upadrasta, ISPASS 2022 + + +## Environment Setup + +- Set up the environment from the `posetrl_env.yml` file using the following command + ```bash + conda env create -f posetrl_env.yml + ``` +## Setup Environment Variables + +- Create a `.env` file in the path `model/POSET-RL/src`. +- The `.env` file contains the necessary environment variables. +- Refer to the `.env.example` file present in `model/POSET-RL/src` for setting the required variables. 
+ +- `MODEL_DIR`= +- `BUILD_DIR`= +- `CONFIG_DIR`= + + +## Training +### gRPC + +```bash +cd ml-llvm-project/model/POSET_RL/src + +python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_grpc + +#The --train_dir option must specify a path to a directory of .ll files +``` +### Pipes +```bash +cd ml-llvm-project/model/POSET_RL/src + +python experiment.py --llvm_dir= --train_dir= --train_iterations= --use_pipe --data_format= + +#The --train_dir option must specify a path to a directory of .ll files +#Model will be generated as a pytorch checkpoint in ml-llvm-project/model/checkpoint_dir after every 10 epochs +#The output of the above generates the training logs +``` +### ONNX + +```bash +cd ml-llvm-project/model/POSET_RL/src + +python inference.py --test-dir= --use_grpc --server_address= --model= + +``` + +### Model Inference: [Refer to Model Inference](../../llvm/lib/Transforms/IPO/PosetRL/README.md ) diff --git a/model/POSET-RL/inference.py b/model/POSET-RL/inference.py new file mode 100755 index 000000000000..721bb31f4132 --- /dev/null +++ b/model/POSET-RL/inference.py @@ -0,0 +1,266 @@ +# Script to perform inference on test LLVM IR files +# Use run-inference.sh to call this script +# Usage: python inference.py --ir2vec_dir \ +# --test_dir \ +# --model \ +# [--isAArch] +# --alpha +# --beta +# --size_reward_thresh +# --mca_reward_thresh +# Example: python inference.py --ir2vec_dir POSET-RL/IR2Vec \ +# --test_dir test_ll \ +# --model POSET_RL/saved_models/model \ +# [--isAArch] +# --alpha 10 +# --beta 5 +# --size_reward_thresh 0.2 +# --mca_reward_thresh 0.2 + +import argparse +import numpy as np +import argparse +import os + +# import utils +import logging +import time + +import ray +from ray import tune +from ray.rllib.agents import ppo +from ray.rllib.agents import dqn +from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG +from Environment_pipe import PhaseOrder +from ray.rllib.models import ModelCatalog +from model import 
CustomPhaseOrderModel +from ray.tune.registry import register_env +from datetime import datetime +from po_config import BUILD_DIR + +import sys +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/Python-Utilities") +import posetRL_pb2_grpc, posetRL_pb2 + +sys.path.append(f"{BUILD_DIR}/tools/MLCompilerBridge/CompilerInterface/") +from GrpcCompilerInterface import GrpcCompilerInterface +from Filesystem import * + +logger = logging.getLogger(__file__) +logging.basicConfig( + filename="inference.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, +) + +import networkx +from networkx.readwrite import json_graph +import json +import torch +import pydot + +import grpc +from concurrent import futures +import traceback + +parser = argparse.ArgumentParser() +parser.add_argument("--model", help="Path to saved checkpoint") +parser.add_argument( + "-a", "--isAArch", required=False, default=False, action="store_true" +) +parser.add_argument("-alpha", "--alpha", required=False, type=float, default=10) +parser.add_argument("-beta", "--beta", required=False, type=float, default=5) +parser.add_argument( + "-size_reward_thresh", + "--size_reward_thresh", + required=False, + type=float, + default=0.2, +) +parser.add_argument( + "-mca_reward_thresh", "--mca_reward_thresh", required=False, type=float, default=0.2 +) +parser.add_argument( + "--use_pipe", + action="store_true", + help="Use pipe communication", + required=False, + default=False, +) +parser.add_argument("--server_port", type=str, help="Server port", default=50051) +parser.add_argument( + "--data_format", + type=str, + choices=["json", "protobuf", "bytes"], + help="Data format to use for communication", +) +parser.add_argument("--pipe_name",type=str,help="String Pipe name",default="posetrl_pipe") +parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) +parser.add_argument("--export_onnx", action="store_true", help="Export the 
model to ONNX") + +class PhaseOrderInference: + def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json", export_onnx=False): + print("use_pipe {}".format(use_pipe)) + logdir = "/tmp" + logger = logging.getLogger(__file__) + logging.basicConfig( + filename="running.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, + ) + + config = DEFAULT_CONFIG.copy() + + cfg = { + "hiddens": [], + "dueling": False, + } + + ModelCatalog.register_custom_model("My_torch_model", CustomPhaseOrderModel) + target_arch = "AArch64" if args.isAArch else "X86" + # Define model and environment config + config = dict( + { + "model": { + "custom_model": "My_torch_model", + "custom_model_config": { + "state_size": 300, + "fc1_units": 64, + "fc2_units": 64, + }, + }, + "env_config": { + "target": target_arch, + "state_size": 300, + "mode": "inference", + "dump_type": "One", + "intermediate_data": "./temp", + "llvm_dir": BUILD_DIR, + "alpha": args.alpha, + "beta": args.beta, + "size_reward_thresh": args.size_reward_thresh, + "mca_reward_thresh": args.mca_reward_thresh, + "action_space_size": 34, + "use_pipe": use_pipe, + "data_format": data_format, + "use_grpc": use_grpc, + "server_port": args.server_port, + "pipe_name": args.pipe_name, + "export_onnx": export_onnx + }, + "framework": "torch", + "explore": False, + "num_workers": 0, + "train_batch_size": 1, + }, + **cfg + ) + + def env_creator(env_config): + return PhaseOrder(env_config) + + # Create environment + register_env("Environment", env_creator) + + self.train_agent = DQNTrainer(env="Environment", config=config) + + checkpoint = model_path + # Load saved model + self.train_agent.restore(checkpoint) + + self.config = config + + # Dump the onnx model from the checkpoint + if args.export_onnx: + torch.onnx.export(self.train_agent.get_policy().model, ({"obs": torch.randn(1, 334)}, {}), export_params=True, f="/path/to/ml-llvm-project/model/POSET-RL/onnx-model/posetrl_model.onnx", 
class service_server(posetRL_pb2_grpc.PosetRLService):
    """gRPC servicer that answers compiler requests with the next action.

    One episode corresponds to one sequence of ``getAdvice`` calls: the
    first call creates and resets a ``PhaseOrder`` environment, later calls
    step it with the previously returned action.  An action of ``-1`` is
    sent when the episode is done or when handling a request fails.
    """

    def __init__(self, inference_obj):
        """Store the inference object that provides the config and agent.

        Args:
            inference_obj: Object exposing ``config["env_config"]`` and
                ``train_agent.compute_action``.
        """
        self.inference_obj = inference_obj
        # True while the next request starts a fresh episode.
        self.new_file = True
        self.state = None
        self.env = None
        self.action = None

    def getAdvice(self, request, context):
        """Return the next action for the embedding carried by ``request``.

        Args:
            request: Message whose ``embedding`` field holds the current
                program embedding.
            context: gRPC call context (unused).

        Returns:
            A ``posetRL_pb2.ActionRequest`` with the chosen action, or with
            ``action=-1`` when the episode finished or an error occurred.
        """
        try:
            done = False
            if self.new_file:
                self.env = PhaseOrder(self.inference_obj.config["env_config"])
                self.state = self.env.reset(embedding=request.embedding)
                self.new_file = False
                print("Episode Started")
            else:
                self.env.embedding = np.array(request.embedding)
                self.state, _, done, _ = self.env.step(self.action)

            if not done:
                self.action = self.inference_obj.train_agent.compute_action(self.state)
                reply = posetRL_pb2.ActionRequest(action=self.action.item())
            else:
                reply = posetRL_pb2.ActionRequest(action=-1)
                self.new_file = True
                print("Episode Finished")
            return reply
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit can still stop the server.
            print('Error')
            traceback.print_exc()
            # The client receives the same -1 reply that ends an episode,
            # so start the next request from a fresh environment instead
            # of stepping the failed one with a stale action.
            self.new_file = True
            reply = posetRL_pb2.ActionRequest(action=-1)
            return reply
grpcio=1.31.0=py37hf8bcb03_0 + - gst-plugins-base=1.14.0=hbbd80ab_1 + - gstreamer=1.14.0=hb31296c_0 + - icu=58.2=he6710b0_3 + - idna=2.10=py_0 + - importlib-metadata=1.7.0=py37_0 + - importlib_metadata=1.7.0=0 + - intel-openmp=2020.2=254 + - ipykernel=5.3.4=py37h5ca1d4c_0 + - ipython=7.19.0=py37hb070fc8_0 + - ipython_genutils=0.2.0=py37_0 + - ipywidgets=7.5.1=pyh9f0ad1d_1 + - jedi=0.17.2=py37_0 + - jinja2=2.11.2=pyh9f0ad1d_0 + - jpeg=9b=h024ee3a_2 + - jsonschema=3.2.0=py_2 + - jupyter_client=6.1.7=py_0 + - jupyter_core=4.6.3=py37_0 + - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 + - kiwisolver=1.2.0=py37hfd86e86_0 + - lcms2=2.11=h396b838_0 + - ld_impl_linux-64=2.33.1=h53a641e_7 + - libedit=3.1.20191231=h14c3975_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=9.1.0=hdf63c60_0 + - libpng=1.6.37=hbc83047_0 + - libprotobuf=3.13.0=hd408876_0 + - libsodium=1.0.18=h7b6447c_0 + - libstdcxx-ng=9.1.0=hdf63c60_0 + - libtiff=4.1.0=h2733197_1 + - libuuid=1.0.3=h1bed415_2 + - libuv=1.40.0=h7b6447c_0 + - libxcb=1.14=h7b6447c_0 + - libxml2=2.9.10=he19cac6_1 + - lz4-c=1.9.2=he6710b0_1 + - markdown=3.2.2=py37_0 + - markupsafe=1.1.1=py37hb5d75c8_2 + - matplotlib=3.3.1=0 + - matplotlib-base=3.3.1=py37h817c723_0 + - mistune=0.8.4=py37h4abf009_1002 + - mkl=2020.2=256 + - mkl-service=2.3.0=py37he904b0f_0 + - mkl_fft=1.1.0=py37h23d657b_0 + - mkl_random=1.1.1=py37h0573a6f_0 + - multidict=4.7.6=py37h7b6447c_1 + - nbclient=0.5.1=py_0 + - nbconvert=6.0.7=py37h89c1867_3 + - nbformat=5.0.8=py_0 + - ncurses=6.2=he6710b0_1 + - nest-asyncio=1.4.3=pyhd8ed1ab_0 + - networkx=2.5=py_0 + - ninja=1.10.1=py37hfd86e86_0 + - notebook=6.1.5=py37h89c1867_0 + - numpy=1.19.1=py37hbc911f0_0 + - numpy-base=1.19.1=py37hfa32c7d_0 + - oauthlib=3.1.0=py_0 + - olefile=0.46=py37_0 + - openssl=1.1.1h=h516909a_0 + - packaging=20.4=pyh9f0ad1d_0 + - pandas=1.1.1=py37he6710b0_0 + - pandoc=2.11.2=h36c2ea0_0 + - pcre=8.44=he6710b0_0 + - pexpect=4.8.0=pyhd3eb1b0_3 + - pickleshare=0.7.5=py37_1001 + - pillow=7.2.0=py37hb39fc2d_0 + - 
pip=20.2.3=py37_0 + - prometheus_client=0.9.0=pyhd3deb0d_0 + - prompt-toolkit=3.0.8=py_0 + - ptyprocess=0.6.0=pyhd3eb1b0_2 + - pyasn1=0.4.8=py_0 + - pyasn1-modules=0.2.8=py_0 + - pycparser=2.20=py_2 + - pydot=1.3.0=py37_0 + - pygments=2.7.2=pyhd3eb1b0_0 + - pyjwt=1.7.1=py37_0 + - pyopenssl=19.1.0=py_1 + - pyparsing=2.4.7=py_0 + - pyqt=5.9.2=py37h22d08a2_1 + - pyrsistent=0.17.3=py37h4abf009_1 + - pysocks=1.7.1=py37_1 + - python=3.7.9=h7579374_0 + - python-dateutil=2.8.1=py_0 + - python_abi=3.7=1_cp37m + - pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0 + - pytz=2020.1=py_0 + - qt=5.9.7=h5867ecd_1 + - readline=8.0=h7b6447c_0 + - requests=2.24.0=py_0 + - requests-oauthlib=1.3.0=py_0 + - rsa=4.6=py_0 + - send2trash=1.5.0=py_0 + - setuptools=49.6.0=py37_0 + - sip=4.19.24=py37he6710b0_0 + - six=1.15.0=py_0 + - sqlite=3.33.0=h62c20be_0 + - tensorboard=2.2.1=pyh532a8cf_0 + - tensorboard-plugin-wit=1.6.0=py_0 + - terminado=0.9.1=py37h89c1867_1 + - testpath=0.4.4=py_0 + - tk=8.6.10=hbc83047_0 + - torchaudio=0.7.0=py37 + - torchvision=0.8.1=py37_cu110 + - tornado=6.0.4=py37h7b6447c_1 + - tqdm=4.51.0=pyhd3eb1b0_0 + - traitlets=5.0.5=py_0 + - typing_extensions=3.7.4.3=py_0 + - urllib3=1.25.10=py_0 + - wcwidth=0.2.5=py_0 + - webencodings=0.5.1=py37_1 + - werkzeug=1.0.1=py_0 + - wheel=0.35.1=py_0 + - widgetsnbextension=3.5.1=py37h89c1867_4 + - xz=5.2.5=h7b6447c_0 + - yarl=1.5.1=py37h7b6447c_0 + - zeromq=4.3.3=he6710b0_3 + - zipp=3.1.0=py_0 + - zlib=1.2.11=h7b6447c_3 + - zstd=1.4.5=h9ceee32_0 + - pip: + - aiohttp-cors==0.7.0 + - aioredis==1.3.1 + - blessings==1.7 + - cached-property==1.5.2 + - cloudpickle==1.6.0 + - colorama==0.4.4 + - dataclasses==0.6 + - dm-tree==0.1.6 + - filelock==3.0.12 + - future==0.18.2 + - google-api-core==1.30.0 + - google-auth==1.32.0 + - googleapis-common-protos==1.53.0 + - gpustat==0.6.0 + - gym==0.18.3 + - h5py==3.1.0 + - hiredis==2.0.0 + - joblib==0.17.0 + - json5==0.9.5 + - jupyterlab==2.2.9 + - jupyterlab-server==1.2.0 + - keras==2.4.3 + - lz4==3.1.3 
+ - msgpack==1.0.2 + - nvidia-ml-py3==7.352.0 + - online-triplet-loss==0.0.4 + - opencensus==0.7.13 + - opencensus-context==0.1.2 + - opencv-python-headless==4.3.0.36 + - pandocfilters==1.4.3 + - parso==0.7.1 + - protobuf==3.17.3 + - psutil==5.8.0 + - py-spy==0.3.7 + - pydantic==1.8.2 + - pyglet==1.5.15 + - pyyaml==5.3.1 + - pyzmq==20.0.0 + - ray==1.4.0 + - redis==3.5.3 + - scikit-learn==0.23.2 + - scipy==1.5.4 + - sklearn==0.0 + - tabulate==0.8.9 + - tensorboardx==2.3 + - threadpoolctl==2.1.0 + - torchsummary==1.5.1 + - python-decouple diff --git a/model/POSET-RL/src/.env.example b/model/POSET-RL/src/.env.example new file mode 100644 index 000000000000..cb300b3de00d --- /dev/null +++ b/model/POSET-RL/src/.env.example @@ -0,0 +1,3 @@ +CONFIG_DIR = +BUILD_DIR = +MODEL_DIR = \ No newline at end of file diff --git a/model/POSET-RL/src/Environment_pipe.py b/model/POSET-RL/src/Environment_pipe.py new file mode 100755 index 000000000000..6e446641053f --- /dev/null +++ b/model/POSET-RL/src/Environment_pipe.py @@ -0,0 +1,700 @@ +# Defines environment for the RL model + +import os +import gym +import subprocess +import sys +import numpy as np +from gym.spaces import Discrete, Box, Dict +from Filesystem import * +import tempfile +import time +from ray.rllib.utils.torch_ops import FLOAT_MIN, FLOAT_MAX +from tqdm import tqdm +import logging +from google.protobuf.json_format import MessageToJson +import json +from po_config import BUILD_DIR, CONFIG_DIR +import grpc +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/MLModelRunner/gRPCModelRunner/Python-Utilities/") +import posetRL_pb2_grpc, posetRL_pb2 +from google.protobuf.empty_pb2 import Empty +from typing import Union + +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/CompilerInterface/") +from PipeCompilerInterface import PipeCompilerInterface +from GrpcCompilerInterface import GrpcCompilerInterface + +#import pipeCompilerInterface +empty_message = Empty() + + +class PhaseOrder(gym.Env): + def __init__(self, config): + 
self.ENV_Dir = None + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = None + self.baseBinarySize = None + self.lastMcaThroughtput = None + self.OzMcaThroughtput = None + self.doneList = [] + self.StateIndex = 0 + self.embedding = None + self.iteration_counter = 0 + self.rename_Dir = False + self.FileSys_Obj = fsystem(config["llvm_dir"], f"{CONFIG_DIR}/ir2vec") + self.FileSys_Obj.createFolder("env") + self.temporaryDirectory = tempfile.gettempdir() + + self.clang_arch_flag = "-mcpu=cortex-a72" if config["target"] == "AArch64" else "" + self.opt_arch_flag = "--mcpu=cortex-a72" if config["target"] == "AArch64" else "" + + self.alpha = config["alpha"] + self.beta = config["beta"] + self.size_reward_thresh = config["size_reward_thresh"] + self.mca_reward_thresh = config["mca_reward_thresh"] + + # Action space size with optimization sub-sequences obtained from ODG + self.action_space_size = config["action_space_size"] + self.action_space = Discrete(self.action_space_size) + self.action_count = 0 + self.cur_action_seq = [] + self.cur_action_mask = [1] * self.action_space_size + self.mode = "train" + self.Obs = None + obs_space = Box(FLOAT_MIN, FLOAT_MAX, + shape=(config["state_size"], ), dtype=np.float32) + self.observation_space = Dict({"action_mask": Box( + 0, 1, shape=(self.action_space_size,)), "state": obs_space}) + + self.mode = config["mode"] + self.grpc_rtt = 0 + if "worker_index" in config.keys(): + self.worker_index = config.worker_index + else: + self.worker_index = 0 + + if self.mode != 'inference': + self.FileSys_Obj.createFolder("env") + self.make(os.path.abspath(config["train_dir"])) + self.train_Dir = os.path.abspath(config["train_dir"]) + + else: + self.FileSys_Obj.createFolder("inference") + self.FileSys_Obj.TrainingDataPath = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "inference") + self.test_Benchmark = os.path.join( + self.FileSys_Obj.PhaseOrderDir, config["test_dir"]) + + 
self.assembly_file_path = f"{self.temporaryDirectory}/assemblyfile_{self.worker_index}.s" + + logger = logging.getLogger("__file__") + log_level = logging.DEBUG + if os.path.exists("env.log"): + os.remove("env.log") + logging.basicConfig( + filename='env.log', format='%(levelname)s - %(filename)s - %(message)s', level=log_level) + + # pipes opening + self.data_format = config["data_format"] + self.use_pipe = config["use_pipe"] + self.tensor_specs = None + self.advice_spec = None + + self.temp_rootname = "/tmp/" + config["pipe_name"] + if self.use_pipe: + self.compiler_interface = PipeCompilerInterface(self.data_format, self.temp_rootname) + + self.use_grpc = config["use_grpc"] + if self.use_grpc: + self.compiler_interface = None + self.is_init = True + + self.server_port = config["server_port"] + + def make(self, TrainingPath): + self.FileSys_Obj.generateTrainingData(TrainingPath) + self.Obs = self.FileSys_Obj.LLFileList + + # def getEmbedding(self, fileName) : + # EmbFile = self.Curr_Dir + "/" + str(self.StateIndex) + # # Get IR2Vec FlowAware embeddings + # command = self.FileSys_Obj.IR2VecBin + " -fa -vocab " + \ + # self.FileSys_Obj.SeedEmbeddingPath + " -o " + EmbFile + " -level p " + fileName + # os.system(command) + # emb = np.loadtxt(EmbFile) + # # Threshold for embedding values + # emb[emb > 100000.0] = 100000.0 + # emb[emb < -100000.0] = -100000.0 + # return emb + + def createEnv(self, fileName): + # env folder will contain folders for separate files with ll and executables + if self.mode != 'inference': + self.ENV_Dir = os.path.join(self.FileSys_Obj.PhaseOrderDir, "env") + else: + self.ENV_Dir = os.path.join( + self.FileSys_Obj.PhaseOrderDir, "inference") + + # setting current directory to point to the folder for the chosen file + self.Curr_Dir = self.ENV_Dir + "/" + os.path.splitext(fileName)[0] + logging.info("Curr_Dir {}".format(self.Curr_Dir)) + + # Creating the folder for the chosen file + self.FileSys_Obj.createFolder(self.Curr_Dir, True) + + # 
Copying the LL file from training folder to newly created folder + if self.mode != 'inference': + self.FileSys_Obj.copyFile(os.path.join( + self.FileSys_Obj.TrainingDataPath, fileName), self.Curr_Dir) + else: + # quiet# print("test_Benchmark {}".format(self.test_Benchmark)) + logging.info("test_Benchmark {}".format(self.test_Benchmark)) + self.FileSys_Obj.copyFile(os.path.join( + self.test_Benchmark, fileName), self.Curr_Dir) + + # Setting up different Paths and Minimum Size + self.BaseIR = os.path.join(self.Curr_Dir, fileName) + self.baseBinarySize, self.minBinarySize = self.getBinarySize( + self.BaseIR, True) + self.lastBinarySize = self.baseBinarySize + + self.CurrIR = os.path.join(self.Curr_Dir, fileName) + self.prev_action = None + + def reset(self, test_file=None, embedding=None): + self.BaseIR = None + self.CurrIR = None + self.Curr_Dir = None + self.minBinarySize = None + self.lastBinarySize = 0 + self.baseBinarySize = None + self.embedding = None + self.stub = None + self.StateIndex = 0 + self.cur_action_mask = [1] * self.action_space_size + + if self.mode != 'inference': + logging.info("Number of files {}".format(len(self.Obs))) + if (len(self.Obs) >= 1): + + index = np.random.random_integers(0, len(self.Obs) - 1) + + self.serverId = self.startServer( + self.Obs[index], "127.0.0.1:" + str(self.server_port)) + # print("Server started at pid:", self.serverId) + + if self.use_grpc and self.compiler_interface is None: + self.compiler_interface = GrpcCompilerInterface(mode='client', stub_class=posetRL_pb2_grpc.PosetRLServiceStub, hostip='127.0.0.1', hostport= self.server_port) + + self.createEnv(self.Obs[index]) + self.doneList.append(self.Obs[index]) + self.Obs.remove(self.Obs[index]) + if (len(self.Obs) == 0): + self.Obs = self.doneList.copy() + self.doneList.clear() + self.iteration_counter += 1 + self.rename_Dir = True + + else: + if not self.use_pipe and not self.use_grpc: + self.Obs = test_file + logging.info("test_file {}".format(test_file)) + index = 
np.random.random_integers(0, len(self.Obs) - 1) + logging.info("Obs {}".format(index)) + self.createEnv(test_file) + + + # Opening pipe files + if self.use_pipe: + if self.is_init: + self.compiler_interface.reset_pipes() + self.is_init = False + + result = self.readObservation() # DEBUG + + if result is None: + raise + else: + self.embedding = result + elif self.use_grpc: + if self.mode == 'inference': + self.embedding = np.array(embedding) + else: + self.embedding = self.stable_grpc("Action", 0) # LLVMgRPC way + # else: + # self.embedding = self.getEmbedding(self.BaseIR) + + action_mask = [1] * self.action_space_size + next_observation = {'action_mask': np.array( + action_mask), 'state': self.embedding} + self.cur_obs = next_observation + + return next_observation + + def readObservation(self): + embedding = np.empty([300]) + features = self.compiler_interface.evaluate() + + + if self.data_format == "bytes": + for i in range(len(features[0])): + embedding[i] = features[0][i] + elif self.data_format == "json": + for i in range(len(features["embedding"])): + embedding[i] = features["embedding"][i] + + return embedding + + + + def sendResponse(self, value: Union[int, float]): + self.compiler_interface.populate_buffer(int(value)) + + def getBinarySize(self, IRFile, init=False): + fileName = os.path.splitext(os.path.basename(IRFile))[0] + minBinarySize = 0 + baseBinarySize = 0 + if (init): + # Compute O0 Binary size + command = self.FileSys_Obj.ClangPath + " " + self.clang_arch_flag + " -c " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + "base_binary.o" + f" -mllvm -ml-config-path={CONFIG_DIR}" + # print("O0 binary object compile command: "+command) + os.system(command) + baseBinarySize = os.path.getsize(self.Curr_Dir + "/base_binary.o") + logging.info("base {}".format(baseBinarySize)) + + # Compute Oz Binary size + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + " -S -add-size-attr --enableMinSizeAttr --removeNoInlineAttr 
" + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + fileName + ".ll" + f"-ml-config-path={CONFIG_DIR} " + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + " -S -Oz " + \ + self.Curr_Dir + "/" + fileName + ".ll -o " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll" + f" -ml-config-path={CONFIG_DIR} " + os.system(command) + command = self.FileSys_Obj.ClangPath + " " + self.clang_arch_flag + " -c " + \ + self.Curr_Dir + "/" + fileName + "_Oz.ll -o " + \ + self.Curr_Dir + "/" + "Oz_binary.o" + f" -mllvm -ml-config-path={CONFIG_DIR} " + os.system(command) + minBinarySize = os.path.getsize(self.Curr_Dir + "/Oz_binary.o") + + # Get Oz MCA Throughput + self.OzMcaThroughtput = self.getMCACost( + self.Curr_Dir + "/" + fileName + "_Oz") + logging.info("base {}".format(self.OzMcaThroughtput)) + + return baseBinarySize, minBinarySize + + # Get next action (sub-sequence) to be applied on the LLVM IR + def step(self, action_index): + prev_embedding = self.embedding + + Reward = 0 + done = False + # Get embedding for New IR + # here we can use gRPC server to get the new embeddings + # self.embedding = self.applyActionGetEmbeddings(action=action_index) + + # make call to compiler to get the updated embedding + if self.mode == 'inference' and self.use_grpc: + pass + else: + # if self.use_pipe or self.use_grpc: + # result = self.compiler_interface.evaluate() + if self.use_pipe: + self.sendResponse(action_index) + result = self.readObservation() + elif self.use_grpc: + result = self.stable_grpc("Action", action_index) # LLVMgRPC way + # else: + # Reward, NextStateIR = self.getLocalReward(action_index) + # result = self.getEmbedding(NextStateIR) + # self.CurrIR = NextStateIR + if result is None: + raise Exception("result is None") + else: + self.embedding = result + + self.cur_action_mask[action_index] = 0 + self.action_count += 1 + self.cur_action_seq.append(action_index) + next_observation = {'action_mask': np.array( + self.cur_action_mask), 
'state': self.embedding} + self.cur_obs = next_observation + + # Max number of actions (optimaztions sub-sequences) to be applied + if self.action_count >= 34: + done = True + logging.info(self.cur_action_seq) + if self.mode == 'inference': + # Write pass sequence to actionfile + with open('actionlist.txt', 'a') as actionfile: + act_flag = 0 + actionfile.write('[') + for act_idx in self.cur_action_seq: + if act_flag == 1: + actionfile.write('-'+str(act_idx)) + else: + act_flag = 1 + actionfile.write(str(act_idx)) + actionfile.write('] ') + + if self.mode != 'inference': + if not self.use_pipe: + # self.stable_grpc("Exit", None) + try: + outs, errs = self.server_pid.communicate(timeout=5) + except: + self.serverId.kill() + print("Clang failing") + + Reward = self.getReward(self.assembly_file_path) + if self.use_pipe: + self.sendResponse(-1) # self.populate_buffer(-1) + self.compiler_interface.evaluate('exit') + + if self.mode != "inference": + Reward = self.getReward(self.assembly_file_path) + # else: + # self.compiler_interface.reset_pipes() + + self.cur_action_seq = [] + self.action_count = 0 + logging.info("Reward {}".format(Reward)) + logging.info("Action {}".format(action_index)) + logging.info("done {}".format(done)) + + return next_observation, Reward, done, {} + + # Get llvm-mca Block RThroughput for the IR + def getMCACost(self, new_file): + cmd1 = self.FileSys_Obj.LlcPath + " " + self.opt_arch_flag + \ + " " + new_file + ".ll" + " -o " + new_file + ".s" + f" -ml-config-path={CONFIG_DIR}" + os.system(cmd1) + cmd2 = self.FileSys_Obj.MCAPath + " " + \ + self.opt_arch_flag + " " + new_file + ".s" + pro = subprocess.Popen(cmd2, executable='/bin/bash', shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8') + Output_cmd2 = pro.stdout + + line = Output_cmd2.readline() + if pro.stderr is not None: + logging.critical('Error : {}'.format(pro.stderr)) + if self.use_pipe: + currMcaThroughtput = 0 + while line: + pair = line.split(':') + if 
pair[0] == 'Block RThroughput': + currMcaThroughtput = float(pair[1].strip(' ')) + line = Output_cmd2.readline() + + logging.info("LLVM-MCA command: {}".format(cmd2)) + + return currMcaThroughtput + + # Get reward for an action +<<<<<<< HEAD + def getLocalReward(self, action): + self.StateIndex += 1 + fileName = os.path.splitext(os.path.basename(self.BaseIR))[0] + + logging.info("fileName {}".format(fileName)) + logging.info("StateIndex {}".format(self.StateIndex)) + logging.info("BaseIR {}".format(self.CurrIR)) + + # Modified IR path + new_IR = self.Curr_Dir + "/" + fileName + \ + "_" + str(self.StateIndex) + ".ll" + new_file = self.Curr_Dir + "/" + fileName + "_" + str(self.StateIndex) + + # Applying the action and saving the IR file as _ + # Here we can use gRPC server to apply the action + command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + \ + " -S -O34 -SubNum=" + str(action) + " " + \ + self.CurrIR + " -o " + new_IR + f" -ml-config-path={CONFIG_DIR}" + os.system(command) + command = self.FileSys_Obj.ClangPath + " " + \ + self.clang_arch_flag + " -c " + new_IR + " -o " + new_file + ".o" + f" -mllvm -ml-config-path={CONFIG_DIR}" + os.system(command) + # Size reward + currBinarySize = os.path.getsize(new_file + ".o") + + logging.info("lastBinarySize {}".format(self.lastBinarySize)) + logging.info("currBinarySize {}".format(currBinarySize)) + + if ((self.baseBinarySize - self.minBinarySize) > 0): + reward_binarySize = (self.lastBinarySize - currBinarySize) / \ + (self.baseBinarySize - self.minBinarySize) + else: + reward_binarySize = (self.lastBinarySize - + currBinarySize) / self.baseBinarySize + + self.lastBinarySize = currBinarySize + + # Throughput reward + currMcaThroughtput = self.getMCACost(new_file) + logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + if self.lastMcaThroughtput is 
None: + mca_cost = (self.OzMcaThroughtput - + currMcaThroughtput) / self.OzMcaThroughtput + else: + mca_cost = (self.lastMcaThroughtput - + currMcaThroughtput) / self.OzMcaThroughtput + + self.lastMcaThroughtput = currMcaThroughtput + + logging.info("Thr-debug:{}".format(mca_cost)) + logging.info("Size-debug:{}".format(reward_binarySize)) + + # Reward thresholds + if mca_cost > self.mca_reward_thresh: + mca_cost = self.mca_reward_thresh + elif mca_cost < -self.mca_reward_thresh: + mca_cost = -self.mca_reward_thresh + + if reward_binarySize > self.size_reward_thresh: + reward_binarySize = self.size_reward_thresh + elif reward_binarySize < -self.size_reward_thresh: + reward_binarySize = -self.size_reward_thresh + + # Cumulative reward with alpha and beta hyperparameters + reward = self.alpha*reward_binarySize + self.beta*mca_cost + + return reward, new_IR +======= + # def getLocalReward(self, action): + # self.StateIndex += 1 + # fileName = os.path.splitext(os.path.basename(self.BaseIR))[0] + + # logging.info("fileName {}".format(fileName)) + # logging.info("StateIndex {}".format(self.StateIndex)) + # logging.info("BaseIR {}".format(self.CurrIR)) + + # # Modified IR path + # new_IR = self.Curr_Dir + "/" + fileName + \ + # "_" + str(self.StateIndex) + ".ll" + # new_file = self.Curr_Dir + "/" + fileName + "_" + str(self.StateIndex) + + # # Applying the action and saving the IR file as _ + # # Here we can use gRPC server to apply the action + # command = self.FileSys_Obj.OptPath + " " + self.opt_arch_flag + \ + # " -S -O34 -SubNum=" + str(action) + " " + \ + # self.CurrIR + " -o " + new_IR + # os.system(command) + # command = self.FileSys_Obj.ClangPath + " " + \ + # self.clang_arch_flag + " -c " + new_IR + " -o " + new_file + ".o" + # os.system(command) + # # Size reward + # currBinarySize = os.path.getsize(new_file + ".o") + + # logging.info("lastBinarySize {}".format(self.lastBinarySize)) + # logging.info("currBinarySize {}".format(currBinarySize)) + + # if 
((self.baseBinarySize - self.minBinarySize) > 0): + # reward_binarySize = (self.lastBinarySize - currBinarySize) / \ + # (self.baseBinarySize - self.minBinarySize) + # else: + # reward_binarySize = (self.lastBinarySize - + # currBinarySize) / self.baseBinarySize + + # self.lastBinarySize = currBinarySize + + # # Throughput reward + # currMcaThroughtput = self.getMCACost(new_file) + # logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + # logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + # logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + # if self.lastMcaThroughtput is None: + # mca_cost = (self.OzMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + # else: + # mca_cost = (self.lastMcaThroughtput - + # currMcaThroughtput) / self.OzMcaThroughtput + + # self.lastMcaThroughtput = currMcaThroughtput + + # logging.info("Thr-debug:{}".format(mca_cost)) + # logging.info("Size-debug:{}".format(reward_binarySize)) + + # # Reward thresholds + # if mca_cost > self.mca_reward_thresh: + # mca_cost = self.mca_reward_thresh + # elif mca_cost < -self.mca_reward_thresh: + # mca_cost = -self.mca_reward_thresh + + # if reward_binarySize > self.size_reward_thresh: + # reward_binarySize = self.size_reward_thresh + # elif reward_binarySize < -self.size_reward_thresh: + # reward_binarySize = -self.size_reward_thresh + + # # Cumulative reward with alpha and beta hyperparameters + # reward = self.alpha*reward_binarySize + self.beta*mca_cost + + # return reward, new_IR +>>>>>>> 9f81943cf9d3... 
Removing ir2vec related dependencies and recursive code cleanup + + def getReward(self, AssemblyFilePath): + # object size reward + objectFilePath = f"{self.temporaryDirectory}/objectfile_{self.worker_index}.o" + objectFileGenerationCommand = self.FileSys_Obj.ClangPath + " -c " + \ + self.clang_arch_flag + " " + AssemblyFilePath + " -o " + objectFilePath + f" -mllvm -ml-config-path={CONFIG_DIR}" + + os.system(objectFileGenerationCommand) + + currentBinarySize = os.path.getsize(objectFilePath) + + if ((self.baseBinarySize - self.minBinarySize) > 0): + reward_binarySize = (self.lastBinarySize - currentBinarySize) / \ + (self.baseBinarySize - self.minBinarySize) + else: + reward_binarySize = (self.lastBinarySize - + currentBinarySize) / self.baseBinarySize + + self.lastBinarySize = currentBinarySize + + llvmMcaCommand = f"{self.FileSys_Obj.MCAPath} {self.opt_arch_flag} {AssemblyFilePath}" #+ " -ml-config-path={CONFIG_DIR}" + pro = subprocess.Popen(llvmMcaCommand, executable='/bin/bash', shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf8') + + Output_cmd2 = pro.stdout + + line = Output_cmd2.readline() + if pro.stderr is not None: + logging.critical('Error : {}'.format(pro.stderr)) + + while line: + pair = line.split(':') + if pair[0] == 'Block RThroughput': + currMcaThroughtput = float(pair[1].strip(' ')) + line = Output_cmd2.readline() + + logging.info("currMcaThroughtput: {}".format(currMcaThroughtput)) + logging.info("OzMcaThroughtput: {}".format(self.OzMcaThroughtput)) + logging.info("lastMcaThroughtput: {}".format(self.lastMcaThroughtput)) + + if self.lastMcaThroughtput is None: + mca_cost = (self.OzMcaThroughtput - + currMcaThroughtput) / self.OzMcaThroughtput + else: + mca_cost = (self.lastMcaThroughtput - + currMcaThroughtput) / self.OzMcaThroughtput + + self.lastMcaThroughtput = currMcaThroughtput + + logging.info("Thr-debug:{}".format(mca_cost)) + logging.info("Size-debug:{}".format(reward_binarySize)) + + # Reward thresholds + if 
mca_cost > self.mca_reward_thresh: + mca_cost = self.mca_reward_thresh + elif mca_cost < -self.mca_reward_thresh: + mca_cost = -self.mca_reward_thresh + + if reward_binarySize > self.size_reward_thresh: + reward_binarySize = self.size_reward_thresh + elif reward_binarySize < -self.size_reward_thresh: + reward_binarySize = -self.size_reward_thresh + + # Cumulative reward with alpha and beta hyperparameters + reward = self.alpha*reward_binarySize + self.beta*mca_cost + + return reward + + def set_config(path): + global config_path + config_path = path + return config_path + + def startServer(self, filename, ip): + optPath = f"{BUILD_DIR}/bin/opt" + clangPath = f"{BUILD_DIR}/bin/clang" + filepath = self.train_Dir + "/" + filename + newfilepath = self.assembly_file_path + data_format = self.data_format + + cmd = f"{clangPath} -S -mllvm --OPosetRL -mllvm -ml-config-path={CONFIG_DIR} -mllvm --training -mllvm -data-format={data_format} -mllvm --server_address={ip} {filepath} -o {newfilepath}" + if self.use_pipe: + cmd = cmd + " -mllvm -use-pipe" + pid = subprocess.Popen(cmd, executable='/bin/bash', + shell=True, preexec_fn=os.setsid, stdin=subprocess.PIPE, text=True) + + return pid + + def repeatedgRPCFieldToNumpyArray(self, gRPCObj): + jsonObj = MessageToJson(gRPCObj) + dictObj = json.loads(jsonObj) + array = dictObj['embedding'] + return np.array(array) + + def applyActionGetEmbeddings(self, action): + request = posetRL_pb2.ActionRequest(action=action) + + self.compiler_interface.populate_buffer(request) + response = self.compiler_interface.evaluate() + # response = self.stub.applyActionGetEmbeddings(request) + return self.repeatedgRPCFieldToNumpyArray(response) + + def stopServer(self): +<<<<<<< HEAD + request = posetRL_pb2.ActionRequest(action=-1) + self.compiler_interface.populate_buffer(request) + self.compiler_interface.evaluate() + # self.stub.applyActionGetEmbeddings(request) + +======= + self.serverId.stdin.write("Terminate\n") + self.serverId.stdin.flush() + 
try: + out, errs = self.serverId.communicate(timeout=15) + except: + self.serverId.kill() + out, errs = self.serverId.communicate() + print("Force Stop") + +>>>>>>> 146ca72982db... Changed the StopServer logic to the new logic of sending the text to stdin + def stable_grpc(self, op, action): + attempt = 0 + max_retries = 5 + retry_wait_seconds = 0.1 + retry_wait_backoff_exponent = 1.5 + + result = None + while True: + try: + t1 = time.time() + if op != "Exit": + result = self.applyActionGetEmbeddings(action=action) + else: + result = self.stopServer() + t2 = time.time() + self.grpc_rtt += t2-t1 + break + except grpc.RpcError as e: + + if e.code() == grpc.StatusCode.UNAVAILABLE: + # print("Error in grpc") + # if op == 'Exit' and self.last_task_done == 0: + # raise + attempt += 1 + if attempt > max_retries: + print("Maximum attempts completed") + return None + # raise #ServiceTransportError( f"{self.url} {e.details()} ({max_retries} retries)") from None + remaining = max_retries - attempt + time.sleep(retry_wait_seconds) + retry_wait_seconds *= retry_wait_backoff_exponent + else: + if self.mode != 'inference': + print("Unknown error", e.code()) + return None + else: + raise + return result diff --git a/model/POSET-RL/src/Filesystem.py b/model/POSET-RL/src/Filesystem.py new file mode 100755 index 000000000000..f29c29da694a --- /dev/null +++ b/model/POSET-RL/src/Filesystem.py @@ -0,0 +1,38 @@ +# Defines filesystem utilities + +import os +import shutil + +class fsystem: + # Get paths relative to LLVM and IR2Vec directories + def __init__(self, LLVMPath="", IR2Vec=""): + self.LLVMPath = os.path.abspath(LLVMPath) + self.OptPath = os.path.join(self.LLVMPath, "bin", "opt") + self.MCAPath = os.path.join(self.LLVMPath, "bin", "llvm-mca") + self.AddOptAttr = os.path.join(self.LLVMPath, "add-size-attr.so") + self.ClangPath = os.path.join(self.LLVMPath, "bin", "clang") + self.LlcPath = os.path.join(self.LLVMPath, "bin" , "llc") + self.IR2Vec = os.path.abspath(IR2Vec) + 
self.SeedEmbeddingPath = os.path.join(self.IR2Vec, "seedEmbeddingVocab-300-llvm10.txt") + self.IR2VecBin = os.path.join(self.IR2Vec, "ir2vec") + self.TrainingDataPath = None + self.PhaseOrderDir = os.getcwd() + + self.LLFileList = [] + + def generateTrainingData(self, path): + self.TrainingDataPath = os.path.join(self.PhaseOrderDir,path) + for file in os.listdir(self.TrainingDataPath): + self.LLFileList.append(file) + + def createFolder(self, path, new=False): + path = os.path.join(self.PhaseOrderDir,path) + if(not os.path.exists(path)): + os.mkdir(path) + elif(os.path.exists(path)): + if(new): + shutil.rmtree(path) + os.mkdir(path) + + def copyFile(self, src, dest): + shutil.copy(src, dest) diff --git a/model/POSET-RL/src/collect-results.sh b/model/POSET-RL/src/collect-results.sh new file mode 100755 index 000000000000..10fca844ed8a --- /dev/null +++ b/model/POSET-RL/src/collect-results.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Usage: bash collect-results.sh +# Prints size, throughput and pass sequence statistics to a csv file +INFERENCE_DIR=$1 +LLVM_BUILD_DIR=$2 +OUTPUT=$3 +echo "Filename, Oz .o size, Oz RThr, .o < Oz, Num for min size, Pass sequence, Model .o size, Model RThr, Complete .o, Complete RThr" > $OUTPUT +for dir in $INFERENCE_DIR/*; do + if [ -d $dir ] + then + echo -n "${dir##*/}, " >> $OUTPUT + if [ ! 
-f "${dir}/Oz_binary.o" ] + then + echo -n "No *_Oz.ll, " >> $OUTPUT + else + echo -n "$(du -b ${dir}/Oz_binary.o | awk '{print $1}'), " >> $OUTPUT + $LLVM_BUILD_DIR/bin/llc "${dir}/${dir##*/}_Oz.ll" -o "${dir##*/}_Oz.s" + echo -n "$($LLVM_BUILD_DIR/bin/llvm-mca ${dir##*/}_Oz.s | head -15 | grep 'Block RThroughput' | cut -f 2 -d':' | awk '{print $1}'), " >> $OUTPUT + rm "${dir##*/}_Oz.s" + fi + num="" + minsize=-1 + for i in $(seq 1 15); do + if [ -f "${dir}/${dir##*/}_${i}.o" ] + then + size=$(du -b ${dir}/${dir##*/}_${i}.o | awk '{print $1}') + if [[ $minsize -lt '0' ]] || [[ $minsize -ge $size ]] + then + num=$i + minsize=$size + fi + else + echo "No *_.ll" + fi + done + actionseq=$(cat actionlist.txt | grep -w "${dir##*/}.ll" | cut -f 1 -d ' ') + echo -n ", ${num}, ${actionseq}, ${minsize}, " >> $OUTPUT + echo -n "$($LLVM_BUILD_DIR/bin/llvm-mca ${dir}/${dir##*/}_${num}.s | head -15 | grep 'Block RThroughput' | cut -f 2 -d':' | awk '{print $1}')" >> $OUTPUT + if [ -f "${dir}/${dir##*/}_15.o" ] + then + size=$(du -b ${dir}/${dir##*/}_15.o | awk '{print $1}') + else + echo "No *_.ll" + fi + echo -n ", ${size}, " >> $OUTPUT + echo "$($LLVM_BUILD_DIR/bin/llvm-mca ${dir}/${dir##*/}_15.s | head -15 | grep 'Block RThroughput' | cut -f 2 -d':' | awk '{print $1}')" >> $OUTPUT + fi +done diff --git a/model/POSET-RL/src/experiment.py b/model/POSET-RL/src/experiment.py new file mode 100755 index 000000000000..ae57a1440d86 --- /dev/null +++ b/model/POSET-RL/src/experiment.py @@ -0,0 +1,172 @@ +# Train RLLib model +# For more details on RLLib: https://docs.ray.io/en/latest/rllib/index.html +# Usage: python experiment.py --llvm_dir \ +# --ir2vec_dir \ +# --train_dir \ +# --train-iterations +# [--isAArch] +# --log-dir +# --alpha +# --beta +# --size_reward_thresh +# --mca_reward_thresh +# Example: python experiment.py --llvm_dir POSET_RL/llvm-project-10/build \ +# --ir2vec_dir POSET_IR2Vec \ +# --train_dir train_ll \ +# --train-iterations 100 \ +# --isAArch \ +# --log-dir 
10-5-0.2-0.2-aarch \ +# --alpha 10 \ +# --beta 5 \ +# --size_reward_thresh 0.2 \ +# --mca_reward_thresh 0.2 + +import argparse +import os + +import ray +from ray import tune +from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG +#from Environment_1 import PhaseOrder +from Environment_pipe import PhaseOrder +from ray.rllib.models import ModelCatalog +from model import CustomPhaseOrderModel +from po_config import BUILD_DIR, MODEL_DIR + +from Filesystem import * + +import logging +#import utils + +parser = argparse.ArgumentParser() +parser.add_argument("-llvm", "--llvm_dir", required=True, help = "path to llvm-build directory") +parser.add_argument("-ir2vec", "--ir2vec_dir", required=True, help = "path to IR2vec directory which has seed embedding and IR2Vec binary files") +parser.add_argument("-train", "--train_dir", required=True, help = "path to directory with LLVM IR files for training") +parser.add_argument("-iter", "--train-iterations", required=False, type=int, default=300) +parser.add_argument("-a", "--isAArch", required=False, default=False, action='store_true') +parser.add_argument("-log", "--log_dir", required=False, type=str, default="0.2thresh-10alpha-5beta-x86") +parser.add_argument("-alpha", "--alpha", required=False, type=float, default=10) +parser.add_argument("-beta", "--beta", required=False, type=float, default=5) +parser.add_argument("-size_reward_thresh", "--size_reward_thresh", required=False, type=float, default=0.2) +parser.add_argument("-mca_reward_thresh", "--mca_reward_thresh", required=False, type=float, default=0.2) +parser.add_argument("--use_pipe", action='store_true', help = "Use pipe communication", required=False, default=False) +parser.add_argument("--use_grpc", action='store_true', help = "Use grpc communication", required=False, default=False) +parser.add_argument("--pipe_name",type=str,help="String Pipe name", default='posetrl_pipe') +parser.add_argument( + "--data_format", + type=str, + choices=["json", "protobuf", 
"bytes"], + help="Data format to use for communication", +) +parser.add_argument("--server_port", type=str, help="Server port", default=50051) + +# Use for resuming training from checkpoint +checkpoint = None + +def experiment(config): + iterations = config.pop("train-iterations") + global checkpoint + train_results = {} + print(config) + train_agent = DQNTrainer(config=config, env=PhaseOrder) + if checkpoint is not None: + train_agent.restore(checkpoint) + + for i in range(iterations): + train_results = train_agent.train() + if i == iterations - 1 or i%10 == 0: + tune.report(**train_results) + checkpoint = train_agent.save(tune.get_trial_dir()) + # train_agent.export_policy_model("/home/cs20btech11018/repos/ML-Phase-Ordering/RLLib-PhaseOrder/poset-RL-onnx-model", onnx=int(os.getenv("ONNX_OPSET", "11"))) + # break + + train_agent.stop() + +if __name__ == '__main__': + args = parser.parse_args() + logger = logging.getLogger("__file__") + log_level = logging.DEBUG + + if os.path.exists("running.log"): + os.remove("running.log") + logging.basicConfig(filename='running.log', format='%(levelname)s - %(filename)s - %(message)s', level=log_level) + logging.info('Starting training') + logging.info(args) + + ray.init() + default_config = DEFAULT_CONFIG.copy() + + cfg = { + "hiddens": [], + "dueling": False, + } + + ModelCatalog.register_custom_model("My_torch_model", CustomPhaseOrderModel) + target_arch = "AArch64" if args.isAArch else "X86" + # Define model and environment config with below hyperparameters + config = dict( + { + "model": { + "custom_model": "My_torch_model", + "custom_model_config": { + "state_size": 300, + "fc1_units": 64, + "fc2_units": 64 + }, + }, + "env": PhaseOrder, + "lr": 0.0001, + "env_config": { + "target": target_arch, + "state_size": 300, + "mode": "train", + "dump_type": "One", + "intermediate_data": "./temp", + "llvm_dir": args.llvm_dir, + "ir2vec_dir": args.ir2vec_dir, + "train_dir": args.train_dir, + "alpha": args.alpha, + "beta": 
args.beta, + "size_reward_thresh": args.size_reward_thresh, + "mca_reward_thresh": args.mca_reward_thresh, + "action_space_size": 34, + "use_pipe": args.use_pipe, + "data_format": args.data_format, + "use_grpc": args.use_grpc, + "server_port": args.server_port, + "pipe_name": args.pipe_name + }, + "train_batch_size": 512, + "exploration_config": { + "type": "EpsilonGreedy", + "initial_epsilon": 1.0, + "final_epsilon": 0.01, + "epsilon_timesteps": 20000, + }, + # "horizon":10, + "framework": "torch", + "train-iterations": args.train_iterations, + "batch_mode": "truncate_episodes", + "seed": 1, + "num_gpus": 0, + "num_workers": 1, + "num_gpus_per_worker": 0 + }, + **cfg) + # config = dict(config,**default_config) + config["timesteps_per_iteration"] = 90 + + if args.use_grpc: + experiment_name = "grpc_results" + elif args.use_pipe: + experiment_name = f"pipe_{args.data_format}_results" + else: + experiment_name = "orignal_run_results" + + #Start model training with given config + tune.run( + experiment, + config=config, + resources_per_trial=DQNTrainer.default_resource_request(config), + local_dir=(MODEL_DIR + "/checkpoint_dir"), + name=experiment_name) # name=args.log_dir diff --git a/model/POSET-RL/src/inference.py b/model/POSET-RL/src/inference.py new file mode 100755 index 000000000000..1e036d322992 --- /dev/null +++ b/model/POSET-RL/src/inference.py @@ -0,0 +1,289 @@ +# Script to perform inference on test LLVM IR files +# Use run-inference.sh to call this script +# Usage: python inference.py --ir2vec_dir \ +# --test_dir \ +# --model \ +# [--isAArch] +# --alpha +# --beta +# --size_reward_thresh +# --mca_reward_thresh +# Example: python inference.py --ir2vec_dir POSET-RL/IR2Vec \ +# --test_dir test_ll \ +# --model POSET_RL/saved_models/model \ +# [--isAArch] +# --alpha 10 +# --beta 5 +# --size_reward_thresh 0.2 +# --mca_reward_thresh 0.2 + +import argparse +import numpy as np +import argparse +import os + +# import utils +import logging +import time + +import ray 
+from ray import tune +from ray.rllib.agents import ppo +from ray.rllib.agents import dqn +from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG +from Environment_pipe import PhaseOrder +from ray.rllib.models import ModelCatalog +from model import CustomPhaseOrderModel +from ray.tune.registry import register_env +from datetime import datetime +from po_config import BUILD_DIR + +import sys +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/MLModelRunner/gRPCModelRunner/Python-Utilities") +import posetRL_pb2_grpc, posetRL_pb2 + +sys.path.append(f"{BUILD_DIR}/MLCompilerBridge/CompilerInterface/") +from GrpcCompilerInterface import GrpcCompilerInterface + +from Filesystem import * + +logger = logging.getLogger(__file__) +logging.basicConfig( + filename="inference.log", + format="%(levelname)s - %(filename)s - %(message)s", + level=logging.DEBUG, +) + +import networkx +from networkx.readwrite import json_graph +import json +import torch +import pydot + +import grpc +from concurrent import futures +import traceback + +parser = argparse.ArgumentParser() +# parser.add_argument( +# "--ir2vec_dir", +# required=False, +# help="path to IR2vec directory which has seed embedding and IR2Vec binary files", +# ) +parser.add_argument( + "--test_dir", help="Path to test directory", required=False, default="./" +) +parser.add_argument("--model", help="Path to saved checkpoint") +parser.add_argument( + "-a", "--isAArch", required=False, default=False, action="store_true" +) +parser.add_argument("-alpha", "--alpha", required=False, type=float, default=10) +parser.add_argument("-beta", "--beta", required=False, type=float, default=5) +parser.add_argument( + "-size_reward_thresh", + "--size_reward_thresh", + required=False, + type=float, + default=0.2, +) +parser.add_argument( + "-mca_reward_thresh", "--mca_reward_thresh", required=False, type=float, default=0.2 +) +parser.add_argument( + "--use_pipe", + action="store_true", + help="Use pipe communication", + required=False, + 
class PhaseOrderInference:
    """Restore a trained DQN phase-ordering agent and run it on LLVM IR inputs.

    The constructor builds the RLlib configuration, registers the custom
    model and the environment, creates a ``DQNTrainer`` and restores the
    checkpoint given by ``model_path``.

    NOTE: several environment settings (``isAArch``, ``test_dir``, ``alpha``,
    ``beta``, ``size_reward_thresh``, ``mca_reward_thresh``, ``server_port``,
    ``pipe_name``) are read from the module-level ``args`` namespace, so
    ``args`` must have been populated (``parser.parse_args()``) before an
    instance is created.
    """

    # Output location used for the ONNX dump when no explicit path is given.
    DEFAULT_ONNX_PATH = "/path/to/ml-llvm-project/model/POSET-RL/onnx-model/posetrl_model.onnx"

    def __init__(self, model_path, use_pipe=False, use_grpc=False, data_format="json", export_onnx=False, onnx_output_path=None):
        """Build the agent and load the checkpoint.

        Args:
            model_path: Path of the saved checkpoint to restore.
            use_pipe: Forwarded to the environment config as ``use_pipe``.
            use_grpc: Forwarded to the environment config as ``use_grpc``.
            data_format: Forwarded to the environment config as ``data_format``.
            export_onnx: When true, the restored policy model is also exported
                to ONNX after loading.
            onnx_output_path: Destination of the ONNX file; defaults to
                ``DEFAULT_ONNX_PATH`` when omitted.
        """
        print("use_pipe {}".format(use_pipe))
        logging.basicConfig(
            filename="running.log",
            format="%(levelname)s - %(filename)s - %(message)s",
            level=logging.DEBUG,
        )

        # DQN-specific overrides merged on top of the main config below.
        cfg = {
            "hiddens": [],
            "dueling": False,
        }

        ModelCatalog.register_custom_model("My_torch_model", CustomPhaseOrderModel)
        target_arch = "AArch64" if args.isAArch else "X86"
        # Define model and environment config
        config = dict(
            {
                "model": {
                    "custom_model": "My_torch_model",
                    "custom_model_config": {
                        "state_size": 300,
                        "fc1_units": 64,
                        "fc2_units": 64,
                    },
                },
                "env_config": {
                    "target": target_arch,
                    "state_size": 300,
                    "mode": "inference",
                    "dump_type": "One",
                    "intermediate_data": "./temp",
                    "llvm_dir": BUILD_DIR,
                    # "ir2vec_dir": args.ir2vec_dir,
                    "test_dir": args.test_dir,
                    "alpha": args.alpha,
                    "beta": args.beta,
                    "size_reward_thresh": args.size_reward_thresh,
                    "mca_reward_thresh": args.mca_reward_thresh,
                    "action_space_size": 34,
                    "use_pipe": use_pipe,
                    "data_format": data_format,
                    "use_grpc": use_grpc,
                    "server_port": args.server_port,
                    "pipe_name": args.pipe_name,
                    "export_onnx": export_onnx,
                },
                "framework": "torch",
                "explore": False,
                "num_workers": 0,
                "train_batch_size": 1,
            },
            **cfg
        )

        def env_creator(env_config):
            return PhaseOrder(env_config)

        # Create environment
        register_env("Environment", env_creator)

        self.train_agent = DQNTrainer(env="Environment", config=config)

        # Load saved model
        self.train_agent.restore(model_path)

        self.config = config

        # Dump the onnx model from the checkpoint. The decision uses the
        # constructor argument (the same value that was put into env_config)
        # rather than the global command-line namespace.
        if export_onnx:
            if onnx_output_path is None:
                onnx_output_path = self.DEFAULT_ONNX_PATH
            torch.onnx.export(
                self.train_agent.get_policy().model,
                ({"obs": torch.randn(1, 334)}, {}),
                export_params=True,
                f=onnx_output_path,
                verbose=True,
                input_names=["obs"],
                output_names=["output"],
            )

    def dot_to_json(self, dot_):
        """Convert DOT graph text into networkx adjacency-format JSON data.

        Only the first graph found in ``dot_`` is converted.
        """
        py_dot_graph = pydot.graph_from_dot_data(dot_)[0]
        graph_netx = networkx.drawing.nx_pydot.from_pydot(py_dot_graph)
        graph_json = json_graph.adjacency_data(graph_netx)
        return graph_json

    # Predict best optimization sequence for the given LLVM IR
    def run_predict(self, test_file=None):
        """Run one episode with the restored agent.

        A fresh ``PhaseOrder`` environment is created, reset with
        ``test_file`` and stepped with the agent's actions until the
        environment reports ``done``. The processed file name is appended to
        ``actionlist.txt`` at the end of the episode.

        Returns:
            The ``(reward, response)`` pair of the final environment step.
        """
        env = PhaseOrder(self.config["env_config"])

        print("test_file {}".format(test_file))
        state = env.reset(test_file)
        while True:
            logging.debug("-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-^_^-")

            action = self.train_agent.compute_action(state)
            print("action {}".format(action))

            next_state, reward, done, response = env.step(action)

            logging.debug("reward : {}".format(reward))

            state = next_state
            if done:
                with open("actionlist.txt", "a") as actionfile:
                    actionfile.write(str(test_file) + "\n")
                assert response is not None, "Allocation is not preset."
                break

        return reward, response


class service_server(posetRL_pb2_grpc.PosetRLService):
    """gRPC service that answers ``getAdvice`` requests with the agent's action.

    One episode spans several requests: the first request of an episode
    creates and resets an environment, later requests step it with the
    previously returned action. An action of ``-1`` is sent when the episode
    is finished or when handling the request failed.
    """

    def __init__(self, inference_obj):
        self.inference_obj = inference_obj
        self.new_file = True  # True when the next request starts a new episode
        self.state = None
        self.env = None
        self.action = None

    def getAdvice(self, request, context):
        """Return the next action for the embedding carried by ``request``."""
        try:
            done = False
            if self.new_file:
                self.env = PhaseOrder(self.inference_obj.config["env_config"])
                self.state = self.env.reset(embedding=request.embedding)
                self.new_file = False
                print("Episode Started")
            else:
                self.env.embedding = np.array(request.embedding)
                self.state, _reward, done, _response = self.env.step(self.action)
            if not done:
                self.action = self.inference_obj.train_agent.compute_action(self.state)
                reply = posetRL_pb2.ActionRequest(action=self.action.item())
            else:
                reply = posetRL_pb2.ActionRequest(action=-1)
                self.new_file = True
                print("Episode Finished")
            return reply
        except Exception:
            # Best effort: report the failure and tell the client to stop.
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate so the server can still be shut down.
            print('Error')
            traceback.print_exc()
            # The client receives the same -1 it gets at the end of an
            # episode, so the next request must start from a fresh
            # environment instead of stepping the failed one.
            self.new_file = True
            reply = posetRL_pb2.ActionRequest(action=-1)
            return reply
# Maps the element type names used in a log header to ctypes scalar types.
_element_types = {
    "float": ctypes.c_float,
    "double": ctypes.c_double,
    "int8_t": ctypes.c_int8,
    "uint8_t": ctypes.c_uint8,
    "int16_t": ctypes.c_int16,
    "uint16_t": ctypes.c_uint16,
    "int32_t": ctypes.c_int32,
    "uint32_t": ctypes.c_uint32,
    "int64_t": ctypes.c_int64,
    "uint64_t": ctypes.c_uint64,
}


@dataclasses.dataclass(frozen=True)
class TensorSpec:
    """Name, port, shape and ctypes element type of one logged tensor."""

    name: str
    port: int
    shape: List[int]
    element_type: type

    @staticmethod
    def from_dict(d: dict) -> "TensorSpec":
        """Build a spec from a header entry with keys name/port/shape/type.

        Raises:
            ValueError: if ``d["type"]`` is not a known element type name.
        """
        name = d["name"]
        port = d["port"]
        shape = [int(e) for e in d["shape"]]
        element_type_str = d["type"]
        if element_type_str not in _element_types:
            raise ValueError(f"unknown type: {element_type_str}")
        return TensorSpec(
            name=name,
            port=port,
            shape=shape,
            element_type=_element_types[element_type_str],
        )


class TensorValue:
    """Read-only, flat, typed view over the raw bytes of one tensor."""

    def __init__(self, spec: TensorSpec, buffer: bytes):
        """Wrap ``buffer`` as ``prod(spec.shape)`` elements of the spec's type.

        Raises:
            ValueError: if ``buffer`` holds fewer bytes than the spec needs.
        """
        self._spec = spec
        # Keep a reference so the memory behind the pointer view stays alive.
        self._buffer = buffer
        self._len = reduce(operator.mul, self._spec.shape, 1)
        needed = self._len * ctypes.sizeof(self._spec.element_type)
        # The pointer view performs no bounds checking of its own; reject a
        # short buffer (e.g. a truncated read) instead of exposing memory
        # that lies past its end.
        if len(buffer) < needed:
            raise ValueError(
                f"tensor {spec.name!r} needs {needed} bytes, got {len(buffer)}"
            )
        self._view = ctypes.cast(self._buffer, ctypes.POINTER(self._spec.element_type))

    def spec(self) -> TensorSpec:
        return self._spec

    def __len__(self) -> int:
        return self._len

    def __getitem__(self, index):
        # Negative indices are rejected as well; raising IndexError past the
        # end is what lets a plain ``for v in tensor_value`` loop terminate.
        if index < 0 or index >= self._len:
            raise IndexError(f"Index {index} out of range [0..{self._len})")
        return self._view[index]


def read_tensor(fs: io.BufferedReader, ts: TensorSpec) -> TensorValue:
    """Read the bytes of one tensor described by ``ts`` from ``fs``.

    Raises:
        ValueError: if the stream ends before the whole tensor was read.
    """
    size = reduce(operator.mul, ts.shape, 1) * ctypes.sizeof(ts.element_type)
    data = fs.read(size)
    return TensorValue(ts, data)


def pretty_print_tensor_value(tv: TensorValue):
    """Print the tensor as ``name: v0,v1,...``."""
    print(f'{tv.spec().name}: {",".join([str(v) for v in tv])}')


def _parse_header(line):
    """Decode one JSON header line into (feature specs, score spec, advice spec)."""
    header = json.loads(line)
    tensor_specs = [TensorSpec.from_dict(ts) for ts in header["features"]]
    score_spec = TensorSpec.from_dict(header["score"]) if "score" in header else None
    advice_spec = TensorSpec.from_dict(header["advice"]) if "advice" in header else None
    return tensor_specs, score_spec, advice_spec


def read_header(f: io.BufferedReader):
    """Read one line from ``f`` and parse it as a JSON header.

    Returns:
        ``(tensor_specs, score_spec, advice_spec)``; the last two are ``None``
        when the header has no ``"score"`` / ``"advice"`` entry.
    """
    return _parse_header(f.readline())


def read_one_observation(
    context: Optional[str],
    event_str: str,
    f: io.BufferedReader,
    tensor_specs: List[TensorSpec],
    score_spec: Optional[TensorSpec],
):
    """Read one tensor per spec from ``f``, then consume the rest of the line.

    ``event_str`` and ``score_spec`` are accepted but not used here.

    Returns:
        ``(context, None, features, None)`` - no observation id or score is
        produced by this reader.
    """
    features = [read_tensor(f, ts) for ts in tensor_specs]
    f.readline()  # skip the line terminator that follows the tensor data
    return context, None, features, None


def read_stream(fname: str):
    """Yield observations from the log file ``fname``.

    The file starts with one header line; each observation is an event line
    followed by the tensor data. Iteration stops at end of file.
    """
    with io.BufferedReader(io.FileIO(fname, "rb")) as f:
        tensor_specs, score_spec, _ = read_header(f)
        context = None
        while True:
            event_str = f.readline()
            if not event_str:
                break
            context, observation_id, features, score = read_one_observation(
                context, event_str, f, tensor_specs, score_spec
            )
            yield context, observation_id, features, score


def read_stream2(f: io.BufferedReader):
    """Yield observations from an open stream with a header before each one.

    Iteration ends cleanly when the stream is exhausted, matching
    ``read_stream``, instead of failing while parsing an empty header line.
    """
    context = None
    while True:
        header_line = f.readline()
        if not header_line:
            return
        tensor_specs, score_spec, _ = _parse_header(header_line)
        context, observation_id, features, score = read_one_observation(
            context, '', f, tensor_specs, score_spec
        )
        yield context, observation_id, features, score
class CustomPhaseOrderModel(TorchModelV2, nn.Module):
    """Fully connected network with action masking for the phase-order agent.

    The network is state -> fc1 -> ReLU -> fc2 -> ReLU -> ``num_outputs``.
    Layer sizes come from ``model_config["custom_model_config"]`` (keys
    ``state_size``, ``fc1_units``, ``fc2_units``).
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **customized_model_kwargs):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name, **customized_model_kwargs)
        nn.Module.__init__(self)
        custom_config = model_config["custom_model_config"]
        state_size = custom_config["state_size"]
        fc1_units = custom_config["fc1_units"]
        fc2_units = custom_config["fc2_units"]
        self.model = nn.Sequential(
            nn.Linear(state_size, fc1_units),
            nn.ReLU(),
            # The second hidden layer must produce fc2_units features because
            # the output layer below consumes fc2_units inputs. Parameter
            # shapes are unchanged whenever fc1_units == fc2_units.
            nn.Linear(fc1_units, fc2_units),
            nn.ReLU(),
            nn.Linear(fc2_units, num_outputs),
        )

    def forward(self, input_dict, state, seq_lens):
        """Compute masked outputs for a batch of observations.

        Reads ``input_dict["obs"]["state"]`` and
        ``input_dict["obs"]["action_mask"]``. ``log(mask)`` clamped from below
        at ``FLOAT_MIN`` is added to the network output, so entries whose mask
        is 0 receive a very large negative value.
        # presumably the mask is 1 for allowed and 0 for disallowed actions -
        # confirm against the environment.

        Returns:
            ``(model_out, state)`` with ``state`` passed through unchanged.
        """
        model_out = self.model(
            input_dict["obs"]["state"]
        )

        mask = input_dict["obs"]["action_mask"]
        inf_mask = torch.clamp(torch.log(mask), min=FLOAT_MIN)
        model_out = model_out + inf_mask

        return model_out, state
b/model/POSET-RL/src/onnx.py @@ -0,0 +1,4 @@ +import onnx +model_path = "/home/cs20mtech12003/ML-Phase-Ordering/RLLib-PhaseOrder/poset-RL-onnx-model/model.onnx" +model = onnx.load(model_path) +onnx.checker.check_model(model) diff --git a/model/POSET-RL/src/po_config.py b/model/POSET-RL/src/po_config.py new file mode 100644 index 000000000000..7ae280db3f0b --- /dev/null +++ b/model/POSET-RL/src/po_config.py @@ -0,0 +1,5 @@ +from decouple import config + +CONFIG_DIR = config('CONFIG_DIR') +BUILD_DIR = config('BUILD_DIR') +MODEL_DIR = config('MODEL_DIR')