graphcore-research
diff --git a/‎.devcontainer.json‎
Lines changed: 24 additions & 0 deletions b/‎.devcontainer.json‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎.dockerignore‎
Lines changed: 2 additions & 0 deletions b/‎.dockerignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/ci-ipu.yaml‎
Lines changed: 0 additions & 34 deletions b/‎.github/workflows/ci-ipu.yaml‎
Lines changed: 0 additions & 34 deletions
diff --git a/‎.github/workflows/ci.yaml‎
Lines changed: 8 additions & 6 deletions b/‎.github/workflows/ci.yaml‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎Dockerfile‎
Lines changed: 39 additions & 0 deletions b/‎Dockerfile‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 16 additions & 1 deletion b/‎README.md‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎analysis/almost_scaled_dot_product_attention/demo_transformer.py‎
Lines changed: 1 addition & 31 deletions b/‎analysis/almost_scaled_dot_product_attention/demo_transformer.py‎
Lines changed: 1 addition & 31 deletions
diff --git a/‎docs/development.md‎
Lines changed: 0 additions & 40 deletions b/‎docs/development.md‎
Lines changed: 0 additions & 40 deletions
diff --git a/‎docs/user_guide.rst‎
Lines changed: 0 additions & 7 deletions b/‎docs/user_guide.rst‎
Lines changed: 0 additions & 7 deletions
diff --git a/‎requirements-dev-ipu.txt‎
Lines changed: 0 additions & 12 deletions b/‎requirements-dev-ipu.txt‎
Lines changed: 0 additions & 12 deletions
@@ -0,0 +1,24 @@
+{
+    "build": {
+        "dockerfile": "Dockerfile"
+    },
+    "workspaceFolder": "/home/developer/unit-scaling",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "ms-python.python",
+                "ms-toolsai.jupyter"
+            ],
+            "settings": {
+                "terminal.integrated.defaultProfile.linux": "zsh",
+                "terminal.integrated.profiles.linux": { "zsh": { "path": "/bin/zsh" } }
+            }
+        }
+    },
+    "mounts": [
+        "source=${localEnv:HOME}/.ssh,target=/home/developer/.ssh,type=bind,readonly=true",
+        "source=${localEnv:HOME}/.gitconfig,target=/home/developer/.gitconfig,type=bind,readonly=true",
+        "source=${localWorkspaceFolder},target=/home/developer/unit-scaling,type=bind"
+    ],
+    "remoteUser": "developer"
+}
@@ -0,0 +1,2 @@
+*
+!requirements*.txt
@@ -15,14 +15,16 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 10
     steps:
-      - uses: actions/checkout@v3
-      - name: Install dependencies
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Build Docker Image
         run: |
-          sudo apt-get update
-          sudo apt-get install -y git
-          pip install -r requirements-dev.txt
+          docker build -t unit-scaling-dev:latest .
+
       - name: Run CI
-        run: ./dev ci
+        run: docker run --rm -v $(pwd):/home/developer/unit-scaling unit-scaling-dev:latest ./dev ci
+
       - name: Publish documentation
         if: ${{github.ref == 'refs/heads/main'}}
         uses: Cecilapp/GitHub-Pages-deploy@v3
 
@@ -0,0 +1,39 @@
+# Use PyTorch base image
+FROM pytorch/pytorch:latest
+
+# Install additional dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    vim \
+    sudo \
+    make \
+    g++ \
+    zsh \
+    && chsh -s /bin/zsh \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*   # cleanup (smaller image)
+
+# Configure a non-root user with sudo privileges
+# Change this to preferred username
+ARG USERNAME=developer
+ARG USER_UID=1001
+ARG USER_GID=$USER_UID
+RUN groupadd --gid $USER_GID $USERNAME \
+    && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
+    && echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
+    && chmod 0440 /etc/sudoers.d/$USERNAME
+USER $USERNAME
+
+# Set working directory
+WORKDIR /home/$USERNAME/unit-scaling
+
+# Puts pip install libs on $PATH & sets correct locale
+ENV PATH="$PATH:/home/$USERNAME/.local/bin" \
+    LC_ALL=C.UTF-8
+
+# Install Python dependencies
+COPY requirements-dev.txt .
+RUN pip install -r requirements-dev.txt
+
+# Creates basic .zshrc
+RUN sudo cp /etc/zsh/newuser.zshrc.recommended /home/$USERNAME/.zshrc
+
+CMD ["/bin/zsh"]
@@ -17,7 +17,22 @@ To install the `unit-scaling` library, run:
 pip install git+https://github.com/graphcore-research/unit-scaling.git
 ```
 
-For development on this repository, see [docs/development.md](docs/development.md).
+## Development
+
+For development in this repository, we recommend using the provided docker container.
+This image can be built and entered interactively using:
+
+```sh
+docker build -t unit-scaling-dev:latest .
+docker run -it --rm --user developer:developer -v $(pwd):/home/developer/unit-scaling unit-scaling-dev:latest
+# To use git within the container, add `-v ~/.ssh:/home/developer/.ssh:ro -v ~/.gitconfig:/home/developer/.gitconfig:ro`.
+```
+
+For VS Code users, this repo also contains a `.devcontainer.json` file, which enables the container to be used as a full-featured IDE (see the [Dev Container docs](https://code.visualstudio.com/docs/devcontainers/containers) for details on how to use this feature).
+
+Key development functionality is contained within the `./dev` script. This includes running unit tests, linting, formatting, documentation generation and more. Run `./dev --help` for the available options. Running `./dev` without arguments is equivalent to running `./dev ci`, which runs all of the available dev checks. This is the test used for GitHub CI.
+
+We encourage pull requests from the community. Please reach out to us with any questions about contributing.
 
 ## What is u-μP?
 
 
@@ -10,13 +10,6 @@
 from torch import nn, Tensor
 import tqdm
 
-try:
-    import poptorch
-
-    poptorch_available = True
-except ModuleNotFoundError:
-    poptorch_available = False
-
 
 class Config(dict):
     def __init__(self, *args: Any, **kwargs: Any):
@@ -132,7 +125,7 @@ def forward(self, indices: Tensor) -> Tensor:
         )
 
 
-def train_cpu() -> Tensor:
+def train() -> Tensor:
     model = Model()
     opt = torch.optim.Adam(model.parameters(), lr=CONFIG.lr)
     losses = []
@@ -143,26 +136,3 @@ def train_cpu() -> Tensor:
         opt.step()
         losses.append(float(loss))
     return torch.tensor(losses)
-
-
-def train_ipu() -> Tensor:
-    model = Model()
-    options = poptorch.Options()
-    options.showCompilationProgressBar(False)
-    opt = torch.optim.Adam(model.parameters(), lr=CONFIG.lr)
-    session = poptorch.trainingModel(model, options, opt)
-    try:
-        return torch.tensor(
-            [
-                float(session(batch.int()))
-                for batch in tqdm.tqdm(
-                    islice(batches(), CONFIG.steps), total=CONFIG.steps
-                )
-            ]
-        )
-    finally:
-        session.destroy()
-
-
-def train() -> Tensor:
-    return train_ipu() if poptorch_available else train_cpu()
@@ -55,13 +55,6 @@ The advantage of using a unit-scaled model is as follows:
    scales have stayed within range for all unit-scaled models tested thus far.
 3. This can enable the use of smaller, more efficient number formats out-of-the-box,
    such as FP16 and even FP8.
-4. As the behaviour of some ops depends on scale, unit-scaling a model can change its
-   training dynamics slightly. In some experiments this has been shown to lead to
-   loss decreasing faster, though further work is needed to validate this.
-
-For a more in-depth treatment of unit scaling, see our paper
-`Unit Scaling: Out-of-the-Box Low-Precision Training (ICML, 2023)
-<https://arxiv.org/abs/2303.11257>`_.
 
 
 How to unit-scale a model