How to add a new backend for llama.cpp? Are there any documents?
If by backend you mean a hardware accelerator or GPU, unfortunately I did not find any documentation on it, but I did manage to register a backend with its operations. Backends are contained within the `ggml/src/ggml-YOUR-BACKEND-NAME` directory. You will have to provide the `CMakeLists.txt` to compile your backend yourself, and it has to contain the following functions to register properly.

First, inform GGML that you have a backend, and provide the necessary interface for GGML to interact with it:

```cpp
ggml_backend_reg_t ggml_backend_YOUR_BACKEND_NAME_reg(void) {
    static struct ggml_backend_reg ggml_backend_YOUR_BACKEND_NAME_reg = {
        /* .api_version = */ GGML_YOUR_BACKEND_NAME_BACKEND_VERSION,
        /* .iface       = */ ggml_backend_YOUR_BACKEND_NAME_reg_i,
        /* .context     = */ NULL,
    };
    return &ggml_backend_YOUR_BACKEND_NAME_reg;
}
GGML_BACKEND_DL_IMPL(ggml_backend_YOUR_BACKEND_NAME_reg)
```
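If the backend is linked statically (rather than loaded as a dynamic module via `GGML_BACKEND_DL_IMPL`), it also has to be added to GGML's backend registry. A sketch, assuming your `CMakeLists.txt` defines `GGML_USE_YOUR_BACKEND_NAME`:

```cpp
// In ggml/src/ggml-backend-reg.cpp, inside the ggml_backend_registry
// constructor, next to the existing CUDA/Metal/etc. registrations:
#ifdef GGML_USE_YOUR_BACKEND_NAME
    register_backend(ggml_backend_YOUR_BACKEND_NAME_reg());
#endif
```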
The registry interface supplies the functions GGML uses to enumerate your devices:

```cpp
static const struct ggml_backend_reg_i ggml_backend_YOUR_BACKEND_NAME_reg_i = {
    /* .get_name            = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_name,
    /* .get_device_count    = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_device_count,
    /* .get_device          = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_device,
    /* .get_proc_address    = */ ggml_backend_YOUR_BACKEND_NAME_reg_get_proc_address,
};
```
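The two simplest callbacks referenced above are not shown elsewhere in this post; minimal sketches could look like this (the backend name string is an assumption):

```cpp
static const char * ggml_backend_YOUR_BACKEND_NAME_reg_get_name(ggml_backend_reg_t reg) {
    return "YOUR_BACKEND_NAME";

    GGML_UNUSED(reg);
}

static size_t ggml_backend_YOUR_BACKEND_NAME_reg_get_device_count(ggml_backend_reg_t reg) {
    return 1; // a single device, matching the GGML_ASSERT(index == 0) below

    GGML_UNUSED(reg);
}
```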
`get_device` hands GGML a static device object together with its device interface:

```cpp
static ggml_backend_dev_t ggml_backend_YOUR_BACKEND_NAME_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);
    static ggml_backend_device ggml_backend_YOUR_BACKEND_NAME_device = {
        /* .iface   = */ ggml_backend_YOUR_BACKEND_NAME_device_i,
        /* .reg     = */ reg,
        /* .context = */ nullptr,
    };
    return &ggml_backend_YOUR_BACKEND_NAME_device;
}
```
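Once the backend is compiled in and registered, you can sanity-check device enumeration through the public `ggml-backend.h` API, for example:

```cpp
#include "ggml-backend.h"

#include <stdio.h>

int main(void) {
    // look the backend up by the name returned from reg_get_name
    ggml_backend_reg_t reg = ggml_backend_reg_by_name("YOUR_BACKEND_NAME");
    if (reg == NULL) {
        fprintf(stderr, "backend not registered\n");
        return 1;
    }

    ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);
    printf("device: %s (%s)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
    return 0;
}
```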
`get_proc_address` exposes optional extension functions; returning `nullptr` is fine if you have none:

```cpp
static void * ggml_backend_YOUR_BACKEND_NAME_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    return nullptr;
    GGML_UNUSED(reg);
    GGML_UNUSED(name);
}
```
The device interface itself; the entries set to `NULL` here (host buffers, op offload, events) are optional features:

```cpp
static const struct ggml_backend_device_i ggml_backend_YOUR_BACKEND_NAME_device_i = {
    /* .get_name                = */ ggml_backend_YOUR_BACKEND_NAME_device_get_name,
    /* .get_description         = */ ggml_backend_YOUR_BACKEND_NAME_device_get_desc,
    /* .get_memory              = */ ggml_backend_YOUR_BACKEND_NAME_device_get_memory,
    /* .get_type                = */ ggml_backend_YOUR_BACKEND_NAME_device_get_type,
    /* .get_props               = */ ggml_backend_YOUR_BACKEND_NAME_device_get_props,
    /* .init_backend            = */ ggml_backend_YOUR_BACKEND_NAME_device_init_backend,
    /* .get_buffer_type         = */ ggml_backend_YOUR_BACKEND_NAME_device_get_buffer_type,
    /* .get_host_buffer_type    = */ NULL,
    /* .buffer_from_host_ptr    = */ ggml_backend_YOUR_BACKEND_NAME_device_buffer_from_host_ptr,
    /* .supports_op             = */ ggml_backend_YOUR_BACKEND_NAME_device_supports_op,
    /* .supports_buft           = */ ggml_backend_YOUR_BACKEND_NAME_device_supports_buft,
    /* .offload_op              = */ NULL,
    /* .event_new               = */ NULL,
    /* .event_free              = */ NULL,
    /* .event_synchronize       = */ NULL,
};
```
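For illustration, sketches of two of the device callbacks named above (the values are placeholders you would replace with real driver queries):

```cpp
static enum ggml_backend_dev_type ggml_backend_YOUR_BACKEND_NAME_device_get_type(ggml_backend_dev_t dev) {
    // GGML_BACKEND_DEVICE_TYPE_ACCEL marks a non-GPU accelerator;
    // use GGML_BACKEND_DEVICE_TYPE_GPU for an actual GPU
    return GGML_BACKEND_DEVICE_TYPE_ACCEL;

    GGML_UNUSED(dev);
}

static void ggml_backend_YOUR_BACKEND_NAME_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    // report whatever your device/driver exposes; zero placeholders here
    *free  = 0;
    *total = 0;

    GGML_UNUSED(dev);
}
```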
`supports_op` tells the scheduler which operations this device can run; anything that returns `false` is assigned to another backend (typically the CPU):

```cpp
static bool ggml_backend_YOUR_BACKEND_NAME_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
    const struct ggml_tensor * src0 = op->src[0];
    const struct ggml_tensor * src1 = op->src[1];
    switch (op->op) {
        // GGML required ops
        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
            break;
        case GGML_OP_ADD:
        case GGML_OP_ADD1:
        case GGML_OP_SUB:
        case GGML_OP_MUL:
        case GGML_OP_DIV:
        case GGML_OP_SQRT:
        case GGML_OP_LOG:
        case GGML_OP_NORM:
        case GGML_OP_MUL_MAT:
        case GGML_OP_MUL_MAT_ID:
        case GGML_OP_SOFT_MAX:
        case GGML_OP_LEAKY_RELU:
            return false; // TODO: disable all support first to showcase device reg
        case GGML_OP_UNARY:
            switch (ggml_get_unary_op(op)) {
                case GGML_UNARY_OP_ABS:
                case GGML_UNARY_OP_SGN:
                case GGML_UNARY_OP_NEG:
                case GGML_UNARY_OP_STEP:
                case GGML_UNARY_OP_TANH:
                case GGML_UNARY_OP_ELU:
                case GGML_UNARY_OP_RELU:
                case GGML_UNARY_OP_SIGMOID:
                case GGML_UNARY_OP_GELU:
                case GGML_UNARY_OP_GELU_QUICK:
                case GGML_UNARY_OP_SILU:
                case GGML_UNARY_OP_HARDSWISH:
                case GGML_UNARY_OP_HARDSIGMOID:
                case GGML_UNARY_OP_EXP:
                    break;
                default:
                    return false;
            }
            break; // without this break, control would fall into the default case below
        default:
            return false;
    }
    return true;
    GGML_UNUSED(dev);
    GGML_UNUSED(src0); // src0/src1 become useful once per-op checks are added
    GGML_UNUSED(src1);
}
```
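When you start enabling ops for real, the usual pattern is to gate support on operand types and layout instead of returning `true` unconditionally. A hypothetical helper for the `GGML_OP_MUL_MAT` case (the F32/contiguous restrictions are examples, not requirements):

```cpp
// Hypothetical: call this from the GGML_OP_MUL_MAT case of supports_op
// once a real kernel exists.
static bool ggml_YOUR_BACKEND_NAME_supports_mul_mat(const struct ggml_tensor * op) {
    const struct ggml_tensor * src0 = op->src[0]; // weights
    const struct ggml_tensor * src1 = op->src[1]; // activations

    return src0->type == GGML_TYPE_F32 &&
           src1->type == GGML_TYPE_F32 &&
           ggml_is_contiguous(src0)    &&
           ggml_is_contiguous(src1);
}
```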
Finally, forward the actual computation to your kernels. `ggml_backend_YOUR_BACKEND_NAME_context` is your own state struct; this function is dispatched once per graph node (see the `graph_compute` sketch at the end):

```cpp
inline bool ggml_YOUR_BACKEND_NAME_compute_forward(ggml_backend_YOUR_BACKEND_NAME_context & ctx,
                                                    ggml_tensor * dst) {
    switch (dst->op) {
        case GGML_OP_ADD:
        case GGML_OP_ADD1:
        case GGML_OP_SUB:
        case GGML_OP_MUL:
        case GGML_OP_DIV:
        case GGML_OP_SQRT:
        case GGML_OP_LOG:
        case GGML_OP_NORM:
        case GGML_OP_MUL_MAT:
        case GGML_OP_MUL_MAT_ID:
        case GGML_OP_SOFT_MAX:
        case GGML_OP_LEAKY_RELU:
            return false;
        case GGML_OP_UNARY:
            switch (ggml_get_unary_op(dst)) {
                case GGML_UNARY_OP_ABS:
                case GGML_UNARY_OP_SGN:
                case GGML_UNARY_OP_NEG:
                case GGML_UNARY_OP_STEP:
                case GGML_UNARY_OP_TANH:
                case GGML_UNARY_OP_ELU:
                case GGML_UNARY_OP_RELU:
                case GGML_UNARY_OP_SIGMOID:
                case GGML_UNARY_OP_GELU:
                case GGML_UNARY_OP_GELU_QUICK:
                case GGML_UNARY_OP_SILU:
                case GGML_UNARY_OP_HARDSWISH:
                case GGML_UNARY_OP_HARDSIGMOID:
                    return false;
                case GGML_UNARY_OP_EXP:
                    break;
                default:
                    return false;
            }
            break; // without this break, control would fall into the default case below
        default:
            return false;
    }
    return true;
}
```

That should be the main bulk of registering a device and getting the compute operation forwarded to your backend. Please take note that GGML's matrix multiplication is computed with the first operand transposed, i.e. `dst = src1 · src0ᵀ` (each output element is the dot product of a `src0` row with a `src1` row), so your matmul kernel must account for that layout. You may choose to refer to my zDNN implementation here: https://github.com/taronaeo/llama.cpp-s390x/blob/zdnn-accelerator-backend/ggml/src/ggml-zdnn/ggml-zdnn.cpp
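For completeness, here is a minimal sketch of how `compute_forward` is typically driven from the backend's `graph_compute` callback (the `ggml_backend_i` member GGML calls to execute a graph); the names and context handling are assumptions carried over from the snippets above:

```cpp
static enum ggml_status ggml_backend_YOUR_BACKEND_NAME_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    ggml_backend_YOUR_BACKEND_NAME_context * ctx =
        (ggml_backend_YOUR_BACKEND_NAME_context *) backend->context;

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];

        // view/reshape/permute/transpose nodes carry no computation
        switch (node->op) {
            case GGML_OP_NONE:
            case GGML_OP_RESHAPE:
            case GGML_OP_VIEW:
            case GGML_OP_PERMUTE:
            case GGML_OP_TRANSPOSE:
                continue;
            default:
                break;
        }

        if (ggml_is_empty(node)) {
            continue;
        }

        if (!ggml_YOUR_BACKEND_NAME_compute_forward(*ctx, node)) {
            GGML_LOG_ERROR("%s: unsupported op %s\n", __func__, ggml_op_desc(node)); // GGML_LOG_ERROR is from ggml-impl.h
            return GGML_STATUS_FAILED;
        }
    }

    return GGML_STATUS_SUCCESS;
}
```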