Oneflow-Inc · mosout · Jul 21, 2022 · Aug 4, 2022 · Aug 4, 2022
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,5 +1,7 @@
 cmake_minimum_required(VERSION 3.18.0)
 
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "")
+
 project(OneFlowServing)
 
 if(NOT CMAKE_BUILD_TYPE)

diff --git a/examples_embedding/embedding/client.py b/examples_embedding/embedding/client.py
@@ -0,0 +1,48 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+# Example client: sends one batch of all-ones INT64 features to the
+# "embedding" model on a local Triton HTTP endpoint, then prints the
+# request round-trip time in seconds and the shape of the returned output.
+
+import time
+import numpy as np
+import tritonclient.http as httpclient
+
+
+if __name__ == '__main__':
+    triton_client = httpclient.InferenceServerClient(url='127.0.0.1:8000')
+
+    # 10000 rows of 39 features; allocate as int64 directly rather than
+    # building a float64 array and converting it.
+    data = np.ones((10000, 39), dtype=np.int64)
+
+    infer_input = httpclient.InferInput('INPUT_0', data.shape, "INT64")
+    infer_input.set_data_from_numpy(data, binary_data=True)
+    inputs = [infer_input]
+    # NOTE(review): class_count=1 asks Triton for classification-style
+    # results on OUTPUT_0 -- confirm that is intended for this model.
+    outputs = [
+        httpclient.InferRequestedOutput('OUTPUT_0', binary_data=True, class_count=1)
+    ]
+
+    # perf_counter() is a monotonic, high-resolution clock, so the measured
+    # interval cannot be skewed by system clock adjustments as time.time() can.
+    start = time.perf_counter()
+    results = triton_client.infer("embedding", inputs=inputs, outputs=outputs)
+    print(time.perf_counter() - start)
+    output_data0 = results.as_numpy('OUTPUT_0')
+    print(output_data0.shape)
diff --git a/examples_embedding/embedding/config.pbtxt b/examples_embedding/embedding/config.pbtxt
@@ -0,0 +1,27 @@
+name: "embedding"
+backend: "oneflow"
+max_batch_size: 10000
+
+input [
+  {
+    name: "INPUT_0"
+    data_type: TYPE_INT64
+    dims: [ 39 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT_0"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind: KIND_GPU
+    gpus: [ 0 ]
+  }
+]
diff --git a/src/triton/model_state.cpp b/src/triton/model_state.cpp
@@ -338,6 +338,7 @@ ModelState::LoadModel(
 
   graph->reset(
       new oneflow_api::Graph(oneflow_api::Graph::Load(model_path, device)));
+
   if (MaxBatchSize() > 0) {
     (*graph)->set_batch_size(MaxBatchSize());
   }