Commit 1d978a9

delete setup_ops_base.py

1 parent 8b8d109 commit 1d978a9

7 files changed: +37 -56 lines changed

build.sh

Lines changed: 4 additions & 16 deletions

@@ -34,7 +34,6 @@ EGG_DIR="fastdeploy.egg-info"
 
 # custom_ops directory config
 OPS_SRC_DIR="custom_ops"
-OPS_TMP_DIR_BASE="tmp_base"
 OPS_TMP_DIR="tmp"
 
 # command line log config
@@ -71,25 +70,20 @@ function copy_ops(){
     PY_VERSION="py${PY_MAIN_VERSION}.${PY_SUB_VERSION}"
     SYSTEM_VERSION=`${python} -c "import platform; print(platform.system().lower())"`
     PROCESSOR_VERSION=`${python} -c "import platform; print(platform.processor())"`
-    WHEEL_BASE_NAME="fastdeploy_base_ops-${OPS_VERSION}-${PY_VERSION}-${SYSTEM_VERSION}-${PROCESSOR_VERSION}.egg"
     WHEEL_NAME="fastdeploy_ops-${OPS_VERSION}-${PY_VERSION}-${SYSTEM_VERSION}-${PROCESSOR_VERSION}.egg"
     WHEEL_CPU_NAME="fastdeploy_cpu_ops-${OPS_VERSION}-${PY_VERSION}-${SYSTEM_VERSION}-${PROCESSOR_VERSION}.egg"
     is_rocm=`$python -c "import paddle; print(paddle.is_compiled_with_rocm())"`
     if [ "$is_rocm" = "True" ]; then
         DEVICE_TYPE="rocm"
-        mkdir -p ../fastdeploy/model_executor/ops/base
-        cp -r ./${OPS_TMP_DIR_BASE}/${WHEEL_BASE_NAME}/* ../fastdeploy/model_executor/ops/base
         cp -r ./${OPS_TMP_DIR}/${WHEEL_NAME}/* ../fastdeploy/model_executor/ops/gpu
-        echo -e "BASE and ROCM ops have been copy to fastdeploy"
+        echo -e "ROCM ops have been copy to fastdeploy"
         return
     fi
-    mkdir -p ../fastdeploy/model_executor/ops/base
     is_cuda=`$python -c "import paddle; print(paddle.is_compiled_with_cuda())"`
     if [ "$is_cuda" = "True" ]; then
         DEVICE_TYPE="gpu"
-        cp -r ./${OPS_TMP_DIR_BASE}/${WHEEL_BASE_NAME}/* ../fastdeploy/model_executor/ops/base
         cp -r ./${OPS_TMP_DIR}/${WHEEL_NAME}/* ../fastdeploy/model_executor/ops/gpu
-        echo -e "BASE and CUDA ops have been copy to fastdeploy"
+        echo -e "CUDA ops have been copy to fastdeploy"
         return
     fi
 
@@ -112,9 +106,8 @@ function copy_ops(){
     if_corex=`$python -c "import paddle; print(paddle.is_compiled_with_custom_device(\"iluvatar_gpu\"))"`
     if [ "$if_corex" = "True" ]; then
         DEVICE_TYPE="iluvatar-gpu"
-        cp -r ./${OPS_TMP_DIR_BASE}/${WHEEL_BASE_NAME}/* ../fastdeploy/model_executor/ops/base
         cp -r ./${OPS_TMP_DIR}/${WHEEL_NAME}/* ../fastdeploy/model_executor/ops/iluvatar
-        echo -e "BASE and Iluvatar ops have been copy to fastdeploy"
+        echo -e "Iluvatar ops have been copy to fastdeploy"
         return
     fi
 
@@ -127,19 +120,15 @@ function copy_ops(){
     fi
 
     DEVICE_TYPE="cpu"
-    cp -r ./${OPS_TMP_DIR_BASE}/${WHEEL_BASE_NAME}/* ../fastdeploy/model_executor/ops/base
     cd ../../../../
     cp -r ${OPS_TMP_DIR}/${WHEEL_CPU_NAME}/* ../fastdeploy/model_executor/ops/cpu
-    echo -e "BASE and CPU ops have been copy to fastdeploy"
+    echo -e "CPU ops have been copy to fastdeploy"
     return
 }
 
 function build_and_install_ops() {
     cd $OPS_SRC_DIR
     export no_proxy=bcebos.com,paddlepaddle.org.cn,${no_proxy}
-    echo -e "${BLUE}[build]${NONE} build and install fastdeploy_base_ops..."
-    ${python} setup_ops_base.py install --install-lib ${OPS_TMP_DIR_BASE}
-    find ${OPS_TMP_DIR_BASE} -type f -name "*.o" -exec rm -f {} \;
     echo -e "${BLUE}[build]${NONE} build and install fastdeploy_ops..."
     TMP_DIR_REAL_PATH=`readlink -f ${OPS_TMP_DIR}`
     is_xpu=`$python -c "import paddle; print(paddle.is_compiled_with_xpu())"`
@@ -213,7 +202,6 @@ function cleanup() {
     fi
 
     rm -rf $OPS_SRC_DIR/$BUILD_DIR $OPS_SRC_DIR/$EGG_DIR
-    rm -rf $OPS_SRC_DIR/$OPS_TMP_DIR_BASE
    rm -rf $OPS_SRC_DIR/$OPS_TMP_DIR
 }
 

custom_ops/gpu_ops/cpp_extensions.cc

Lines changed: 8 additions & 0 deletions

@@ -785,6 +785,12 @@ std::vector<paddle::Tensor> TopKRenorm(const paddle::Tensor &probs,
 
 std::vector<paddle::Tensor> MinPSamplingFromProbs(const paddle::Tensor &probs,
                                                   const paddle::Tensor &min_p);
+
+void SaveOutMmsgStatic(const paddle::Tensor& x,
+                       const paddle::Tensor& not_need_stop,
+                       int64_t rank_id,
+                       bool save_each_rank);
+
 PYBIND11_MODULE(fastdeploy_ops, m) {
 
   m.def("get_expert_token_num", &GetExpertTokenNum, py::arg("topk_ids"),
@@ -1143,4 +1149,6 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
   m.def("top_k_renorm_probs", &TopKRenorm, "top_k_renorm_probs function");
 
   m.def("min_p_sampling", &MinPSamplingFromProbs, "min_p_sampling function");
+
+  m.def("save_output", &SaveOutMmsgStatic, "save_output function");
 }
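For orientation, a hedged sketch of how the newly bound save_output op could be called from Python once fastdeploy_ops is rebuilt; the tensor contents, dtypes, and import path below are illustrative assumptions, not part of the commit.

# Hedged sketch only: tensor shapes, dtypes, and the import path are assumptions.
import paddle
from fastdeploy.model_executor.ops.gpu import save_output  # exposed via m.def("save_output", &SaveOutMmsgStatic, ...)

tokens = paddle.to_tensor([[101, 102]], dtype="int64")  # placeholder output buffer ("x")
not_need_stop = paddle.to_tensor([True])                 # placeholder stop flag
save_output(tokens, not_need_stop, 0, False)             # rank_id=0, save_each_rank=False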

custom_ops/gpu_ops/get_output_ep.cc

Lines changed: 4 additions & 4 deletions

@@ -109,11 +109,11 @@ void GetOutputEp(const paddle::Tensor& x,
   return;
 }
 
-void GetOutputStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
+void GetOutputEPStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
   GetOutputEp(x, rank_id, wait_flag, 1);
 }
 
-void GetOutputDynamic(const paddle::Tensor& x,
+void GetOutputEPDynamic(const paddle::Tensor& x,
                       int64_t rank_id,
                       bool wait_flag,
                       int msg_queue_id) {
@@ -125,11 +125,11 @@ PD_BUILD_STATIC_OP(get_output_ep)
     .Attrs({"rank_id: int64_t", "wait_flag: bool"})
     .Outputs({"x_out"})
     .SetInplaceMap({{"x", "x_out"}})
-    .SetKernelFn(PD_KERNEL(GetOutputStatic));
+    .SetKernelFn(PD_KERNEL(GetOutputEPStatic));
 
 PD_BUILD_STATIC_OP(get_output_ep_dynamic)
     .Inputs({"x"})
     .Attrs({"rank_id: int64_t", "wait_flag: bool", "msg_queue_id: int"})
     .Outputs({"x_out"})
     .SetInplaceMap({{"x", "x_out"}})
-    .SetKernelFn(PD_KERNEL(GetOutputDynamic));
+    .SetKernelFn(PD_KERNEL(GetOutputEPDynamic));
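Only the C++ kernel wrappers are renamed here; the registered op names get_output_ep and get_output_ep_dynamic are unchanged. A hedged Python-side sketch under that assumption (buffer shape, dtype, and import path are placeholders, not from the commit):

# Hedged sketch: op names "get_output_ep" / "get_output_ep_dynamic" are unchanged,
# so Python callers should be unaffected by the kernel rename. Buffer details are placeholders.
import paddle
from fastdeploy.model_executor.ops.gpu import get_output_ep

msg_buffer = paddle.full([256], -1, dtype="int64")  # placeholder message buffer ("x", updated in place)
get_output_ep(msg_buffer, 0, False)                 # rank_id=0, wait_flag=False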

custom_ops/setup_ops.py

Lines changed: 21 additions & 0 deletions

@@ -199,6 +199,11 @@ def find_end_files(directory, end_str):
         if not os.listdir(json_dir):
             raise ValueError("Git clone nlohmann_json failed!")
         sources = [
+            "gpu_ops/save_with_output_msg.cc",
+            "gpu_ops/get_output.cc",
+            "gpu_ops/get_output_msg_with_topk.cc",
+            "gpu_ops/save_output_msg_with_topk.cc",
+            "gpu_ops/transfer_output.cc",
             "gpu_ops/set_value_by_flags.cu",
             "gpu_ops/token_penalty_multi_scores.cu",
             "gpu_ops/stop_generation.cu",
@@ -250,6 +255,11 @@ def find_end_files(directory, end_str):
         )
     elif paddle.is_compiled_with_cuda():
         sources = [
+            "gpu_ops/save_with_output_msg.cc",
+            "gpu_ops/get_output.cc",
+            "gpu_ops/get_output_msg_with_topk.cc",
+            "gpu_ops/save_output_msg_with_topk.cc",
+            "gpu_ops/transfer_output.cc",
             "gpu_ops/set_mask_value.cu",
             "gpu_ops/set_value_by_flags.cu",
             "gpu_ops/ngram_mask.cu",
@@ -532,6 +542,11 @@ def find_end_files(directory, end_str):
                 ]
             },
             sources=[
+                "gpu_ops/save_with_output_msg.cc",
+                "gpu_ops/get_output.cc",
+                "gpu_ops/get_output_msg_with_topk.cc",
+                "gpu_ops/save_output_msg_with_topk.cc",
+                "gpu_ops/transfer_output.cc",
                 "gpu_ops/get_padding_offset.cu",
                 "gpu_ops/set_value_by_flags.cu",
                 "gpu_ops/rebuild_padding.cu",
@@ -587,6 +602,12 @@ def find_end_files(directory, end_str):
         name="fastdeploy_cpu_ops",
         ext_modules=CppExtension(
             sources=[
+                "gpu_ops/save_with_output_msg.cc",
+                "gpu_ops/get_output.cc",
+                "gpu_ops/get_output_msg_with_topk.cc",
+                "gpu_ops/save_output_msg_with_topk.cc",
+                "gpu_ops/transfer_output.cc",
+                "cpu_ops/rebuild_padding.cc",
                 "cpu_ops/simd_sort.cc",
                 "cpu_ops/set_value_by_flags.cc",
                 "cpu_ops/token_penalty_multi_scores.cc",

custom_ops/setup_ops_base.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

fastdeploy/model_executor/ops/gpu/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -19,7 +19,6 @@
 
 PACKAGE = "fastdeploy.model_executor.ops.gpu"
 
-import_custom_ops(PACKAGE, "..base.fastdeploy_base_ops", globals())
 import_custom_ops(PACKAGE, ".fastdeploy_ops", globals())
 
fastdeploy/model_executor/ops/iluvatar/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -17,7 +17,6 @@
 
 PACKAGE = "fastdeploy.model_executor.ops.iluvatar"
 
-import_custom_ops(PACKAGE, "..base.fastdeploy_base_ops", globals())
 import_custom_ops(PACKAGE, ".fastdeploy_ops", globals())
 
 from .moe_ops import iluvatar_moe_expert_ffn as moe_expert_ffn  # noqa: F401
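With the separate base-ops module removed from both packages, every custom op is loaded from the single fastdeploy_ops extension. A hedged sketch of the resulting import path; the save_output name comes from the new m.def binding above, and the claim that it replaces a former base-ops symbol is an assumption:

# Hedged sketch: former "base" ops are expected to resolve from the unified module now.
from fastdeploy.model_executor.ops.gpu import save_output  # assumption: previously served by fastdeploy_base_ops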
