-
Notifications
You must be signed in to change notification settings - Fork 45
Expand file tree
/
Copy pathsetup.py
More file actions
296 lines (251 loc) · 11.5 KB
/
setup.py
File metadata and controls
296 lines (251 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# Copyright 2024 Tencent Inc. All rights reserved.
#
# ==============================================================================
import os
import pathlib
import shlex
import shutil
import re
import subprocess
from distutils.file_util import copy_file
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as build_ext_orig
# Retrieve the version from Git tags
def get_version():
if 'CUSTOM_KSANA_VERSION' in os.environ:
custom_ksana_version = os.environ['CUSTOM_KSANA_VERSION']
return custom_ksana_version
git_describe_output = subprocess.run(
["git", "describe", "--tags", "--always"],
stdout=subprocess.PIPE,
text=True).stdout.strip()
match = re.match(r'^(v[\d\.]+(?:[-\.]?(?:rc|post|beta|alpha)[-\.\d]*)?)', git_describe_output)
print(match)
if match:
return match.group(1)
else:
return git_describe_output
class CMakeExtension(Extension):
"""
An extension module that will be built using CMake.
We override the default behavior since we don't have sources here.
"""
def __init__(self, name):
super().__init__(name, sources=[])
def is_run_on_npu_device() -> bool:
"""
Check if the current environment is running on an NPU device.
This is done by checking if 'torch_npu' is in the installed packages.
"""
import pkgutil
return "torch_npu" in [pkg.name for pkg in pkgutil.iter_modules()]
class BuildExt(build_ext_orig):
"""
A custom build extension for building CMake-based projects.
"""
def run(self):
"""
Run method that builds the CMake extension before proceeding with the standard build.
"""
for ext in self.extensions:
self.build_cmake(ext)
super().run()
def build_cmake(self, ext):
"""
Build the extension using CMake.
"""
# Absolute path to the current working directory
cwd = pathlib.Path().absolute()
# Ensure the build temporary directory exists
build_temp = pathlib.Path(self.build_temp)
build_temp.mkdir(parents=True, exist_ok=True)
# Ensure the extension output directory exists
extdir = pathlib.Path(self.get_ext_fullpath(ext.name))
extdir.mkdir(parents=True, exist_ok=True)
# Collect all the implemented endpoints from the 'endpoints/warpper' directory
endpoints_dir = cwd / "src" / "ksana_llm" / "endpoints" / "wrapper"
implemented_endpoints = [
entry.name for entry in endpoints_dir.iterdir() if entry.is_dir()
]
# Configuration: 'Debug' or 'Release'
config = "Debug" if self.debug else "Release"
# Parse any additional CMake arguments from the environment variable 'CMAKE_ARGS'
cmake_args = shlex.split(os.environ.get("CMAKE_ARGS", ""))
# Initialize a dictionary to hold all the CMake options
cmake_options = {
"CMAKE_LIBRARY_OUTPUT_DIRECTORY": str(extdir.parent.absolute()),
"CMAKE_BUILD_TYPE": config,
"WITH_TESTING": "OFF",
}
# Determine if running on an NPU device, and set options accordingly
if is_run_on_npu_device():
# NPU device specific options
cmake_options.update({
"WITH_CUDA": "OFF",
"WITH_ACL": "ON",
})
else:
# CUDA device specific options
cmake_options.update({
"WITH_CUDA": "ON",
"SM": "80,86,89", # Specify the supported GPU architectures
})
# Dictionary to track the status (enabled/disabled) of each endpoint
endpoint_status = {}
# Iterate over implemented endpoints to set their build options
for endpoint in implemented_endpoints:
# Formulate the option name, e.g., 'WITH_GRPC_ENDPOINT'
option_name = f"WITH_{endpoint.upper()}_ENDPOINT"
# Default to enabling the endpoint
endpoint_status[endpoint] = True
cmake_options[option_name] = "ON"
# Check if the endpoint is explicitly turned off via '-D' argument
turned_off_option = f"-D{option_name}=OFF"
if turned_off_option in cmake_args:
# If the option is found in cmake_args, disable the endpoint
endpoint_status[endpoint] = False
cmake_options[option_name] = "OFF"
# Convert the cmake_options dictionary into command-line arguments
cmake_args.extend([f"-D{key}={value}" for key, value in cmake_options.items()])
# Build arguments for 'cmake --build'
build_args = ["--config", config]
# Use all available CPU cores for parallel build
build_args += ["--", f"-j{os.cpu_count()}"]
# Change to the build temporary directory and run CMake configuration
os.chdir(build_temp)
self.spawn(["cmake", str(cwd)] + cmake_args)
if not self.dry_run:
# Build the project
self.spawn(["cmake", "--build", "."] + build_args)
# Return to the original directory
os.chdir(cwd)
# After building, copy the necessary files to the package directory
self.copy_built_libraries(extdir, build_temp, cwd, implemented_endpoints, endpoint_status)
self.copy_python_files_and_dirs(extdir, cwd, implemented_endpoints, endpoint_status)
def copy_built_libraries(self, extdir, build_temp, cwd, implemented_endpoints, endpoint_status):
"""
Copy the built libraries to the appropriate locations in the package.
"""
# Directories for the built libraries
deps_lib = cwd / "src" / "ksana_llm" / "python" / "lib"
build_temp_lib = build_temp / "lib"
# Ensure the dependencies library directory exists
deps_lib.mkdir(parents=True, exist_ok=True)
# Create package directories
package_lib_dir = extdir.parent / "ksana_llm" / "lib"
package_lib_dir.mkdir(parents=True, exist_ok=True)
package_root_dir = extdir.parent / "ksana_llm"
# List of target libraries to copy (unified list)
target_libs = ["libtorch_serving.so", "libloguru.so"]
# Include endpoint libraries that are enabled
for endpoint in implemented_endpoints:
if endpoint_status[endpoint] and endpoint != "triton":
target_libs.append(f"lib{endpoint}_endpoint.so")
# Also copy tbb dynamic library if it exists
tbb_so = build_temp / "tbb_install" / "lib64" / "libtbb.so.12"
if tbb_so.exists():
target_libs.append("libtbb.so.12")
# Copy libraries to both local development and package directories
for target_lib in target_libs:
if target_lib == "libtbb.so.12":
src_lib = tbb_so
else:
src_lib = build_temp_lib / target_lib
if not src_lib.exists():
continue
# Copy to the dependencies library directory for local development
dst_lib_deps = deps_lib / target_lib
copy_file(str(src_lib), str(dst_lib_deps))
# Copy to package directory: pybind11 module to root, others to lib
if target_lib == "libtorch_serving.so":
dst_lib_package = package_root_dir / target_lib # libtorch_serving.so in site-packages/ksana_llm
else:
dst_lib_package = package_lib_dir / target_lib # other libs in /ksana_llm/lib/
copy_file(str(src_lib), str(dst_lib_package))
# Copy deepseek-ai deepgemm package
src_dir = build_temp / "third_party" / "DeepGEMM" / "deep_gemm"
dst_dir = extdir.parent / "ksana_llm" / "deepseek_deep_gemm"
if src_dir.exists():
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
def copy_python_files_and_dirs(self, extdir, cwd, implemented_endpoints, endpoint_status):
"""
Copy the required Python files and directories into the package.
"""
# List of directories to copy into the package
need_dirs = ["weight_map", "ksana_plugin", "simple_router", "openaiapi", "utilize"]
# Copy each required directory
for need_dir in need_dirs:
src_dir = cwd / "src" / "ksana_llm" / "python" / need_dir
dst_dir = extdir.parent / "ksana_llm" / need_dir
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
# Copy endpoint configuration files if they exist
for endpoint in implemented_endpoints:
if endpoint_status[endpoint]:
if endpoint == "triton":
# For 'triton backend', copy the entire endpoint directory
src_dir = cwd / "src" / "ksana_llm" / "endpoints" / "wrapper" / endpoint
dst_dir = extdir.parent / "ksana_llm" / "triton_backend"
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
src_conf = cwd / "examples" / "ksana_llm.yaml"
dst_conf = extdir.parent / "ksana_llm/triton_backend/config/ksana_llm/1/ksana_llm.yaml"
shutil.copyfile(src_conf, dst_conf)
else:
# For other endpoints, copy the 'rpc_config' directory
src_dir = cwd / "src" / "ksana_llm" / "endpoints" / "wrapper" / endpoint / "rpc_config"
dst_dir = extdir.parent / "ksana_llm" / "rpc_config"
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
# List of required Python files to copy into the package
server_need_files = [
"serving_server.py",
"serving_generate_client.py",
"serving_forward_client.py",
]
# Copy each required Python file
for need_file in server_need_files:
src_file = cwd / "src" / "ksana_llm" / "python" / need_file
dst_dir = extdir.parent / "ksana_llm"
copy_file(str(src_file), str(dst_dir))
# Copy trt deepgemm package
src_dir = (cwd / "3rdparty" / "LLM_kernels" / "csrc" / "kernels" / "nvidia" /
"others" / "tensorrt-llm" / "dev" / "deep_gemm")
dst_dir = extdir.parent / "ksana_llm" / "deep_gemm"
shutil.copytree(src_dir, dst_dir, dirs_exist_ok=True)
# Copy triton cubin kernels
if 'KSANA_TRITON_KERNEL_PATH' in os.environ:
triton_kernel_dir = os.environ['KSANA_TRITON_KERNEL_PATH'] + "/triton_kernel_files"
else:
home_dir = os.path.expanduser("~")
triton_kernel_dir = f"{home_dir}/.cache/KsanaLLM/triton_kernel_files"
dst_dir = extdir.parent / "ksana_llm" / "triton_kernel_files"
shutil.copytree(triton_kernel_dir, dst_dir, dirs_exist_ok=True)
# Setup configuration for the ksana_llm package
setup(
name="ksana_llm",
version=get_version(),
author="ksana_llm",
author_email="ksana_llm@tencent.com",
description="Ksana LLM inference server",
platforms="python3",
url="https://xxx/KsanaLLM/",
packages=["ksana_llm"],
package_dir={
"": "src/ksana_llm/python",
},
package_data={
'ksana_llm': [
'libtorch_serving.so',
'lib*.so*',
'lib/*.so*',
'triton_kernel_files/**/*',
'rpc_config/**/*',
'triton_backend/**/*',
],
},
include_package_data=True,
ext_modules=[CMakeExtension("ksana_llm")],
python_requires=">=3",
install_requires=open("requirements.txt").readlines(),
cmdclass={
"build_ext": BuildExt,
},
)