Mirror of https://github.com/yunionio/cloudpods.git, synced 2026-05-06 21:52:54 +08:00
feat(llm): init vllm as llm_container (#24338)
* feat(llm): init vllm as llm_container
* feat(vllm): fix save-instant-model for vllm
* feat(llm): support quick-model for vllm
* feat(vllm): support preferred-model in llm
* fix(llm): add ValidateSpec in llm_container_driver
* feat(vllm): auto-set vllm params
@@ -11,6 +11,7 @@ func init() {
 	cmd.BatchCreate(new(options.LLMCreateOptions))
 	cmd.List(new(options.LLMListOptions))
 	cmd.Show(new(options.LLMShowOptions))
+	cmd.Update(new(options.LLMUpdateOptions))
 	cmd.Delete(new(options.LLMDeleteOptions))
 	// cmd.Perform("change-model", new(options.LLMChangeModelOptions))
 	cmd.Perform("syncstatus", new(options.LLMIdOptions))
@@ -19,6 +19,7 @@ const (
 var (
 	LLM_IMAGE_TYPES = sets.NewString(
 		string(LLM_IMAGE_TYPE_OLLAMA),
+		string(LLM_IMAGE_TYPE_VLLM),
 		string(LLM_IMAGE_TYPE_DIFY),
 		string(LLM_IMAGE_TYPE_COMFYUI),
 		string(LLM_IMAGE_TYPE_OPENCLAW),
@@ -82,6 +82,14 @@ type LLMCreateInput struct {
 	LLMSpec *LLMSpec `json:"llm_spec,omitempty"`
 }
 
+// LLMUpdateInput is the request body for updating an LLM (including llm_spec overrides).
+type LLMUpdateInput struct {
+	apis.VirtualResourceBaseUpdateInput
+
+	InstantModelQuotaGb *int     `json:"instant_model_quota_gb,omitempty"`
+	LLMSpec             *LLMSpec `json:"llm_spec,omitempty"`
+}
+
 type LLMBaseListInput struct {
 	apis.VirtualResourceListInput
 	apis.EnabledResourceBaseListInput
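As a sense-check of the wire format, a minimal standalone sketch of an update body follows; the mirror structs and the vllm/preferred_model JSON tags are assumptions inferred from the fields this commit uses elsewhere, not the package's actual definitions.

package main

import (
	"encoding/json"
	"fmt"
)

// Local mirrors of the API types above, for illustration only (hypothetical tags).
type llmSpecVllm struct {
	PreferredModel string `json:"preferred_model,omitempty"`
}

type llmSpec struct {
	Vllm *llmSpecVllm `json:"vllm,omitempty"`
}

type llmUpdateInput struct {
	InstantModelQuotaGb *int     `json:"instant_model_quota_gb,omitempty"`
	LLMSpec             *llmSpec `json:"llm_spec,omitempty"`
}

func main() {
	quota := 50
	body := llmUpdateInput{
		InstantModelQuotaGb: &quota,
		LLMSpec:             &llmSpec{Vllm: &llmSpecVllm{PreferredModel: "Qwen/Qwen2-7B"}},
	}
	b, _ := json.Marshal(body)
	fmt.Println(string(b))
	// {"instant_model_quota_gb":50,"llm_spec":{"vllm":{"preferred_model":"Qwen/Qwen2-7B"}}}
}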
pkg/apis/llm/vllm_const.go (new file, 88 lines)
@@ -0,0 +1,88 @@
package llm

import "time"

const (
	LLM_VLLM              = "vllm"
	LLM_VLLM_DEFAULT_PORT = 8000
	LLM_VLLM_EXEC_PATH    = "python3 -m vllm.entrypoints.openai.api_server"

	LLM_VLLM_HF_ENDPOINT = "https://hf-mirror.com"

	// Directory constants
	LLM_VLLM_CACHE_DIR   = "/root/.cache/huggingface"
	LLM_VLLM_BASE_PATH   = "/data/models"
	LLM_VLLM_MODELS_PATH = "/data/models/huggingface"

	// Health check
	LLM_VLLM_HEALTH_CHECK_TIMEOUT  = 120 * time.Second // 2 minutes
	LLM_VLLM_HEALTH_CHECK_INTERVAL = 10 * time.Second  // 10 seconds

	// Default vLLM memory params when Python estimation fails (conservative to avoid OOM)
	LLM_VLLM_DEFAULT_GPU_MEMORY_UTIL = 0.9
	LLM_VLLM_DEFAULT_MAX_MODEL_LEN   = 2048
	LLM_VLLM_DEFAULT_MAX_NUM_SEQS    = 1

	// Prefixes for parsing the resolveModelAndParams output line (KEY=value)
	LLM_VLLM_RESOLVE_OUTPUT_PREFIX_GPU_UTIL    = "GPU_MEMORY_UTIL="
	LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_LEN     = "MAX_MODEL_LEN="
	LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_NUM_SEQ = "MAX_NUM_SEQS="
)

const (
	// LLM_VLLM_ESTIMATE_PARAMS_SCRIPT is a Python script run inside the container to estimate
	// --gpu-memory-utilization, --max-model-len, and --max-num-seqs from GPU memory and model config.
	// Args: sys.argv[1]=model path, sys.argv[2]=tensor_parallel_size.
	// Prints one line: GPU_MEMORY_UTIL=0.9 MAX_MODEL_LEN=2624 MAX_NUM_SEQS=1 (eval-safe).
	LLM_VLLM_ESTIMATE_PARAMS_SCRIPT = `
import sys, json, os

model_path = sys.argv[1] if len(sys.argv) > 1 else ""
tp = int(sys.argv[2]) if len(sys.argv) > 2 else 1
if not model_path or not os.path.isdir(model_path):
    sys.exit(1)
config_path = os.path.join(model_path, "config.json")
if not os.path.isfile(config_path):
    sys.exit(1)
with open(config_path) as f:
    config = json.load(f)

def get_nested(d, *keys):
    for k in keys:
        d = d.get(k) if isinstance(d, dict) else None
        if d is None:
            return None
    return d

num_layers = config.get("num_hidden_layers") or config.get("n_layer") or get_nested(config, "text_config", "num_hidden_layers") or 0
num_heads = config.get("num_attention_heads") or config.get("n_head") or get_nested(config, "text_config", "num_attention_heads") or 0
num_kv_heads = config.get("num_key_value_heads") or get_nested(config, "text_config", "num_key_value_heads") or num_heads
hidden_size = config.get("hidden_size") or config.get("n_embd") or get_nested(config, "text_config", "hidden_size") or 0
head_dim = hidden_size // num_heads if num_heads else (hidden_size // 64)
max_pos = config.get("max_position_embeddings") or config.get("n_positions") or get_nested(config, "text_config", "max_position_embeddings") or 4096
if not num_layers or not num_kv_heads or not head_dim:
    sys.exit(1)
try:
    import torch
    total_mem = sum(torch.cuda.get_device_properties(i).total_memory for i in range(torch.cuda.device_count()))
except Exception:
    sys.exit(1)
gpu_util = 0.9
activation_overhead = 2 * (1024**3)
num_params = config.get("num_parameters") or config.get("num_params") or get_nested(config, "text_config", "num_parameters")
if num_params is not None:
    model_bytes = num_params * 2
else:
    model_bytes = 12 * (1024**3)
available_kv = total_mem * gpu_util - model_bytes - activation_overhead
if available_kv <= 0:
    available_kv = total_mem * 0.5
kv_per_token = num_layers * 2 * num_kv_heads * head_dim * 2
max_num_seqs = 4
max_model_len = int(available_kv / (kv_per_token * max_num_seqs))
max_model_len = max(1, min(max_model_len, max_pos))
if max_model_len < 256:
    max_num_seqs = 1
    max_model_len = int(available_kv / (kv_per_token * max_num_seqs))
    max_model_len = max(1, min(max_model_len, max_pos))
print("GPU_MEMORY_UTIL=%s MAX_MODEL_LEN=%d MAX_NUM_SEQS=%d" % (gpu_util, max_model_len, max_num_seqs))
`
)
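The estimator boils down to simple arithmetic: an fp16 KV cache costs num_layers * 2 (K and V) * num_kv_heads * head_dim * 2 bytes per token, and whatever GPU memory remains after weights and a fixed activation reserve is split across max_num_seqs sequences. A standalone Go sketch of the same formula with illustrative Llama-7B-like numbers (the real script reads these from config.json and torch.cuda):

package main

import "fmt"

func main() {
	const GiB = 1 << 30
	totalMem := 24 * GiB // hypothetical single 24 GiB GPU
	gpuUtil := 0.9
	modelBytes := 12 * GiB // the fallback when num_parameters is absent
	activationOverhead := 2 * GiB

	numLayers, numKVHeads, headDim := 32, 32, 128
	maxPos := 4096

	// fp16 KV cache: layers * (K+V) * kv_heads * head_dim * 2 bytes/element
	kvPerToken := numLayers * 2 * numKVHeads * headDim * 2 // = 524288 bytes/token

	availableKV := float64(totalMem)*gpuUtil - float64(modelBytes) - float64(activationOverhead)
	maxNumSeqs := 4
	maxModelLen := int(availableKV / float64(kvPerToken*maxNumSeqs))
	if maxModelLen > maxPos {
		maxModelLen = maxPos
	}
	fmt.Printf("GPU_MEMORY_UTIL=%v MAX_MODEL_LEN=%d MAX_NUM_SEQS=%d\n", gpuUtil, maxModelLen, maxNumSeqs)
	// GPU_MEMORY_UTIL=0.9 MAX_MODEL_LEN=3891 MAX_NUM_SEQS=4
}

Since 3891 is above the 256-token floor, the retry branch that drops max_num_seqs to 1 never triggers in this example.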
@@ -41,7 +41,7 @@ func (b *baseDriver) StartLLM(ctx context.Context, userCred mcclient.TokenCreden
 	return nil
 }
 
-func (b *baseDriver) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
+func (b *baseDriver) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
 	imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), &input.LLMImageId)
 	if err != nil {
 		return nil, errors.Wrapf(err, "validate image_id %s", input.LLMImageId)
@@ -67,7 +67,7 @@ func (b *baseDriver) ValidateCreateData(ctx context.Context, userCred mcclient.T
 	return input, nil
 }
 
-func (b *baseDriver) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
+func (b *baseDriver) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
 	llmImageId := input.LLMImageId
 	if llmImageId != "" {
 		imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), &llmImageId)
@@ -111,5 +111,16 @@ func MatchContainerToUpdateByName(ctr *computeapi.SContainer, podCtrs []*compute
 }
 
+func (b *baseDriver) MatchContainerToUpdate(ctr *computeapi.SContainer, podCtrs []*computeapi.PodContainerCreateInput) (*computeapi.PodContainerCreateInput, error) {
+	if len(podCtrs) == 1 {
+		return podCtrs[0], nil
+	}
+	return MatchContainerToUpdateByName(ctr, podCtrs)
+}
+
+func (b *baseDriver) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
+	return input, nil
+}
+
+func (b *baseDriver) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
+	return input, nil
+}
@@ -37,6 +37,36 @@ func (d *dify) GetSpec(sku *models.SLLMSku) interface{} {
 	return sku.LLMSpec.Dify
 }
 
+// mergeDifySpecInto merges src into dst. If fillEmpty is true, only fills dst when dst is empty; otherwise overwrites dst when src is non-empty.
+func mergeDifySpecInto(dst, src *api.LLMSpecDify, fillEmpty bool) {
+	if dst == nil || src == nil {
+		return
+	}
+	mergeStr := func(dstPtr *string, srcVal string) {
+		if fillEmpty {
+			if *dstPtr == "" && srcVal != "" {
+				*dstPtr = srcVal
+			}
+		} else {
+			if srcVal != "" {
+				*dstPtr = srcVal
+			}
+		}
+	}
+	mergeStr(&dst.PostgresImageId, src.PostgresImageId)
+	mergeStr(&dst.RedisImageId, src.RedisImageId)
+	mergeStr(&dst.NginxImageId, src.NginxImageId)
+	mergeStr(&dst.DifyApiImageId, src.DifyApiImageId)
+	mergeStr(&dst.DifyPluginImageId, src.DifyPluginImageId)
+	mergeStr(&dst.DifyWebImageId, src.DifyWebImageId)
+	mergeStr(&dst.DifySandboxImageId, src.DifySandboxImageId)
+	mergeStr(&dst.DifySSRFImageId, src.DifySSRFImageId)
+	mergeStr(&dst.DifyWeaviateImageId, src.DifyWeaviateImageId)
+	if (fillEmpty && len(dst.CustomizedEnvs) == 0 && len(src.CustomizedEnvs) > 0) || (!fillEmpty && len(src.CustomizedEnvs) > 0) {
+		dst.CustomizedEnvs = src.CustomizedEnvs
+	}
+}
+
+// mergeDify merges llm and sku Dify specs; llm takes priority, use sku when llm is nil or zero.
+func mergeDify(llm, sku *api.LLMSpecDify) *api.LLMSpecDify {
+	if llm != nil && !llm.IsZero() {
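To make the two merge modes concrete, a minimal standalone sketch with a two-field stand-in for api.LLMSpecDify: the create path calls mergeDifySpecInto with fillEmpty=true so SKU defaults only fill holes, while the update path uses fillEmpty=false so non-empty input overwrites the stored value.

package main

import "fmt"

// Stand-in for api.LLMSpecDify with just two fields, to show the two modes.
type spec struct{ Api, Web string }

func merge(dst, src *spec, fillEmpty bool) {
	m := func(d *string, s string) {
		if fillEmpty {
			if *d == "" && s != "" {
				*d = s
			}
		} else if s != "" {
			*d = s
		}
	}
	m(&dst.Api, src.Api)
	m(&dst.Web, src.Web)
}

func main() {
	// fillEmpty=true: SKU defaults only fill fields the user left empty.
	user := spec{Api: "user-api"}
	merge(&user, &spec{Api: "sku-api", Web: "sku-web"}, true)
	fmt.Println(user) // {user-api sku-web}

	// fillEmpty=false: a non-empty update overwrites the stored value.
	stored := spec{Api: "old-api", Web: "old-web"}
	merge(&stored, &spec{Api: "new-api"}, false)
	fmt.Println(stored) // {new-api old-web}
}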
@@ -88,14 +118,60 @@ func (d *dify) GetPrimaryContainer(ctx context.Context, llm *models.SLLM, contai
 	return nil, errors.Error("api container not found")
 }
 
-func (d *dify) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
+func (d *dify) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
 	if input.LLMSpec == nil || input.LLMSpec.Dify == nil {
 		return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU requires llm_spec with type dify and image ids")
 	}
 	if input.MountedModels != nil {
 		return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models")
 	}
-	difySpec := input.LLMSpec.Dify
+
+	// Reuse ValidateLLMCreateSpec to normalize/validate LLMSpec.
+	spec, err := d.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
+	if err != nil {
+		return nil, err
+	}
+	input.LLMSpec = spec
+	if input.LLMSpec != nil && input.LLMSpec.Dify != nil {
+		input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId
+	}
+	return input, nil
+}
+
+func (d *dify) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
+	if input.MountedModels != nil {
+		return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models")
+	}
+	if input.LLMSpec == nil {
+		return input, nil
+	}
+
+	// Reuse ValidateLLMUpdateSpec by treating the current SKU spec as the "current llm spec".
+	fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
+	spec, err := d.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
+	if err != nil {
+		return nil, err
+	}
+	input.LLMSpec = spec
+
+	// if dify_api_image_id is set, use it as the primary image id
+	if input.LLMSpec != nil && input.LLMSpec.Dify != nil && input.LLMSpec.Dify.DifyApiImageId != "" {
+		input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId
+	}
+	return input, nil
+}
+
+// ValidateLLMCreateSpec implements ILLMContainerDriver. Validates image ids and merges empty fields from SKU spec.
+func (d *dify) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
+	if input == nil || input.Dify == nil {
+		return input, nil
+	}
+	difySpec := input.Dify
+	// Merge empty fields from SKU so the stored spec is complete
+	if sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Dify != nil {
+		mergeDifySpecInto(difySpec, sku.LLMSpec.Dify, true)
+	}
+	// Validate non-empty image ids
+	for _, imgId := range []*string{&difySpec.PostgresImageId, &difySpec.RedisImageId, &difySpec.NginxImageId, &difySpec.DifyApiImageId, &difySpec.DifyPluginImageId, &difySpec.DifyWebImageId, &difySpec.DifySandboxImageId, &difySpec.DifySSRFImageId, &difySpec.DifyWeaviateImageId} {
+		if *imgId == "" {
+			continue
@@ -105,63 +181,68 @@ func (d *dify) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCr
 			return nil, errors.Wrapf(err, "validate image_id %s", *imgId)
 		}
 		img := imgObj.(*models.SLLMImage)
-		if img.LLMType != input.LLMType {
-			return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type %s", *imgId, input.LLMType)
+		if img.LLMType != string(api.LLM_CONTAINER_DIFY) {
+			return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type dify", *imgId)
 		}
 		*imgId = img.Id
 	}
-	input.LLMImageId = difySpec.DifyApiImageId
-	return input, nil
+	return &api.LLMSpec{Dify: difySpec}, nil
 }
 
-func (d *dify) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
-	if input.MountedModels != nil {
-		return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models")
+// ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges input with the current LLM spec (non-empty overwrites); validates image ids.
+func (d *dify) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
+	if input == nil || input.Dify == nil {
+		return input, nil
 	}
-	if input.LLMSpec == nil || input.LLMSpec.Dify == nil {
-		return nil, nil
-	}
-	currentSpec := d.GetSpec(sku)
-	if currentSpec == nil {
-		return nil, nil
-	}
-	updated := *currentSpec.(*api.LLMSpecDify)
-	difySpec := input.LLMSpec.Dify
-	mergeStr := func(dst *string, src string) {
-		if src != "" {
-			*dst = src
+	// Start from the current LLM spec, or the SKU spec as base
+	var base *api.LLMSpecDify
+	if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Dify != nil {
+		b := *llm.LLMSpec.Dify
+		base = &b
+		if llm.LLMSpec.Dify.CustomizedEnvs != nil {
+			base.CustomizedEnvs = make([]*api.DifyCustomizedEnv, len(llm.LLMSpec.Dify.CustomizedEnvs))
+			copy(base.CustomizedEnvs, llm.LLMSpec.Dify.CustomizedEnvs)
+		}
+	} else if llm != nil {
+		sku, err := llm.GetLLMSku(llm.LLMSkuId)
+		if err == nil && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Dify != nil {
+			b := *sku.LLMSpec.Dify
+			base = &b
+			if sku.LLMSpec.Dify.CustomizedEnvs != nil {
+				base.CustomizedEnvs = make([]*api.DifyCustomizedEnv, len(sku.LLMSpec.Dify.CustomizedEnvs))
+				copy(base.CustomizedEnvs, sku.LLMSpec.Dify.CustomizedEnvs)
+			}
+		}
 	}
-	mergeStr(&updated.PostgresImageId, difySpec.PostgresImageId)
-	mergeStr(&updated.RedisImageId, difySpec.RedisImageId)
-	mergeStr(&updated.NginxImageId, difySpec.NginxImageId)
-	mergeStr(&updated.DifyApiImageId, difySpec.DifyApiImageId)
-	mergeStr(&updated.DifyPluginImageId, difySpec.DifyPluginImageId)
-	mergeStr(&updated.DifyWebImageId, difySpec.DifyWebImageId)
-	mergeStr(&updated.DifySandboxImageId, difySpec.DifySandboxImageId)
-	mergeStr(&updated.DifySSRFImageId, difySpec.DifySSRFImageId)
-	mergeStr(&updated.DifyWeaviateImageId, difySpec.DifyWeaviateImageId)
-	if len(difySpec.CustomizedEnvs) > 0 {
-		updated.CustomizedEnvs = difySpec.CustomizedEnvs
+	if base == nil {
+		base = &api.LLMSpecDify{}
 	}
-	for _, imgId := range []*string{&updated.PostgresImageId, &updated.RedisImageId, &updated.NginxImageId, &updated.DifyApiImageId, &updated.DifyPluginImageId, &updated.DifyWebImageId, &updated.DifySandboxImageId, &updated.DifySSRFImageId, &updated.DifyWeaviateImageId} {
-		if *imgId != "" {
-			imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), imgId)
-			if err != nil {
-				return nil, errors.Wrapf(err, "validate image_id %s", *imgId)
-			}
-			img := imgObj.(*models.SLLMImage)
-			if img.LLMType != sku.LLMType {
-				return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type %s", *imgId, sku.LLMType)
-			}
-			*imgId = img.GetId()
+	mergeDifySpecInto(base, input.Dify, false)
+	// Validate non-empty image ids
+	for _, imgId := range []*string{&base.PostgresImageId, &base.RedisImageId, &base.NginxImageId, &base.DifyApiImageId, &base.DifyPluginImageId, &base.DifyWebImageId, &base.DifySandboxImageId, &base.DifySSRFImageId, &base.DifyWeaviateImageId} {
+		if *imgId == "" {
+			continue
 		}
+		imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), imgId)
+		if err != nil {
+			return nil, errors.Wrapf(err, "validate image_id %s", *imgId)
+		}
+		img := imgObj.(*models.SLLMImage)
+		if img.LLMType != string(api.LLM_CONTAINER_DIFY) {
+			return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type dify", *imgId)
+		}
+		*imgId = img.Id
 	}
-	// if dify_api_image_id is set, use it as the primary image id
-	if input.LLMSpec.Dify.DifyApiImageId != "" {
-		input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId
+	return &api.LLMSpec{Dify: base}, nil
+}
+
+// GetContainerSpec is required by ILLMContainerDriver but not used for Dify; pod creation uses GetContainerSpecs. Return the first container so the interface is satisfied.
+func (d *dify) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
+	specs := d.GetContainerSpecs(ctx, llm, image, sku, props, devices, diskId)
+	if len(specs) == 0 {
+		return nil
 	}
-	return input, nil
+	return specs[0]
 }
 
 // GetContainerSpecs returns all Dify pod containers (postgres, redis, api, worker, nginx, etc.). Uses the effective spec (llm + sku merged by driver).
pkg/llm/drivers/llm_container/vllm.go (new file, 578 lines)
@@ -0,0 +1,578 @@
package llm_container

import (
	"context"
	"fmt"
	"net/http"
	"path"
	"strconv"
	"strings"
	"time"

	"yunion.io/x/log"
	"yunion.io/x/pkg/errors"

	commonapi "yunion.io/x/onecloud/pkg/apis"
	computeapi "yunion.io/x/onecloud/pkg/apis/compute"
	api "yunion.io/x/onecloud/pkg/apis/llm"
	"yunion.io/x/onecloud/pkg/llm/models"
	"yunion.io/x/onecloud/pkg/mcclient"
)

func init() {
	models.RegisterLLMContainerDriver(newVLLM())
}

type vllm struct {
	baseDriver
}

func newVLLM() models.ILLMContainerDriver {
	return &vllm{baseDriver: newBaseDriver(api.LLM_CONTAINER_VLLM)}
}

// escapeShellSingleQuoted escapes s for use inside a single-quoted shell string (each ' becomes '\'').
func escapeShellSingleQuoted(s string) string {
	return strings.ReplaceAll(s, "'", "'\\''")
}
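A quick illustration of why this escape works: a single-quoted shell string cannot contain a quote, so the replacement closes the string, emits a backslash-escaped quote, and reopens the string. A tiny standalone sketch:

package main

import (
	"fmt"
	"strings"
)

func escapeShellSingleQuoted(s string) string {
	return strings.ReplaceAll(s, "'", `'\''`)
}

func main() {
	name := "it's/a model" // hypothetical directory name containing a quote
	fmt.Printf("ls -d '%s'\n", escapeShellSingleQuoted(name))
	// ls -d 'it'\''s/a model'
	// The quoting closes at it', emits \', then reopens 's/a model'.
}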
func (v *vllm) GetSpec(sku *models.SLLMSku) interface{} {
	if sku == nil || sku.LLMType != string(api.LLM_CONTAINER_VLLM) || sku.LLMSpec == nil || sku.LLMSpec.Vllm == nil {
		return nil
	}
	return sku.LLMSpec.Vllm
}

func (v *vllm) GetEffectiveSpec(llm *models.SLLM, sku *models.SLLMSku) interface{} {
	var skuSpec *api.LLMSpecVllm
	if s := v.GetSpec(sku); s != nil {
		skuSpec = s.(*api.LLMSpecVllm)
	}
	if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
		if llm.LLMSpec.Vllm.PreferredModel != "" {
			out := *llm.LLMSpec.Vllm
			return &out
		}
		// llm spec explicitly present but empty -> fall back to the sku default
	}
	if skuSpec != nil {
		out := *skuSpec
		return &out
	}
	return nil
}

func (v *vllm) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
	input, err := v.baseDriver.ValidateLLMSkuCreateData(ctx, userCred, input)
	if err != nil {
		return nil, err
	}

	// Reuse ValidateLLMCreateSpec; ensure llm_spec.vllm always exists for a vLLM SKU.
	spec, err := v.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
	if err != nil {
		return nil, err
	}
	if spec == nil {
		spec = &api.LLMSpec{Vllm: &api.LLMSpecVllm{}}
	} else if spec.Vllm == nil {
		spec.Vllm = &api.LLMSpecVllm{}
	}
	input.LLMSpec = spec
	return input, nil
}

func (v *vllm) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
	input, err := v.baseDriver.ValidateLLMSkuUpdateData(ctx, userCred, sku, input)
	if err != nil {
		return nil, err
	}
	if input.LLMSpec == nil {
		return input, nil
	}

	// Reuse ValidateLLMUpdateSpec by treating the current SKU spec as the "current llm spec".
	fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
	spec, err := v.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
	if err != nil {
		return nil, err
	}
	input.LLMSpec = spec
	if input.LLMSpec != nil && input.LLMSpec.Vllm == nil {
		input.LLMSpec.Vllm = &api.LLMSpecVllm{}
	}
	return input, nil
}

// ValidateLLMCreateSpec implements ILLMContainerDriver. Merges preferred_model from the SKU when the input's is empty.
func (v *vllm) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
	if input == nil {
		return nil, nil
	}
	preferred := ""
	if input.Vllm != nil {
		preferred = input.Vllm.PreferredModel
	}
	if preferred == "" && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
		preferred = sku.LLMSpec.Vllm.PreferredModel
	}
	return &api.LLMSpec{Vllm: &api.LLMSpecVllm{PreferredModel: preferred}}, nil
}

// ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges preferred_model with the current LLM spec; only overwrites when non-empty.
func (v *vllm) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
	if input == nil || input.Vllm == nil {
		return input, nil
	}
	current := ""
	if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
		current = llm.LLMSpec.Vllm.PreferredModel
	}
	preferred := input.Vllm.PreferredModel
	if preferred == "" {
		preferred = current
	}
	return &api.LLMSpec{Vllm: &api.LLMSpecVllm{PreferredModel: preferred}}, nil
}
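The precedence these functions implement is simple: an LLM-level preferred_model wins when non-empty, otherwise the SKU default applies. A stripped-down sketch of that resolution (local stand-in type, not the real api structs):

package main

import "fmt"

type vllmSpec struct{ PreferredModel string }

// effective mirrors GetEffectiveSpec's fallback: llm spec wins only when its
// preferred_model is non-empty; otherwise the SKU default is used.
func effective(llm, sku *vllmSpec) *vllmSpec {
	if llm != nil && llm.PreferredModel != "" {
		out := *llm
		return &out
	}
	if sku != nil {
		out := *sku
		return &out
	}
	return nil
}

func main() {
	sku := &vllmSpec{PreferredModel: "Qwen/Qwen2-7B"} // hypothetical SKU default
	fmt.Println(effective(&vllmSpec{}, sku).PreferredModel)
	// Qwen/Qwen2-7B
	fmt.Println(effective(&vllmSpec{PreferredModel: "facebook/opt-125m"}, sku).PreferredModel)
	// facebook/opt-125m
}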
func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
	// The container entrypoint only keeps the container alive; vLLM itself is started by StartLLM via exec.
	startScript := `mkdir -p ` + api.LLM_VLLM_MODELS_PATH + ` && exec sleep infinity`
	envs := []*commonapi.ContainerKeyValue{
		{
			Key:   "HUGGING_FACE_HUB_CACHE",
			Value: api.LLM_VLLM_CACHE_DIR,
		},
		{
			Key:   "HF_ENDPOINT",
			Value: api.LLM_VLLM_HF_ENDPOINT,
		},
		// // Fix Error 803
		// {
		// 	Key:   "LD_LIBRARY_PATH",
		// 	Value: "/lib64:/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}",
		// },
		// // Fix Error 803
		// {
		// 	Key:   "LD_PRELOAD",
		// 	Value: "/lib/libcuda.so.1 /lib/libnvidia-ptxjitcompiler.so.1 /lib/libnvidia-gpucomp.so",
		// },
	}
	spec := computeapi.ContainerSpec{
		ContainerSpec: commonapi.ContainerSpec{
			Image:             image.ToContainerImage(),
			ImageCredentialId: image.CredentialId,
			Command:           []string{"/bin/sh", "-c"},
			Args:              []string{startScript},
			EnableLxcfs:       true,
			AlwaysRestart:     true,
			Envs:              envs,
		},
	}

	// GPU devices: use explicitly passed devices when given; otherwise reserve by SKU device count.
	if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
		for i := range *sku.Devices {
			index := i
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Index: &index,
				},
			})
		}
	} else if len(devices) > 0 {
		for i := range devices {
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Id: devices[i].Id,
				},
			})
		}
	}

	// Volume mounts
	diskIndex := 0
	postOverlays, err := llm.GetMountedModelsPostOverlay()
	if err != nil {
		log.Errorf("GetMountedModelsPostOverlay failed %s", err)
	}
	ctrVols := []*commonapi.ContainerVolumeMount{
		{
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: api.LLM_VLLM,
				Index:        &diskIndex,
				PostOverlay:  postOverlays,
			},
			Type:        commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath:   api.LLM_VLLM_BASE_PATH,
			ReadOnly:    false,
			Propagation: commonapi.MOUNTPROPAGATION_PROPAGATION_HOST_TO_CONTAINER,
		},
		{
			// Mount the cache dir to persist the HF cache
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: "cache",
				Index:        &diskIndex,
			},
			Type:      commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath: "/root/.cache",
			ReadOnly:  false,
		},
	}
	spec.VolumeMounts = append(spec.VolumeMounts, ctrVols...)

	return &computeapi.PodContainerCreateInput{
		ContainerSpec: spec,
	}
}
func (v *vllm) GetContainerSpecs(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) []*computeapi.PodContainerCreateInput {
	return []*computeapi.PodContainerCreateInput{
		v.GetContainerSpec(ctx, llm, image, sku, props, devices, diskId),
	}
}

func (v *vllm) GetLLMUrl(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) (string, error) {
	// Similar logic to Ollama to determine the URL
	server, err := llm.GetServer(ctx)
	if err != nil {
		return "", errors.Wrap(err, "get server")
	}

	networkType := llm.NetworkType
	if networkType == string(computeapi.NETWORK_TYPE_GUEST) {
		if len(llm.LLMIp) == 0 {
			return "", errors.Error("LLM IP is empty for guest network")
		}
		return fmt.Sprintf("http://%s:%d", llm.LLMIp, api.LLM_VLLM_DEFAULT_PORT), nil
	} else {
		// hostlocal
		if len(server.HostAccessIp) == 0 {
			return "", errors.Error("host access IP is empty")
		}
		// Return the default port on the host access IP, assuming direct access
		// or that per-instance port-mapping logic exists elsewhere.
		// NOTE: ollama.go queries AccessInfo here; this path is simplified.
		return fmt.Sprintf("http://%s:%d", server.HostAccessIp, api.LLM_VLLM_DEFAULT_PORT), nil
	}
}

// StartLLM starts the vLLM server inside the container via exec, then waits for the health endpoint to be ready.
func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) error {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return errors.Wrap(err, "get llm container")
	}
	sku, err := llm.GetLLMSku(llm.LLMSkuId)
	if err != nil {
		return errors.Wrap(err, "get llm sku")
	}
	tensorParallelSize := 1
	if sku.Devices != nil && len(*sku.Devices) > 0 {
		tensorParallelSize = len(*sku.Devices)
	}
	// Half of the SKU memory (MiB) expressed in GiB, with a 1 GiB floor.
	swapSpaceGiB := (sku.Memory * 1) / (2 * 1024)
	if swapSpaceGiB < 1 {
		swapSpaceGiB = 1
	}

	preferredPath := ""
	if eff := v.GetEffectiveSpec(llm, sku); eff != nil {
		if preferred := eff.(*api.LLMSpecVllm).PreferredModel; preferred != "" {
			preferredPath = path.Join(api.LLM_VLLM_MODELS_PATH, preferred)
		}
	}
	resolved, err := v.resolveModelAndParams(ctx, lc.CmpId, preferredPath, tensorParallelSize)
	if err != nil {
		return err
	}
	if resolved == nil {
		return nil // no model
	}

	modelEscaped := escapeShellSingleQuoted(resolved.ModelPath)
	startCmd := fmt.Sprintf(
		`nohup %s --model '%s' --served-model-name "$(basename '%s')" --port %d \
--tensor-parallel-size %d --swap-space %d --enable-prefix-caching \
--gpu-memory-utilization %s --max-model-len %d --max-num-seqs %d \
> /tmp/vllm.log 2>&1 &`,
		api.LLM_VLLM_EXEC_PATH,
		modelEscaped,
		modelEscaped,
		api.LLM_VLLM_DEFAULT_PORT,
		tensorParallelSize,
		swapSpaceGiB,
		resolved.GpuUtil,
		resolved.MaxModelLen,
		resolved.MaxNumSeqs,
	)
	_, err = exec(ctx, lc.CmpId, startCmd, 30)
	if err != nil {
		log.Errorf("vLLM start failed, exec command: %s", startCmd)
		return errors.Wrapf(err, "exec start vLLM, command: %s", startCmd)
	}
	cmd := startCmd

	// Wait for the health endpoint
	baseURL, err := v.GetLLMUrl(ctx, userCred, llm)
	if err != nil {
		return errors.Wrap(err, "get llm url for health check")
	}
	healthURL := strings.TrimSuffix(baseURL, "/") + "/health"
	deadline := time.Now().Add(api.LLM_VLLM_HEALTH_CHECK_TIMEOUT)
	for time.Now().Before(deadline) {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
		if err != nil {
			return errors.Wrap(err, "new health check request")
		}
		resp, err := http.DefaultClient.Do(req)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-ctx.Done():
			return errors.Wrap(ctx.Err(), "context done while waiting for vLLM")
		case <-time.After(api.LLM_VLLM_HEALTH_CHECK_INTERVAL):
			// continue polling
		}
	}
	// Optionally read the last lines of /tmp/vllm.log for a better error message
	logTail, _ := exec(ctx, lc.CmpId, "tail -n 20 /tmp/vllm.log 2>/dev/null || true", 5)
	if logTail != "" {
		return errors.Errorf("vLLM health check timeout after %v, exec command: %s, last log: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd, strings.TrimSpace(logTail))
	}
	return errors.Errorf("vLLM health check timeout after %v, exec command: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd)
}
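Rendered with illustrative values (a hypothetical Qwen/Qwen2-7B directory, two GPUs, 8 GiB swap, and estimator output 0.9/3891/4), the exec line looks as below; the sketch simply re-runs the same fmt.Sprintf with sample numbers:

package main

import "fmt"

func main() {
	execPath := "python3 -m vllm.entrypoints.openai.api_server"
	model := "/data/models/huggingface/Qwen/Qwen2-7B" // hypothetical model dir
	startCmd := fmt.Sprintf(
		`nohup %s --model '%s' --served-model-name "$(basename '%s')" --port %d \
--tensor-parallel-size %d --swap-space %d --enable-prefix-caching \
--gpu-memory-utilization %s --max-model-len %d --max-num-seqs %d \
> /tmp/vllm.log 2>&1 &`,
		execPath, model, model, 8000, 2, 8, "0.9", 3891, 4)
	fmt.Println(startCmd)
}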
// ILLMContainerInstantApp implementation

func (v *vllm) GetProbedInstantModelsExt(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, mdlIds ...string) (map[string]api.LLMInternalInstantMdlInfo, error) {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return nil, errors.Wrap(err, "get llm container")
	}

	// List directories in the models path with their sizes
	cmd := fmt.Sprintf("du -sk %s/*/", api.LLM_VLLM_MODELS_PATH)
	output, err := exec(ctx, lc.CmpId, cmd, 10)
	if err != nil {
		// If du fails (e.g. no model directory yet), return empty
		return make(map[string]api.LLMInternalInstantMdlInfo), nil
	}

	modelsMap := make(map[string]api.LLMInternalInstantMdlInfo)
	lines := strings.Split(strings.TrimSpace(output), "\n")
	for _, line := range lines {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		// Size is in KB
		sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
		fullPath := fields[1]
		name := path.Base(fullPath)
		if name == "" {
			continue
		}
		// Treat the directory name as the model name. For vLLM the name usually
		// implies "organization/model" when downloaded from HF, but here we just list local dirs.
		modelsMap[name] = api.LLMInternalInstantMdlInfo{
			Name:    name,
			ModelId: name,
			Tag:     "latest",
			Size:    sizeKB * 1024,
		}
	}
	return modelsMap, nil
}
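To show what the probe consumes, a standalone sketch of the du -sk parsing with two made-up output lines; note that du over */ lists top-level directories, so a namespaced model such as Qwen/Qwen2-7B surfaces under its organization directory:

package main

import (
	"fmt"
	"path"
	"strconv"
	"strings"
)

func main() {
	// Hypothetical `du -sk /data/models/huggingface/*/` output (KB, then path).
	output := "14680064\t/data/models/huggingface/Qwen/\n" +
		"1024\t/data/models/huggingface/facebook/"
	for _, line := range strings.Split(strings.TrimSpace(output), "\n") {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
		name := path.Base(fields[1]) // path.Base drops the trailing slash
		fmt.Printf("model=%s size=%d bytes\n", name, sizeKB*1024)
	}
	// model=Qwen size=15032385536 bytes
	// model=facebook size=1048576 bytes
}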
func (v *vllm) DetectModelPaths(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, pkgInfo api.LLMInternalInstantMdlInfo) ([]string, error) {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return nil, errors.Wrap(err, "get llm container")
	}

	modelPath := path.Join(api.LLM_VLLM_MODELS_PATH, pkgInfo.Name)
	checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST' || echo 'MISSING'", modelPath)
	output, err := exec(ctx, lc.CmpId, checkCmd, 10)
	if err != nil {
		return nil, errors.Wrap(err, "failed to check file existence")
	}

	if !strings.Contains(output, "EXIST") {
		return nil, errors.Errorf("model directory %s missing", modelPath)
	}

	return []string{modelPath}, nil
}

func (v *vllm) GetImageInternalPathMounts(sApp *models.SInstantModel) map[string]string {
	// Map host paths to container paths. For vLLM's single volume mount,
	// this is derived from the base path.
	res := make(map[string]string)
	for _, mount := range sApp.Mounts {
		relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
		res[relPath] = path.Join(api.LLM_VLLM, relPath)
	}
	return res
}

func (v *vllm) GetSaveDirectories(sApp *models.SInstantModel) (string, []string, error) {
	var filteredMounts []string
	for _, mount := range sApp.Mounts {
		if strings.HasPrefix(mount, api.LLM_VLLM_BASE_PATH) {
			relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
			filteredMounts = append(filteredMounts, relPath)
		}
	}
	return "", filteredMounts, nil
}

func (v *vllm) ValidateMounts(mounts []string, mdlName string, mdlTag string) ([]string, error) {
	return mounts, nil
}

func (v *vllm) CheckDuplicateMounts(errStr string, dupIndex int) string {
	return "Duplicate mounts detected"
}

func (v *vllm) GetInstantModelIdByPostOverlay(postOverlay *commonapi.ContainerVolumeMountDiskPostOverlay, mdlNameToId map[string]string) string {
	return ""
}

func (v *vllm) GetDirPostOverlay(dir api.LLMMountDirInfo) *commonapi.ContainerVolumeMountDiskPostOverlay {
	uid := int64(1000)
	gid := int64(1000)
	ov := dir.ToOverlay()
	ov.FsUser = &uid
	ov.FsGroup = &gid
	return &ov
}

func (v *vllm) PreInstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return errors.Wrap(err, "get llm container")
	}
	// Create the base directory
	cmd := fmt.Sprintf("mkdir -p %s", api.LLM_VLLM_MODELS_PATH)
	_, err = exec(ctx, lc.CmpId, cmd, 10)
	return err
}

func (v *vllm) InstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, dirs []string, mdlIds []string) error {
	return nil
}

func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
	// Optionally remove the model directory; for safety, leave it in place.
	return nil
}

func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return "", nil, errors.Wrap(err, "get llm container")
	}

	// Download the model inside the container; modelName is expected to look like "facebook/opt-125m".
	targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelName)

	// Skip the download if the directory already exists
	checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST'", targetDir)
	out, _ := exec(ctx, lc.CmpId, checkCmd, 10)
	if strings.Contains(out, "EXIST") {
		log.Infof("Model %s already exists at %s", modelName, targetDir)
		return modelName, []string{targetDir}, nil
	}

	// Use huggingface-cli, assuming the container has internet access; install it via pip if missing.
	downloadCmd := fmt.Sprintf("mkdir -p %s && huggingface-cli download %s --local-dir %s --local-dir-use-symlinks False", targetDir, modelName, targetDir)
	fullCmd := fmt.Sprintf("if ! command -v huggingface-cli &> /dev/null; then pip install -U huggingface_hub; fi; %s", downloadCmd)

	log.Infof("Downloading model %s with cmd: %s", modelName, fullCmd)
	_, err = exec(ctx, lc.CmpId, fullCmd, 3600) // 1 hour timeout for large models
	if err != nil {
		return "", nil, errors.Wrapf(err, "failed to download model %s", modelName)
	}

	return modelName, []string{targetDir}, nil
}
// vllmResolveResult is the result of resolving the model path and estimating vLLM memory params in the container.
type vllmResolveResult struct {
	ModelPath   string
	GpuUtil     string
	MaxModelLen int
	MaxNumSeqs  int
}

// resolveModelAndParams runs one exec in the container to resolve the model path and estimate
// --gpu-memory-utilization, --max-model-len, --max-num-seqs. Returns (nil, nil) when no model is found.
func (v *vllm) resolveModelAndParams(ctx context.Context, containerId string, preferredPath string, tensorParallelSize int) (*vllmResolveResult, error) {
	preferredEscaped := escapeShellSingleQuoted(preferredPath)
	escapedScript := escapeShellSingleQuoted(strings.TrimSpace(api.LLM_VLLM_ESTIMATE_PARAMS_SCRIPT))
	defaultGpuUtil := strconv.FormatFloat(float64(api.LLM_VLLM_DEFAULT_GPU_MEMORY_UTIL), 'f', -1, 64)
	cmd := fmt.Sprintf(
		`mkdir -p %s;
preferred='%s';
if [ -n "$preferred" ] && [ -d "$preferred" ]; then model="$preferred"; else model=$(ls -d %s/* 2>/dev/null | head -n 1); fi;
if [ -z "$model" ]; then echo "NO_MODEL"; exit 0; fi;
tp=%d;
vllm_out=$(python3 -c '%s' "$model" "$tp" 2>/dev/null) || true;
GPU_MEMORY_UTIL=%s; MAX_MODEL_LEN=%d; MAX_NUM_SEQS=%d;
[ -n "$vllm_out" ] && eval "$vllm_out";
printf '%%s\n' "$model";
printf 'GPU_MEMORY_UTIL=%%s MAX_MODEL_LEN=%%s MAX_NUM_SEQS=%%s\n' "$GPU_MEMORY_UTIL" "$MAX_MODEL_LEN" "$MAX_NUM_SEQS"`,
		api.LLM_VLLM_MODELS_PATH,
		preferredEscaped,
		api.LLM_VLLM_MODELS_PATH,
		tensorParallelSize,
		escapedScript,
		defaultGpuUtil,
		api.LLM_VLLM_DEFAULT_MAX_MODEL_LEN,
		api.LLM_VLLM_DEFAULT_MAX_NUM_SEQS,
	)
	out, err := exec(ctx, containerId, cmd, 30)
	if err != nil {
		return nil, errors.Wrapf(err, "exec resolve model and params")
	}
	out = strings.TrimSpace(out)
	if out == "NO_MODEL" {
		return nil, nil
	}
	lines := strings.SplitN(out, "\n", 2)
	if len(lines) < 2 {
		return nil, errors.Errorf("vLLM resolve output missing params line: %s", out)
	}
	res := &vllmResolveResult{
		ModelPath:   strings.TrimSpace(lines[0]),
		GpuUtil:     defaultGpuUtil,
		MaxModelLen: api.LLM_VLLM_DEFAULT_MAX_MODEL_LEN,
		MaxNumSeqs:  api.LLM_VLLM_DEFAULT_MAX_NUM_SEQS,
	}
	for _, f := range strings.Fields(lines[1]) {
		if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_GPU_UTIL); ok {
			res.GpuUtil = val
		} else if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_LEN); ok {
			if n, e := strconv.Atoi(val); e == nil && n > 0 {
				res.MaxModelLen = n
			}
		} else if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_NUM_SEQ); ok {
			if n, e := strconv.Atoi(val); e == nil && n > 0 {
				res.MaxNumSeqs = n
			}
		}
	}
	return res, nil
}
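The exec contract is two lines: the resolved model path, then a KEY=value params line. A standalone sketch of the parsing with a sample payload (prefix strings inlined from the constants above):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// Hypothetical container output: model path, then the params line.
	out := "/data/models/huggingface/Qwen/Qwen2-7B\n" +
		"GPU_MEMORY_UTIL=0.9 MAX_MODEL_LEN=2624 MAX_NUM_SEQS=1"
	lines := strings.SplitN(strings.TrimSpace(out), "\n", 2)
	modelPath := strings.TrimSpace(lines[0])
	gpuUtil, maxLen, maxSeqs := "0.9", 2048, 1 // conservative defaults
	for _, f := range strings.Fields(lines[1]) {
		if v, ok := strings.CutPrefix(f, "GPU_MEMORY_UTIL="); ok {
			gpuUtil = v
		} else if v, ok := strings.CutPrefix(f, "MAX_MODEL_LEN="); ok {
			if n, err := strconv.Atoi(v); err == nil && n > 0 {
				maxLen = n
			}
		} else if v, ok := strings.CutPrefix(f, "MAX_NUM_SEQS="); ok {
			if n, err := strconv.Atoi(v); err == nil && n > 0 {
				maxSeqs = n
			}
		}
	}
	fmt.Println(modelPath, gpuUtil, maxLen, maxSeqs)
	// /data/models/huggingface/Qwen/Qwen2-7B 0.9 2624 1
}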
@@ -92,6 +92,15 @@ func (man *SLLMManager) ValidateCreateData(ctx context.Context, userCred mcclien
 	input.LLMSkuId = lSku.Id
 	input.LLMImageId = lSku.GetLLMImageId()
 
+	if input.LLMSpec != nil {
+		drv := lSku.GetLLMContainerDriver()
+		spec, err := drv.ValidateLLMCreateSpec(ctx, userCred, lSku, input.LLMSpec)
+		if err != nil {
+			return input, errors.Wrap(err, "validate LLM create spec")
+		}
+		input.LLMSpec = spec
+	}
+
 	return input, nil
 }
@@ -380,6 +389,29 @@ func (llm *SLLM) GetLLMSku(skuId string) (*SLLMSku, error) {
 	return sku.(*SLLMSku), nil
 }
 
+func (llm *SLLM) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, input api.LLMUpdateInput) (api.LLMUpdateInput, error) {
+	var err error
+	input.VirtualResourceBaseUpdateInput, err = llm.SLLMBase.SVirtualResourceBase.ValidateUpdateData(ctx, userCred, query, input.VirtualResourceBaseUpdateInput)
+	if err != nil {
+		return input, errors.Wrap(err, "validate VirtualResourceBaseUpdateInput")
+	}
+
+	if input.LLMSpec == nil {
+		return input, nil
+	}
+	sku, err := llm.GetLLMSku(llm.LLMSkuId)
+	if err != nil {
+		return input, errors.Wrap(err, "fetch LLMSku")
+	}
+	drv := sku.GetLLMContainerDriver()
+	spec, err := drv.ValidateLLMUpdateSpec(ctx, userCred, llm, input.LLMSpec)
+	if err != nil {
+		return input, errors.Wrap(err, "validate LLM update spec")
+	}
+	input.LLMSpec = spec
+	return input, nil
+}
+
 func (llm *SLLM) GetLargeLanguageModelName(name string) (modelName string, modelTag string, err error) {
 	if name == "" {
 		return "", "", errors.Wrap(errors.ErrInvalidStatus, "model name is empty")
@@ -43,7 +43,7 @@ func GetLLMBasePodCreateInput(
 	}
 
 	data.VcpuCount = skuBase.Cpu
-	data.VmemSize = skuBase.Memory + 1
+	data.VmemSize = skuBase.Memory
 	// data.Name = input.Name + "-" + seclib.RandomPassword(6)
 	data.Name = input.Name
@@ -93,13 +93,16 @@ type ILLMContainerDriver interface {
 	GetEffectiveSpec(llm *SLLM, sku *SLLMSku) interface{}
 	// GetPrimaryImageId returns the primary image id for this SKU type (e.g. LLMImageId for ollama/vllm, DifyApiImageId for dify).
 	GetPrimaryImageId(sku *SLLMSku) string
-	// ValidateCreateData validates create input and returns the LLMSpec to store. Called by SKU manager after base validation.
-	ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *llm.LLMSkuCreateInput) (*llm.LLMSkuCreateInput, error)
-	// ValidateUpdateData validates update input, merges with current spec, and returns the LLMSpec to store. Called by SKU when LLMSpec is not nil.
-	ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSkuUpdateInput) (*llm.LLMSkuUpdateInput, error)
+	// ValidateLLMSkuCreateData validates create input and returns the LLMSpec to store. Called by SKU manager after base validation.
+	ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *llm.LLMSkuCreateInput) (*llm.LLMSkuCreateInput, error)
+	// ValidateLLMSkuUpdateData validates update input, merges with current spec, and returns the LLMSpec to store. Called by SKU when LLMSpec is not nil.
+	ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSkuUpdateInput) (*llm.LLMSkuUpdateInput, error)
+
+	MatchContainerToUpdate(ctr *computeapi.SContainer, podCtrs []*computeapi.PodContainerCreateInput) (*computeapi.PodContainerCreateInput, error)
+
+	ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSpec) (*llm.LLMSpec, error)
+	ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *SLLM, input *llm.LLMSpec) (*llm.LLMSpec, error)
 
 	ILLMContainerMCPAgent
 }
@@ -820,13 +820,31 @@ func (llm *SLLM) EnsureInstantModelsInstalled(ctx context.Context, userCred mccl
	if err != nil {
		return errors.Wrap(err, "FetchApps")
	}
	if len(mdlIds) == 0 {
		return nil
	}
	instModels := make(map[string]SInstantModel)
	if err := db.FetchModelObjectsByIds(GetInstantModelManager(), "id", mdlIds, &instModels); err != nil {
		return errors.Wrap(err, "FetchModelObjectsByIds")
	}
	var errs []error
	for _, mdlId := range mdlIds {
		if _, ok := mdlMap[mdlId]; ok {
			// probed
			// errs = append(errs, errors.Wrap(errors.ErrInvalidStatus, pkg))
		} else {
			// not mounted and not probed
			continue
		}
		instModel, ok := instModels[mdlId]
		if !ok {
			errs = append(errs, errors.Wrapf(errors.ErrNotFound, "mdlId %s", mdlId))
			continue
		}
		found := false
		for _, info := range mdlMap {
			if info.ModelId == instModel.ModelId {
				found = true
				break
			}
		}
		if !found {
			errs = append(errs, errors.Wrapf(errors.ErrNotFound, "mdlId %s", mdlId))
		}
	}
@@ -201,7 +201,7 @@ func (man *SLLMSkuManager) ValidateCreateData(ctx context.Context, userCred mccl
 	if err != nil {
 		return input, errors.Wrap(err, "get container driver")
 	}
-	input, err = drv.ValidateCreateData(ctx, userCred, input)
+	input, err = drv.ValidateLLMSkuCreateData(ctx, userCred, input)
 	if err != nil {
 		return input, errors.Wrap(err, "validate create input")
 	}
@@ -238,7 +238,7 @@ func (sku *SLLMSku) ValidateUpdateData(ctx context.Context, userCred mcclient.To
 		return input, nil
 	}
 	drv := sku.GetLLMContainerDriver()
-	updateInput, err := drv.ValidateUpdateData(ctx, userCred, sku, &input)
+	updateInput, err := drv.ValidateLLMSkuUpdateData(ctx, userCred, sku, &input)
 	if err != nil {
 		return input, errors.Wrap(err, "validate update spec")
 	}
@@ -94,6 +94,7 @@ func (task *LLMCreateTask) OnLLMRefreshStatusComplete(ctx context.Context, llm *
	mountedModels, err := llm.FetchMountedModelFullName()
	if err != nil {
		task.taskFailed(ctx, llm, errors.Wrap(err, "FetchMountedModelFullName"))
		return
	}

	// Create the disk
@@ -59,5 +59,10 @@ func (t *LLMStartTask) OnStartedFailed(ctx context.Context, llm *models.SLLM, er
}

func (t *LLMStartTask) OnStarted(ctx context.Context, llm *models.SLLM, reason jsonutils.JSONObject) {
	err := llm.GetLLMContainerDriver().StartLLM(ctx, t.GetUserCred(), llm)
	if err != nil {
		t.taskFailed(ctx, llm, err.Error())
		return
	}
	t.taskComplete(ctx, llm)
}
@@ -18,7 +18,7 @@ func (o *LLMImageShowOptions) Params() (jsonutils.JSONObject, error) {
 type LLMImageListOptions struct {
 	options.BaseListOptions
 
-	LLMType string `json:"llm_type" choices:"ollama|dify|comfyui" help:"filter by llm type"`
+	LLMType string `json:"llm_type" choices:"ollama|dify|comfyui|vllm" help:"filter by llm type"`
 }
 
 func (o *LLMImageListOptions) Params() (jsonutils.JSONObject, error) {
@@ -30,7 +30,7 @@ type LLMImageCreateOptions struct {
 	IMAGE_NAME   string `json:"image_name"`
 	IMAGE_LABEL  string `json:"image_label"`
 	CredentialId string `json:"credential_id"`
-	LLM_TYPE     string `json:"llm_type" choices:"ollama|dify|comfyui" help:"llm type: ollama, comfyui or dify"`
+	LLM_TYPE     string `json:"llm_type" choices:"ollama|dify|comfyui|vllm" help:"llm type: ollama, comfyui or dify"`
 }
 
 func (o *LLMImageCreateOptions) Params() (jsonutils.JSONObject, error) {
@@ -44,7 +44,7 @@ type LLMImageUpdateOptions struct {
 	ImageName    string `json:"image_name"`
 	ImageLabel   string `json:"image_label"`
 	CredentialId string `json:"credential_id"`
-	LlmType      string `json:"llm_type" choices:"ollama|dify|vllm|comfyui" help:"llm type: ollama, comfyui or dify"`
+	LlmType      string `json:"llm_type" choices:"ollama|dify|vllm|comfyui" help:"llm type: ollama, comfyui, vllm or dify"`
 }
 
 func (o *LLMImageUpdateOptions) GetId() string {
@@ -68,11 +68,12 @@ type LLMBaseCreateOptions struct {
 type LLMCreateOptions struct {
 	LLMBaseCreateOptions
 
-	LLM_SKU_ID string `help:"llm sku id or name" json:"llm_sku_id"`
+	LLM_SKU_ID     string `help:"llm sku id or name" json:"llm_sku_id"`
+	PreferredModel string `help:"vLLM preferred model dir name under models path (e.g. Qwen/Qwen2-7B)" json:"-"`
 }
 
 func (o *LLMCreateOptions) Params() (jsonutils.JSONObject, error) {
-	params := jsonutils.Marshal(o)
+	params := jsonutils.Marshal(o).(*jsonutils.JSONDict)
 
 	if len(o.Net) > 0 {
 		nets := make([]*computeapi.NetworkConfig, 0)
@@ -83,7 +84,16 @@ func (o *LLMCreateOptions) Params() (jsonutils.JSONObject, error) {
 			}
 			nets = append(nets, net)
 		}
-		params.(*jsonutils.JSONDict).Add(jsonutils.Marshal(nets), "nets")
+		params.Add(jsonutils.Marshal(nets), "nets")
 	}
 
+	if o.PreferredModel != "" {
+		spec := &api.LLMSpec{
+			Ollama: nil,
+			Vllm:   &api.LLMSpecVllm{PreferredModel: o.PreferredModel},
+			Dify:   nil,
+		}
+		params.Set("llm_spec", jsonutils.Marshal(spec))
+	}
+
 	return params, nil
@@ -93,6 +103,32 @@ func (o *LLMCreateOptions) GetCountParam() int {
 	return o.Count
 }
 
+type LLMUpdateOptions struct {
+	options.BaseIdOptions
+
+	PreferredModel string `help:"vLLM preferred model dir name under models path (e.g. Qwen/Qwen2-7B); takes effect after pod recreate" json:"-"`
+}
+
+func (o *LLMUpdateOptions) GetId() string {
+	return o.ID
+}
+
+func (o *LLMUpdateOptions) Params() (jsonutils.JSONObject, error) {
+	dict, err := options.StructToParams(o)
+	if err != nil {
+		return nil, err
+	}
+	if o.PreferredModel != "" {
+		spec := &api.LLMSpec{
+			Ollama: nil,
+			Vllm:   &api.LLMSpecVllm{PreferredModel: o.PreferredModel},
+			Dify:   nil,
+		}
+		dict.Set("llm_spec", jsonutils.Marshal(spec))
+	}
+	return dict, nil
+}
+
 type LLMDeleteOptions struct {
 	options.BaseIdOptions
 }
@@ -8,7 +8,7 @@ set -euo pipefail
 
 if [ $# -ne 1 ]; then
 	echo "Usage: $0 <targetRegistry>"
-	echo "Example: $0 crpi-nf3abu98o8qf9y2x.cn-beijing.personal.cr.aliyuncs.com/eikoh"
+	echo "Example: $0 registry.cn-beijing.aliyuncs.com/cloudpods"
 	exit 1
 fi