diff --git a/cmd/climc/shell/llm/llm.go b/cmd/climc/shell/llm/llm.go index a6989e5f54..cfbf6fa752 100644 --- a/cmd/climc/shell/llm/llm.go +++ b/cmd/climc/shell/llm/llm.go @@ -11,6 +11,7 @@ func init() { cmd.BatchCreate(new(options.LLMCreateOptions)) cmd.List(new(options.LLMListOptions)) cmd.Show(new(options.LLMShowOptions)) + cmd.Update(new(options.LLMUpdateOptions)) cmd.Delete(new(options.LLMDeleteOptions)) // cmd.Perform("change-model", new(options.LLMChangeModelOptions)) cmd.Perform("syncstatus", new(options.LLMIdOptions)) diff --git a/pkg/apis/llm/image.go b/pkg/apis/llm/image.go index cff43c640d..9a2b59c502 100644 --- a/pkg/apis/llm/image.go +++ b/pkg/apis/llm/image.go @@ -19,6 +19,7 @@ const ( var ( LLM_IMAGE_TYPES = sets.NewString( string(LLM_IMAGE_TYPE_OLLAMA), + string(LLM_IMAGE_TYPE_VLLM), string(LLM_IMAGE_TYPE_DIFY), string(LLM_IMAGE_TYPE_COMFYUI), string(LLM_IMAGE_TYPE_OPENCLAW), diff --git a/pkg/apis/llm/llm.go b/pkg/apis/llm/llm.go index cf6fe9bf86..04355c245c 100644 --- a/pkg/apis/llm/llm.go +++ b/pkg/apis/llm/llm.go @@ -82,6 +82,14 @@ type LLMCreateInput struct { LLMSpec *LLMSpec `json:"llm_spec,omitempty"` } +// LLMUpdateInput is the request body for updating an LLM (including llm_spec overrides). +type LLMUpdateInput struct { + apis.VirtualResourceBaseUpdateInput + + InstantModelQuotaGb *int `json:"instant_model_quota_gb,omitempty"` + LLMSpec *LLMSpec `json:"llm_spec,omitempty"` +} + type LLMBaseListInput struct { apis.VirtualResourceListInput apis.EnabledResourceBaseListInput diff --git a/pkg/apis/llm/vllm_const.go b/pkg/apis/llm/vllm_const.go new file mode 100644 index 0000000000..6b02f216ae --- /dev/null +++ b/pkg/apis/llm/vllm_const.go @@ -0,0 +1,88 @@ +package llm + +import "time" + +const ( + LLM_VLLM = "vllm" + LLM_VLLM_DEFAULT_PORT = 8000 + LLM_VLLM_EXEC_PATH = "python3 -m vllm.entrypoints.openai.api_server" + + LLM_VLLM_HF_ENDPOINT = "https://hf-mirror.com" + + // Directory constants + LLM_VLLM_CACHE_DIR = "/root/.cache/huggingface" + LLM_VLLM_BASE_PATH = "/data/models" + LLM_VLLM_MODELS_PATH = "/data/models/huggingface" + + // Health check + LLM_VLLM_HEALTH_CHECK_TIMEOUT = 120 * time.Second // 2 minutes + LLM_VLLM_HEALTH_CHECK_INTERVAL = 10 * time.Second // 10 seconds + + // Default vLLM memory params when Python estimation fails (conservative to avoid OOM) + LLM_VLLM_DEFAULT_GPU_MEMORY_UTIL = 0.9 + LLM_VLLM_DEFAULT_MAX_MODEL_LEN = 2048 + LLM_VLLM_DEFAULT_MAX_NUM_SEQS = 1 + + // Prefixes for parsing resolveModelAndParams output line (KEY=value) + LLM_VLLM_RESOLVE_OUTPUT_PREFIX_GPU_UTIL = "GPU_MEMORY_UTIL=" + LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_LEN = "MAX_MODEL_LEN=" + LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_NUM_SEQ = "MAX_NUM_SEQS=" +) + +const ( + + // vllmEstimateParamsScript is a Python script run inside the container to estimate + // --gpu-memory-utilization, --max-model-len, and --max-num-seqs from GPU memory and model config. + // Args: sys.argv[1]=model path, sys.argv[2]=tensor_parallel_size. + // Prints one line: GPU_MEMORY_UTIL=0.9 MAX_MODEL_LEN=2624 MAX_NUM_SEQS=1 (eval-safe). + LLM_VLLM_ESTIMATE_PARAMS_SCRIPT = ` +import sys, json, os +model_path = sys.argv[1] if len(sys.argv) > 1 else "" +tp = int(sys.argv[2]) if len(sys.argv) > 2 else 1 +if not model_path or not os.path.isdir(model_path): + sys.exit(1) +config_path = os.path.join(model_path, "config.json") +if not os.path.isfile(config_path): + sys.exit(1) +with open(config_path) as f: + config = json.load(f) +def get_nested(d, *keys): + for k in keys: + d = d.get(k) if isinstance(d, dict) else None + if d is None: + return None + return d +num_layers = config.get("num_hidden_layers") or config.get("n_layer") or get_nested(config, "text_config", "num_hidden_layers") or 0 +num_heads = config.get("num_attention_heads") or config.get("n_head") or get_nested(config, "text_config", "num_attention_heads") or 0 +num_kv_heads = config.get("num_key_value_heads") or get_nested(config, "text_config", "num_key_value_heads") or num_heads +hidden_size = config.get("hidden_size") or config.get("n_embd") or get_nested(config, "text_config", "hidden_size") or 0 +head_dim = hidden_size // num_heads if num_heads else (hidden_size // 64) +max_pos = config.get("max_position_embeddings") or config.get("n_positions") or get_nested(config, "text_config", "max_position_embeddings") or 4096 +if not num_layers or not num_kv_heads or not head_dim: + sys.exit(1) +try: + import torch + total_mem = sum(torch.cuda.get_device_properties(i).total_memory for i in range(torch.cuda.device_count())) +except Exception: + sys.exit(1) +gpu_util = 0.9 +activation_overhead = 2 * (1024**3) +num_params = config.get("num_parameters") or config.get("num_params") or get_nested(config, "text_config", "num_parameters") +if num_params is not None: + model_bytes = num_params * 2 +else: + model_bytes = 12 * (1024**3) +available_kv = total_mem * gpu_util - model_bytes - activation_overhead +if available_kv <= 0: + available_kv = total_mem * 0.5 +kv_per_token = num_layers * 2 * num_kv_heads * head_dim * 2 +max_num_seqs = 4 +max_model_len = int(available_kv / (kv_per_token * max_num_seqs)) +max_model_len = max(1, min(max_model_len, max_pos)) +if max_model_len < 256: + max_num_seqs = 1 + max_model_len = int(available_kv / (kv_per_token * max_num_seqs)) + max_model_len = max(1, min(max_model_len, max_pos)) +print("GPU_MEMORY_UTIL=%s MAX_MODEL_LEN=%d MAX_NUM_SEQS=%d" % (gpu_util, max_model_len, max_num_seqs)) +` +) diff --git a/pkg/llm/drivers/llm_container/base_driver.go b/pkg/llm/drivers/llm_container/base_driver.go index e3d185ecfc..3c2e1fa68c 100644 --- a/pkg/llm/drivers/llm_container/base_driver.go +++ b/pkg/llm/drivers/llm_container/base_driver.go @@ -41,7 +41,7 @@ func (b *baseDriver) StartLLM(ctx context.Context, userCred mcclient.TokenCreden return nil } -func (b *baseDriver) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) { +func (b *baseDriver) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) { imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), &input.LLMImageId) if err != nil { return nil, errors.Wrapf(err, "validate image_id %s", input.LLMImageId) @@ -67,7 +67,7 @@ func (b *baseDriver) ValidateCreateData(ctx context.Context, userCred mcclient.T return input, nil } -func (b *baseDriver) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) { +func (b *baseDriver) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) { llmImageId := input.LLMImageId if llmImageId != "" { imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), &llmImageId) @@ -111,5 +111,16 @@ func MatchContainerToUpdateByName(ctr *computeapi.SContainer, podCtrs []*compute } func (b *baseDriver) MatchContainerToUpdate(ctr *computeapi.SContainer, podCtrs []*computeapi.PodContainerCreateInput) (*computeapi.PodContainerCreateInput, error) { + if len(podCtrs) == 1 { + return podCtrs[0], nil + } return MatchContainerToUpdateByName(ctr, podCtrs) } + +func (b *baseDriver) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) { + return input, nil +} + +func (b *baseDriver) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) { + return input, nil +} diff --git a/pkg/llm/drivers/llm_container/dify.go b/pkg/llm/drivers/llm_container/dify.go index 00bbace1a1..eaf0a57f47 100644 --- a/pkg/llm/drivers/llm_container/dify.go +++ b/pkg/llm/drivers/llm_container/dify.go @@ -37,6 +37,36 @@ func (d *dify) GetSpec(sku *models.SLLMSku) interface{} { return sku.LLMSpec.Dify } +// mergeDifySpecInto merges src into dst. If fillEmpty is true, only fills dst when dst is empty; otherwise overwrites dst when src is non-empty. +func mergeDifySpecInto(dst, src *api.LLMSpecDify, fillEmpty bool) { + if dst == nil || src == nil { + return + } + mergeStr := func(dstPtr *string, srcVal string) { + if fillEmpty { + if *dstPtr == "" && srcVal != "" { + *dstPtr = srcVal + } + } else { + if srcVal != "" { + *dstPtr = srcVal + } + } + } + mergeStr(&dst.PostgresImageId, src.PostgresImageId) + mergeStr(&dst.RedisImageId, src.RedisImageId) + mergeStr(&dst.NginxImageId, src.NginxImageId) + mergeStr(&dst.DifyApiImageId, src.DifyApiImageId) + mergeStr(&dst.DifyPluginImageId, src.DifyPluginImageId) + mergeStr(&dst.DifyWebImageId, src.DifyWebImageId) + mergeStr(&dst.DifySandboxImageId, src.DifySandboxImageId) + mergeStr(&dst.DifySSRFImageId, src.DifySSRFImageId) + mergeStr(&dst.DifyWeaviateImageId, src.DifyWeaviateImageId) + if (fillEmpty && len(dst.CustomizedEnvs) == 0 && len(src.CustomizedEnvs) > 0) || (!fillEmpty && len(src.CustomizedEnvs) > 0) { + dst.CustomizedEnvs = src.CustomizedEnvs + } +} + // mergeDify merges llm and sku Dify specs; llm takes priority, use sku when llm is nil or zero. func mergeDify(llm, sku *api.LLMSpecDify) *api.LLMSpecDify { if llm != nil && !llm.IsZero() { @@ -88,14 +118,60 @@ func (d *dify) GetPrimaryContainer(ctx context.Context, llm *models.SLLM, contai return nil, errors.Error("api container not found") } -func (d *dify) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) { +func (d *dify) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) { if input.LLMSpec == nil || input.LLMSpec.Dify == nil { return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU requires llm_spec with type dify and image ids") } if input.MountedModels != nil { return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models") } - difySpec := input.LLMSpec.Dify + + // Reuse ValidateLLMCreateSpec to normalize/validate LLMSpec. + spec, err := d.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec) + if err != nil { + return nil, err + } + input.LLMSpec = spec + if input.LLMSpec != nil && input.LLMSpec.Dify != nil { + input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId + } + return input, nil +} + +func (d *dify) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) { + if input.MountedModels != nil { + return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models") + } + if input.LLMSpec == nil { + return input, nil + } + + // Reuse ValidateLLMUpdateSpec by treating current SKU spec as the \"current llm spec\". + fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec} + spec, err := d.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec) + if err != nil { + return nil, err + } + input.LLMSpec = spec + + // if dify_api_image_id is set, use it as the primary image id + if input.LLMSpec != nil && input.LLMSpec.Dify != nil && input.LLMSpec.Dify.DifyApiImageId != "" { + input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId + } + return input, nil +} + +// ValidateLLMCreateSpec implements ILLMContainerDriver. Validates image ids and merges empty fields from SKU spec. +func (d *dify) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) { + if input == nil || input.Dify == nil { + return input, nil + } + difySpec := input.Dify + // Merge empty fields from SKU so the stored spec is complete + if sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Dify != nil { + mergeDifySpecInto(difySpec, sku.LLMSpec.Dify, true) + } + // Validate non-empty image ids for _, imgId := range []*string{&difySpec.PostgresImageId, &difySpec.RedisImageId, &difySpec.NginxImageId, &difySpec.DifyApiImageId, &difySpec.DifyPluginImageId, &difySpec.DifyWebImageId, &difySpec.DifySandboxImageId, &difySpec.DifySSRFImageId, &difySpec.DifyWeaviateImageId} { if *imgId == "" { continue @@ -105,63 +181,68 @@ func (d *dify) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCr return nil, errors.Wrapf(err, "validate image_id %s", *imgId) } img := imgObj.(*models.SLLMImage) - if img.LLMType != input.LLMType { - return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type %s", *imgId, input.LLMType) + if img.LLMType != string(api.LLM_CONTAINER_DIFY) { + return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type dify", *imgId) } *imgId = img.Id } - input.LLMImageId = difySpec.DifyApiImageId - return input, nil + return &api.LLMSpec{Dify: difySpec}, nil } -func (d *dify) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) { - if input.MountedModels != nil { - return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models") +// ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges input with current LLM spec (non-empty overwrites); validates image ids. +func (d *dify) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) { + if input == nil || input.Dify == nil { + return input, nil } - if input.LLMSpec == nil || input.LLMSpec.Dify == nil { - return nil, nil - } - currentSpec := d.GetSpec(sku) - if currentSpec == nil { - return nil, nil - } - updated := *currentSpec.(*api.LLMSpecDify) - difySpec := input.LLMSpec.Dify - mergeStr := func(dst *string, src string) { - if src != "" { - *dst = src + // Start from current LLM spec, or SKU spec as base + var base *api.LLMSpecDify + if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Dify != nil { + b := *llm.LLMSpec.Dify + base = &b + if llm.LLMSpec.Dify.CustomizedEnvs != nil { + base.CustomizedEnvs = make([]*api.DifyCustomizedEnv, len(llm.LLMSpec.Dify.CustomizedEnvs)) + copy(base.CustomizedEnvs, llm.LLMSpec.Dify.CustomizedEnvs) + } + } else if llm != nil { + sku, err := llm.GetLLMSku(llm.LLMSkuId) + if err == nil && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Dify != nil { + b := *sku.LLMSpec.Dify + base = &b + if sku.LLMSpec.Dify.CustomizedEnvs != nil { + base.CustomizedEnvs = make([]*api.DifyCustomizedEnv, len(sku.LLMSpec.Dify.CustomizedEnvs)) + copy(base.CustomizedEnvs, sku.LLMSpec.Dify.CustomizedEnvs) + } } } - mergeStr(&updated.PostgresImageId, difySpec.PostgresImageId) - mergeStr(&updated.RedisImageId, difySpec.RedisImageId) - mergeStr(&updated.NginxImageId, difySpec.NginxImageId) - mergeStr(&updated.DifyApiImageId, difySpec.DifyApiImageId) - mergeStr(&updated.DifyPluginImageId, difySpec.DifyPluginImageId) - mergeStr(&updated.DifyWebImageId, difySpec.DifyWebImageId) - mergeStr(&updated.DifySandboxImageId, difySpec.DifySandboxImageId) - mergeStr(&updated.DifySSRFImageId, difySpec.DifySSRFImageId) - mergeStr(&updated.DifyWeaviateImageId, difySpec.DifyWeaviateImageId) - if len(difySpec.CustomizedEnvs) > 0 { - updated.CustomizedEnvs = difySpec.CustomizedEnvs + if base == nil { + base = &api.LLMSpecDify{} } - for _, imgId := range []*string{&updated.PostgresImageId, &updated.RedisImageId, &updated.NginxImageId, &updated.DifyApiImageId, &updated.DifyPluginImageId, &updated.DifyWebImageId, &updated.DifySandboxImageId, &updated.DifySSRFImageId, &updated.DifyWeaviateImageId} { - if *imgId != "" { - imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), imgId) - if err != nil { - return nil, errors.Wrapf(err, "validate image_id %s", *imgId) - } - img := imgObj.(*models.SLLMImage) - if img.LLMType != sku.LLMType { - return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type %s", *imgId, sku.LLMType) - } - *imgId = img.GetId() + mergeDifySpecInto(base, input.Dify, false) + // Validate non-empty image ids + for _, imgId := range []*string{&base.PostgresImageId, &base.RedisImageId, &base.NginxImageId, &base.DifyApiImageId, &base.DifyPluginImageId, &base.DifyWebImageId, &base.DifySandboxImageId, &base.DifySSRFImageId, &base.DifyWeaviateImageId} { + if *imgId == "" { + continue } + imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), imgId) + if err != nil { + return nil, errors.Wrapf(err, "validate image_id %s", *imgId) + } + img := imgObj.(*models.SLLMImage) + if img.LLMType != string(api.LLM_CONTAINER_DIFY) { + return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type dify", *imgId) + } + *imgId = img.Id } - // if dify_api_image_id is set, use it as the primary image id - if input.LLMSpec.Dify.DifyApiImageId != "" { - input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId + return &api.LLMSpec{Dify: base}, nil +} + +// GetContainerSpec is required by ILLMContainerDriver but not used for Dify; pod creation uses GetContainerSpecs. Return the first container so the interface is satisfied. +func (d *dify) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput { + specs := d.GetContainerSpecs(ctx, llm, image, sku, props, devices, diskId) + if len(specs) == 0 { + return nil } - return input, nil + return specs[0] } // GetContainerSpecs returns all Dify pod containers (postgres, redis, api, worker, nginx, etc.). Uses effective spec (llm + sku merged by driver). diff --git a/pkg/llm/drivers/llm_container/vllm.go b/pkg/llm/drivers/llm_container/vllm.go new file mode 100644 index 0000000000..6297165dd2 --- /dev/null +++ b/pkg/llm/drivers/llm_container/vllm.go @@ -0,0 +1,578 @@ +package llm_container + +import ( + "context" + "fmt" + "net/http" + "path" + "strconv" + "strings" + "time" + + "yunion.io/x/log" + "yunion.io/x/pkg/errors" + + commonapi "yunion.io/x/onecloud/pkg/apis" + computeapi "yunion.io/x/onecloud/pkg/apis/compute" + api "yunion.io/x/onecloud/pkg/apis/llm" + "yunion.io/x/onecloud/pkg/llm/models" + "yunion.io/x/onecloud/pkg/mcclient" +) + +func init() { + models.RegisterLLMContainerDriver(newVLLM()) +} + +type vllm struct { + baseDriver +} + +func newVLLM() models.ILLMContainerDriver { + return &vllm{baseDriver: newBaseDriver(api.LLM_CONTAINER_VLLM)} +} + +// escapeShellSingleQuoted escapes s for use inside a single-quoted shell string (each ' becomes '\”). +func escapeShellSingleQuoted(s string) string { + return strings.ReplaceAll(s, "'", "'\\''") +} + +func (v *vllm) GetSpec(sku *models.SLLMSku) interface{} { + if sku == nil || sku.LLMType != string(api.LLM_CONTAINER_VLLM) || sku.LLMSpec == nil || sku.LLMSpec.Vllm == nil { + return nil + } + return sku.LLMSpec.Vllm +} + +func (v *vllm) GetEffectiveSpec(llm *models.SLLM, sku *models.SLLMSku) interface{} { + var skuSpec *api.LLMSpecVllm + if s := v.GetSpec(sku); s != nil { + skuSpec = s.(*api.LLMSpecVllm) + } + if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil { + if llm.LLMSpec.Vllm.PreferredModel != "" { + out := *llm.LLMSpec.Vllm + return &out + } + // llm explicitly present but empty -> fall back to sku default + } + if skuSpec != nil { + out := *skuSpec + return &out + } + return nil +} + +func (v *vllm) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) { + input, err := v.baseDriver.ValidateLLMSkuCreateData(ctx, userCred, input) + if err != nil { + return nil, err + } + + // Reuse ValidateLLMCreateSpec; ensure llm_spec.vllm always exists for vLLM SKU. + spec, err := v.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec) + if err != nil { + return nil, err + } + if spec == nil { + spec = &api.LLMSpec{Vllm: &api.LLMSpecVllm{}} + } else if spec.Vllm == nil { + spec.Vllm = &api.LLMSpecVllm{} + } + input.LLMSpec = spec + return input, nil +} + +func (v *vllm) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) { + input, err := v.baseDriver.ValidateLLMSkuUpdateData(ctx, userCred, sku, input) + if err != nil { + return nil, err + } + if input.LLMSpec == nil { + return input, nil + } + + // Reuse ValidateLLMUpdateSpec by treating current SKU spec as the "current llm spec". + fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec} + spec, err := v.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec) + if err != nil { + return nil, err + } + input.LLMSpec = spec + if input.LLMSpec != nil && input.LLMSpec.Vllm == nil { + input.LLMSpec.Vllm = &api.LLMSpecVllm{} + } + return input, nil +} + +// ValidateLLMCreateSpec implements ILLMContainerDriver. Merges preferred_model from SKU when input's is empty. +func (v *vllm) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) { + if input == nil { + return nil, nil + } + preferred := "" + if input.Vllm != nil { + preferred = input.Vllm.PreferredModel + } + if preferred == "" && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil { + preferred = sku.LLMSpec.Vllm.PreferredModel + } + return &api.LLMSpec{Vllm: &api.LLMSpecVllm{PreferredModel: preferred}}, nil +} + +// ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges preferred_model with current LLM spec; only overwrite when non-empty. +func (v *vllm) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) { + if input == nil || input.Vllm == nil { + return input, nil + } + current := "" + if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil { + current = llm.LLMSpec.Vllm.PreferredModel + } + preferred := input.Vllm.PreferredModel + if preferred == "" { + preferred = current + } + return &api.LLMSpec{Vllm: &api.LLMSpecVllm{PreferredModel: preferred}}, nil +} + +func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput { + // Container entrypoint only keeps the container alive; vLLM is started by StartLLM via exec. + startScript := `mkdir -p ` + api.LLM_VLLM_MODELS_PATH + ` && exec sleep infinity` + envs := []*commonapi.ContainerKeyValue{ + { + Key: "HUGGING_FACE_HUB_CACHE", + Value: api.LLM_VLLM_CACHE_DIR, + }, + { + Key: "HF_ENDPOINT", + Value: api.LLM_VLLM_HF_ENDPOINT, + }, + // // Fix Error 803 + // { + // Key: "LD_LIBRARY_PATH", + // Value: "/lib64:/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}", + // }, + // // Fix Error 803 + // { + // Key: "LD_PRELOAD", + // Value: "/lib/libcuda.so.1 /lib/libnvidia-ptxjitcompiler.so.1 /lib/libnvidia-gpucomp.so", + // }, + } + spec := computeapi.ContainerSpec{ + ContainerSpec: commonapi.ContainerSpec{ + Image: image.ToContainerImage(), + ImageCredentialId: image.CredentialId, + Command: []string{"/bin/sh", "-c"}, + Args: []string{startScript}, + EnableLxcfs: true, + AlwaysRestart: true, + Envs: envs, + }, + } + + // GPU Devices + if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) { + for i := range *sku.Devices { + index := i + spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{ + Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE, + IsolatedDevice: &computeapi.ContainerIsolatedDevice{ + Index: &index, + }, + }) + } + } else if len(devices) > 0 { + for i := range devices { + spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{ + Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE, + IsolatedDevice: &computeapi.ContainerIsolatedDevice{ + Id: devices[i].Id, + }, + }) + } + } + + // Volume Mounts + diskIndex := 0 + postOverlays, err := llm.GetMountedModelsPostOverlay() + if err != nil { + log.Errorf("GetMountedModelsPostOverlay failed %s", err) + } + ctrVols := []*commonapi.ContainerVolumeMount{ + { + Disk: &commonapi.ContainerVolumeMountDisk{ + SubDirectory: api.LLM_VLLM, + Index: &diskIndex, + PostOverlay: postOverlays, + }, + Type: commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK, + MountPath: api.LLM_VLLM_BASE_PATH, + ReadOnly: false, + Propagation: commonapi.MOUNTPROPAGATION_PROPAGATION_HOST_TO_CONTAINER, + }, + { + // Mount cache dir to save HF cache + Disk: &commonapi.ContainerVolumeMountDisk{ + SubDirectory: "cache", + Index: &diskIndex, + }, + Type: commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK, + MountPath: "/root/.cache", + ReadOnly: false, + }, + } + spec.VolumeMounts = append(spec.VolumeMounts, ctrVols...) + + return &computeapi.PodContainerCreateInput{ + ContainerSpec: spec, + } +} + +func (v *vllm) GetContainerSpecs(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) []*computeapi.PodContainerCreateInput { + return []*computeapi.PodContainerCreateInput{ + v.GetContainerSpec(ctx, llm, image, sku, props, devices, diskId), + } +} + +func (v *vllm) GetLLMUrl(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) (string, error) { + // Similar logic to Ollama to determine URL + server, err := llm.GetServer(ctx) + if err != nil { + return "", errors.Wrap(err, "get server") + } + + networkType := llm.NetworkType + if networkType == string(computeapi.NETWORK_TYPE_GUEST) { + if len(llm.LLMIp) == 0 { + return "", errors.Error("LLM IP is empty for guest network") + } + return fmt.Sprintf("http://%s:%d", llm.LLMIp, api.LLM_VLLM_DEFAULT_PORT), nil + } else { + // hostlocal + if len(server.HostAccessIp) == 0 { + return "", errors.Error("host access IP is empty") + } + // Assuming we might map ports or just use the default if host networking isn't strictly port-mapped per instance + // For simplicity, returning default port on host IP, assuming bridge/direct access or specific port mapping logic exists elsewhere. + // NOTE: In ollama.go, it queries AccessInfo. Here we simplify. + return fmt.Sprintf("http://%s:%d", server.HostAccessIp, api.LLM_VLLM_DEFAULT_PORT), nil + } +} + +// StartLLM starts the vLLM server inside the container via exec, then waits for the health endpoint to be ready. +func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) error { + lc, err := llm.GetLLMContainer() + if err != nil { + return errors.Wrap(err, "get llm container") + } + sku, err := llm.GetLLMSku(llm.LLMSkuId) + if err != nil { + return errors.Wrap(err, "get llm sku") + } + tensorParallelSize := 1 + if sku.Devices != nil && len(*sku.Devices) > 0 { + tensorParallelSize = len(*sku.Devices) + } + swapSpaceGiB := (sku.Memory * 1) / (2 * 1024) + if swapSpaceGiB < 1 { + swapSpaceGiB = 1 + } + + preferredPath := "" + if eff := v.GetEffectiveSpec(llm, sku); eff != nil { + if preferred := eff.(*api.LLMSpecVllm).PreferredModel; preferred != "" { + preferredPath = path.Join(api.LLM_VLLM_MODELS_PATH, preferred) + } + } + resolved, err := v.resolveModelAndParams(ctx, lc.CmpId, preferredPath, tensorParallelSize) + if err != nil { + return err + } + if resolved == nil { + return nil // no model + } + + modelEscaped := escapeShellSingleQuoted(resolved.ModelPath) + startCmd := fmt.Sprintf( + `nohup %s --model '%s' --served-model-name "$(basename '%s')" --port %d \ + --tensor-parallel-size %d --swap-space %d --enable-prefix-caching \ + --gpu-memory-utilization %s --max-model-len %d --max-num-seqs %d \ + > /tmp/vllm.log 2>&1 &`, + api.LLM_VLLM_EXEC_PATH, + modelEscaped, + modelEscaped, + api.LLM_VLLM_DEFAULT_PORT, + tensorParallelSize, + swapSpaceGiB, + resolved.GpuUtil, + resolved.MaxModelLen, + resolved.MaxNumSeqs, + ) + _, err = exec(ctx, lc.CmpId, startCmd, 30) + if err != nil { + log.Errorf("vLLM start failed, exec command: %s", startCmd) + return errors.Wrapf(err, "exec start vLLM, command: %s", startCmd) + } + cmd := startCmd + // Wait for health endpoint + baseURL, err := v.GetLLMUrl(ctx, userCred, llm) + if err != nil { + return errors.Wrap(err, "get llm url for health check") + } + healthURL := strings.TrimSuffix(baseURL, "/") + "/health" + deadline := time.Now().Add(api.LLM_VLLM_HEALTH_CHECK_TIMEOUT) + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil) + if err != nil { + return errors.Wrap(err, "new health check request") + } + resp, err := http.DefaultClient.Do(req) + if err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK { + return nil + } + } + select { + case <-ctx.Done(): + return errors.Wrap(ctx.Err(), "context done while waiting for vLLM") + case <-time.After(api.LLM_VLLM_HEALTH_CHECK_INTERVAL): + // continue + } + } + // Optionally read last lines of /tmp/vllm.log for better error message + logTail, _ := exec(ctx, lc.CmpId, "tail -n 20 /tmp/vllm.log 2>/dev/null || true", 5) + if logTail != "" { + return errors.Errorf("vLLM health check timeout after %v, exec command: %s, last log: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd, strings.TrimSpace(logTail)) + } + return errors.Errorf("vLLM health check timeout after %v, exec command: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd) +} + +// ILLMContainerInstantApp implementation + +func (v *vllm) GetProbedInstantModelsExt(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, mdlIds ...string) (map[string]api.LLMInternalInstantMdlInfo, error) { + lc, err := llm.GetLLMContainer() + if err != nil { + return nil, errors.Wrap(err, "get llm container") + } + + // List directories in models path + cmd := fmt.Sprintf("du -sk %s/*/", api.LLM_VLLM_MODELS_PATH) + output, err := exec(ctx, lc.CmpId, cmd, 10) + if err != nil { + // If ls fails, maybe no directory yet, return empty + return make(map[string]api.LLMInternalInstantMdlInfo), nil + } + + modelsMap := make(map[string]api.LLMInternalInstantMdlInfo) + lines := strings.Split(strings.TrimSpace(output), "\n") + for _, line := range lines { + fields := strings.Fields(line) + if len(fields) < 2 { + continue + } + // Size is in KB + sizeKB, _ := strconv.ParseInt(fields[0], 10, 64) + fullPath := fields[1] + name := path.Base(fullPath) + if name == "" { + continue + } + // We treat the directory name as the model name + // For vLLM, name usually implies "organization/model" if downloaded from HF, but here we just list local dirs + modelsMap[name] = api.LLMInternalInstantMdlInfo{ + Name: name, + ModelId: name, + Tag: "latest", + Size: sizeKB * 1024, + } + } + return modelsMap, nil +} + +func (v *vllm) DetectModelPaths(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, pkgInfo api.LLMInternalInstantMdlInfo) ([]string, error) { + lc, err := llm.GetLLMContainer() + if err != nil { + return nil, errors.Wrap(err, "get llm container") + } + + modelPath := path.Join(api.LLM_VLLM_MODELS_PATH, pkgInfo.Name) + checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST' || echo 'MISSING'", modelPath) + output, err := exec(ctx, lc.CmpId, checkCmd, 10) + if err != nil { + return nil, errors.Wrap(err, "failed to check file existence") + } + + if !strings.Contains(output, "EXIST") { + return nil, errors.Errorf("model directory %s missing", modelPath) + } + + return []string{modelPath}, nil +} + +func (v *vllm) GetImageInternalPathMounts(sApp *models.SInstantModel) map[string]string { + // Map host paths to container paths + // For vLLM simple volume mount, this might be 1:1 or based on the base path + res := make(map[string]string) + for _, mount := range sApp.Mounts { + relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH) + res[relPath] = path.Join(api.LLM_VLLM, relPath) + } + return res +} + +func (v *vllm) GetSaveDirectories(sApp *models.SInstantModel) (string, []string, error) { + var filteredMounts []string + for _, mount := range sApp.Mounts { + if strings.HasPrefix(mount, api.LLM_VLLM_BASE_PATH) { + relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH) + filteredMounts = append(filteredMounts, relPath) + } + } + return "", filteredMounts, nil +} + +func (v *vllm) ValidateMounts(mounts []string, mdlName string, mdlTag string) ([]string, error) { + return mounts, nil +} + +func (v *vllm) CheckDuplicateMounts(errStr string, dupIndex int) string { + return "Duplicate mounts detected" +} + +func (v *vllm) GetInstantModelIdByPostOverlay(postOverlay *commonapi.ContainerVolumeMountDiskPostOverlay, mdlNameToId map[string]string) string { + return "" +} + +func (v *vllm) GetDirPostOverlay(dir api.LLMMountDirInfo) *commonapi.ContainerVolumeMountDiskPostOverlay { + uid := int64(1000) + gid := int64(1000) + ov := dir.ToOverlay() + ov.FsUser = &uid + ov.FsGroup = &gid + return &ov +} + +func (v *vllm) PreInstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error { + lc, err := llm.GetLLMContainer() + if err != nil { + return errors.Wrap(err, "get llm container") + } + // Create base directory + cmd := fmt.Sprintf("mkdir -p %s", api.LLM_VLLM_MODELS_PATH) + _, err = exec(ctx, lc.CmpId, cmd, 10) + return err +} + +func (v *vllm) InstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, dirs []string, mdlIds []string) error { + return nil +} + +func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error { + // Optionally remove the model directory + // For safety, we might just log or leave it + return nil +} + +func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) { + lc, err := llm.GetLLMContainer() + if err != nil { + return "", nil, errors.Wrap(err, "get llm container") + } + + // Logic to download model inside the container + // modelName is expected to be like "facebook/opt-125m" + targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelName) + + // Check if already exists + checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST'", targetDir) + out, _ := exec(ctx, lc.CmpId, checkCmd, 10) + if strings.Contains(out, "EXIST") { + log.Infof("Model %s already exists at %s", modelName, targetDir) + return modelName, []string{targetDir}, nil + } + + // Try to use huggingface-cli + // Assuming container has internet access and tools + downloadCmd := fmt.Sprintf("mkdir -p %s && huggingface-cli download %s --local-dir %s --local-dir-use-symlinks False", targetDir, modelName, targetDir) + + // If huggingface-cli is missing, try installing it (if pip available) + // fallback to pip install + fullCmd := fmt.Sprintf("if ! command -v huggingface-cli &> /dev/null; then pip install -U huggingface_hub; fi; %s", downloadCmd) + + log.Infof("Downloading model %s with cmd: %s", modelName, fullCmd) + _, err = exec(ctx, lc.CmpId, fullCmd, 3600) // 1 hour timeout for large models + if err != nil { + return "", nil, errors.Wrapf(err, "failed to download model %s", modelName) + } + + return modelName, []string{targetDir}, nil +} + +// vllmResolveResult is the result of resolving model path and estimating vLLM memory params in the container. +type vllmResolveResult struct { + ModelPath string + GpuUtil string + MaxModelLen int + MaxNumSeqs int +} + +// resolveModelAndParams runs one exec in the container to resolve the model path and estimate +// --gpu-memory-utilization, --max-model-len, --max-num-seqs. Returns (nil, nil) when no model is found. +func (v *vllm) resolveModelAndParams(ctx context.Context, containerId string, preferredPath string, tensorParallelSize int) (*vllmResolveResult, error) { + preferredEscaped := escapeShellSingleQuoted(preferredPath) + escapedScript := escapeShellSingleQuoted(strings.TrimSpace(api.LLM_VLLM_ESTIMATE_PARAMS_SCRIPT)) + defaultGpuUtil := strconv.FormatFloat(float64(api.LLM_VLLM_DEFAULT_GPU_MEMORY_UTIL), 'f', -1, 64) + cmd := fmt.Sprintf( + `mkdir -p %s; + preferred='%s'; + if [ -n "$preferred" ] && [ -d "$preferred" ]; then model="$preferred"; else model=$(ls -d %s/* 2>/dev/null | head -n 1); fi; + if [ -z "$model" ]; then echo "NO_MODEL"; exit 0; fi; + tp=%d; + vllm_out=$(python3 -c '%s' "$model" "$tp" 2>/dev/null) || true; + GPU_MEMORY_UTIL=%s; MAX_MODEL_LEN=%d; MAX_NUM_SEQS=%d; + [ -n "$vllm_out" ] && eval "$vllm_out"; + printf '%%s\n' "$model"; + printf 'GPU_MEMORY_UTIL=%%s MAX_MODEL_LEN=%%s MAX_NUM_SEQS=%%s\n' "$GPU_MEMORY_UTIL" "$MAX_MODEL_LEN" "$MAX_NUM_SEQS"`, + api.LLM_VLLM_MODELS_PATH, + preferredEscaped, + api.LLM_VLLM_MODELS_PATH, + tensorParallelSize, + escapedScript, + defaultGpuUtil, + api.LLM_VLLM_DEFAULT_MAX_MODEL_LEN, + api.LLM_VLLM_DEFAULT_MAX_NUM_SEQS, + ) + out, err := exec(ctx, containerId, cmd, 30) + if err != nil { + return nil, errors.Wrapf(err, "exec resolve model and params") + } + out = strings.TrimSpace(out) + if out == "NO_MODEL" { + return nil, nil + } + lines := strings.SplitN(out, "\n", 2) + if len(lines) < 2 { + return nil, errors.Errorf("vLLM resolve output missing params line: %s", out) + } + res := &vllmResolveResult{ + ModelPath: strings.TrimSpace(lines[0]), + GpuUtil: defaultGpuUtil, + MaxModelLen: api.LLM_VLLM_DEFAULT_MAX_MODEL_LEN, + MaxNumSeqs: api.LLM_VLLM_DEFAULT_MAX_NUM_SEQS, + } + for _, f := range strings.Fields(lines[1]) { + if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_GPU_UTIL); ok { + res.GpuUtil = val + } else if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_LEN); ok { + if n, e := strconv.Atoi(val); e == nil && n > 0 { + res.MaxModelLen = n + } + } else if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_NUM_SEQ); ok { + if n, e := strconv.Atoi(val); e == nil && n > 0 { + res.MaxNumSeqs = n + } + } + } + return res, nil +} diff --git a/pkg/llm/models/llm.go b/pkg/llm/models/llm.go index 5c8d31d0a1..dcc4180075 100644 --- a/pkg/llm/models/llm.go +++ b/pkg/llm/models/llm.go @@ -92,6 +92,15 @@ func (man *SLLMManager) ValidateCreateData(ctx context.Context, userCred mcclien input.LLMSkuId = lSku.Id input.LLMImageId = lSku.GetLLMImageId() + if input.LLMSpec != nil { + drv := lSku.GetLLMContainerDriver() + spec, err := drv.ValidateLLMCreateSpec(ctx, userCred, lSku, input.LLMSpec) + if err != nil { + return input, errors.Wrap(err, "validate LLM create spec") + } + input.LLMSpec = spec + } + return input, nil } @@ -380,6 +389,29 @@ func (llm *SLLM) GetLLMSku(skuId string) (*SLLMSku, error) { return sku.(*SLLMSku), nil } +func (llm *SLLM) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, input api.LLMUpdateInput) (api.LLMUpdateInput, error) { + var err error + input.VirtualResourceBaseUpdateInput, err = llm.SLLMBase.SVirtualResourceBase.ValidateUpdateData(ctx, userCred, query, input.VirtualResourceBaseUpdateInput) + if err != nil { + return input, errors.Wrap(err, "validate VirtualResourceBaseUpdateInput") + } + + if input.LLMSpec == nil { + return input, nil + } + sku, err := llm.GetLLMSku(llm.LLMSkuId) + if err != nil { + return input, errors.Wrap(err, "fetch LLMSku") + } + drv := sku.GetLLMContainerDriver() + spec, err := drv.ValidateLLMUpdateSpec(ctx, userCred, llm, input.LLMSpec) + if err != nil { + return input, errors.Wrap(err, "validate LLM update spec") + } + input.LLMSpec = spec + return input, nil +} + func (llm *SLLM) GetLargeLanguageModelName(name string) (modelName string, modelTag string, err error) { if name == "" { return "", "", errors.Wrap(errors.ErrInvalidStatus, "model name is empty") diff --git a/pkg/llm/models/llm_base_pod.go b/pkg/llm/models/llm_base_pod.go index b316d91046..4063782bd8 100644 --- a/pkg/llm/models/llm_base_pod.go +++ b/pkg/llm/models/llm_base_pod.go @@ -43,7 +43,7 @@ func GetLLMBasePodCreateInput( } data.VcpuCount = skuBase.Cpu - data.VmemSize = skuBase.Memory + 1 + data.VmemSize = skuBase.Memory // data.Name = input.Name + "-" + seclib.RandomPassword(6) data.Name = input.Name diff --git a/pkg/llm/models/llm_container_driver.go b/pkg/llm/models/llm_container_driver.go index 37610832d4..e9d5a76b24 100644 --- a/pkg/llm/models/llm_container_driver.go +++ b/pkg/llm/models/llm_container_driver.go @@ -93,13 +93,16 @@ type ILLMContainerDriver interface { GetEffectiveSpec(llm *SLLM, sku *SLLMSku) interface{} // GetPrimaryImageId returns the primary image id for this SKU type (e.g. LLMImageId for ollama/vllm, DifyApiImageId for dify). GetPrimaryImageId(sku *SLLMSku) string - // ValidateCreateData validates create input and returns the LLMSpec to store. Called by SKU manager after base validation. - ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *llm.LLMSkuCreateInput) (*llm.LLMSkuCreateInput, error) - // ValidateUpdateData validates update input, merges with current spec, and returns the LLMSpec to store. Called by SKU when LLMSpec is not nil. - ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSkuUpdateInput) (*llm.LLMSkuUpdateInput, error) + // ValidateLLMSkuCreateData validates create input and returns the LLMSpec to store. Called by SKU manager after base validation. + ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *llm.LLMSkuCreateInput) (*llm.LLMSkuCreateInput, error) + // ValidateLLMSkuUpdateData validates update input, merges with current spec, and returns the LLMSpec to store. Called by SKU when LLMSpec is not nil. + ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSkuUpdateInput) (*llm.LLMSkuUpdateInput, error) MatchContainerToUpdate(ctr *computeapi.SContainer, podCtrs []*computeapi.PodContainerCreateInput) (*computeapi.PodContainerCreateInput, error) + ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSpec) (*llm.LLMSpec, error) + ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *SLLM, input *llm.LLMSpec) (*llm.LLMSpec, error) + ILLMContainerMCPAgent } diff --git a/pkg/llm/models/llm_instant_model_sync.go b/pkg/llm/models/llm_instant_model_sync.go index c2743ba110..8ceb8bbca7 100644 --- a/pkg/llm/models/llm_instant_model_sync.go +++ b/pkg/llm/models/llm_instant_model_sync.go @@ -820,13 +820,31 @@ func (llm *SLLM) EnsureInstantModelsInstalled(ctx context.Context, userCred mccl if err != nil { return errors.Wrap(err, "FetchApps") } + if len(mdlIds) == 0 { + return nil + } + instModels := make(map[string]SInstantModel) + if err := db.FetchModelObjectsByIds(GetInstantModelManager(), "id", mdlIds, &instModels); err != nil { + return errors.Wrap(err, "FetchModelObjectsByIds") + } var errs []error for _, mdlId := range mdlIds { if _, ok := mdlMap[mdlId]; ok { - // probed - // errs = append(errs, errors.Wrap(errors.ErrInvalidStatus, pkg)) - } else { - // not mounted and not probed + continue + } + instModel, ok := instModels[mdlId] + if !ok { + errs = append(errs, errors.Wrapf(errors.ErrNotFound, "mdlId %s", mdlId)) + continue + } + found := false + for _, info := range mdlMap { + if info.ModelId == instModel.ModelId { + found = true + break + } + } + if !found { errs = append(errs, errors.Wrapf(errors.ErrNotFound, "mdlId %s", mdlId)) } } diff --git a/pkg/llm/models/llm_sku.go b/pkg/llm/models/llm_sku.go index 28d1caef0e..ac4ce40abe 100644 --- a/pkg/llm/models/llm_sku.go +++ b/pkg/llm/models/llm_sku.go @@ -201,7 +201,7 @@ func (man *SLLMSkuManager) ValidateCreateData(ctx context.Context, userCred mccl if err != nil { return input, errors.Wrap(err, "get container driver") } - input, err = drv.ValidateCreateData(ctx, userCred, input) + input, err = drv.ValidateLLMSkuCreateData(ctx, userCred, input) if err != nil { return input, errors.Wrap(err, "validate create input") } @@ -238,7 +238,7 @@ func (sku *SLLMSku) ValidateUpdateData(ctx context.Context, userCred mcclient.To return input, nil } drv := sku.GetLLMContainerDriver() - updateInput, err := drv.ValidateUpdateData(ctx, userCred, sku, &input) + updateInput, err := drv.ValidateLLMSkuUpdateData(ctx, userCred, sku, &input) if err != nil { return input, errors.Wrap(err, "validate update spec") } diff --git a/pkg/llm/tasks/llm/llm_create_task.go b/pkg/llm/tasks/llm/llm_create_task.go index 10a8fdf000..ebbe10414e 100644 --- a/pkg/llm/tasks/llm/llm_create_task.go +++ b/pkg/llm/tasks/llm/llm_create_task.go @@ -94,6 +94,7 @@ func (task *LLMCreateTask) OnLLMRefreshStatusComplete(ctx context.Context, llm * mountedModels, err := llm.FetchMountedModelFullName() if err != nil { task.taskFailed(ctx, llm, errors.Wrap(err, "FetchMountedModelFullName")) + return } // 创建磁盘 diff --git a/pkg/llm/tasks/llm/llm_start_task.go b/pkg/llm/tasks/llm/llm_start_task.go index a5c77d74ed..2c9b02baa7 100644 --- a/pkg/llm/tasks/llm/llm_start_task.go +++ b/pkg/llm/tasks/llm/llm_start_task.go @@ -59,5 +59,10 @@ func (t *LLMStartTask) OnStartedFailed(ctx context.Context, llm *models.SLLM, er } func (t *LLMStartTask) OnStarted(ctx context.Context, llm *models.SLLM, reason jsonutils.JSONObject) { + err := llm.GetLLMContainerDriver().StartLLM(ctx, t.GetUserCred(), llm) + if err != nil { + t.taskFailed(ctx, llm, err.Error()) + return + } t.taskComplete(ctx, llm) } diff --git a/pkg/mcclient/options/llm/image.go b/pkg/mcclient/options/llm/image.go index 31d0003a9e..1a128031be 100644 --- a/pkg/mcclient/options/llm/image.go +++ b/pkg/mcclient/options/llm/image.go @@ -18,7 +18,7 @@ func (o *LLMImageShowOptions) Params() (jsonutils.JSONObject, error) { type LLMImageListOptions struct { options.BaseListOptions - LLMType string `json:"llm_type" choices:"ollama|dify|comfyui" help:"filter by llm type"` + LLMType string `json:"llm_type" choices:"ollama|dify|comfyui|vllm" help:"filter by llm type"` } func (o *LLMImageListOptions) Params() (jsonutils.JSONObject, error) { @@ -30,7 +30,7 @@ type LLMImageCreateOptions struct { IMAGE_NAME string `json:"image_name"` IMAGE_LABEL string `json:"image_label"` CredentialId string `json:"credential_id"` - LLM_TYPE string `json:"llm_type" choices:"ollama|dify|comfyui" help:"llm type: ollama, comfyui or dify"` + LLM_TYPE string `json:"llm_type" choices:"ollama|dify|comfyui|vllm" help:"llm type: ollama, comfyui or dify"` } func (o *LLMImageCreateOptions) Params() (jsonutils.JSONObject, error) { @@ -44,7 +44,7 @@ type LLMImageUpdateOptions struct { ImageName string `json:"image_name"` ImageLabel string `json:"image_label"` CredentialId string `json:"credential_id"` - LlmType string `json:"llm_type" choices:"ollama|dify|vllm|comfyui" help:"llm type: ollama, comfyui or dify"` + LlmType string `json:"llm_type" choices:"ollama|dify|vllm|comfyui" help:"llm type: ollama, comfyui, vllm or dify"` } func (o *LLMImageUpdateOptions) GetId() string { diff --git a/pkg/mcclient/options/llm/llm.go b/pkg/mcclient/options/llm/llm.go index f9977fbf91..1cba8e0b8b 100644 --- a/pkg/mcclient/options/llm/llm.go +++ b/pkg/mcclient/options/llm/llm.go @@ -68,11 +68,12 @@ type LLMBaseCreateOptions struct { type LLMCreateOptions struct { LLMBaseCreateOptions - LLM_SKU_ID string `help:"llm sku id or name" json:"llm_sku_id"` + LLM_SKU_ID string `help:"llm sku id or name" json:"llm_sku_id"` + PreferredModel string `help:"vLLM preferred model dir name under models path (e.g. Qwen/Qwen2-7B)" json:"-"` } func (o *LLMCreateOptions) Params() (jsonutils.JSONObject, error) { - params := jsonutils.Marshal(o) + params := jsonutils.Marshal(o).(*jsonutils.JSONDict) if len(o.Net) > 0 { nets := make([]*computeapi.NetworkConfig, 0) @@ -83,7 +84,16 @@ func (o *LLMCreateOptions) Params() (jsonutils.JSONObject, error) { } nets = append(nets, net) } - params.(*jsonutils.JSONDict).Add(jsonutils.Marshal(nets), "nets") + params.Add(jsonutils.Marshal(nets), "nets") + } + + if o.PreferredModel != "" { + spec := &api.LLMSpec{ + Ollama: nil, + Vllm: &api.LLMSpecVllm{PreferredModel: o.PreferredModel}, + Dify: nil, + } + params.Set("llm_spec", jsonutils.Marshal(spec)) } return params, nil @@ -93,6 +103,32 @@ func (o *LLMCreateOptions) GetCountParam() int { return o.Count } +type LLMUpdateOptions struct { + options.BaseIdOptions + + PreferredModel string `help:"vLLM preferred model dir name under models path (e.g. Qwen/Qwen2-7B); takes effect after pod recreate" json:"-"` +} + +func (o *LLMUpdateOptions) GetId() string { + return o.ID +} + +func (o *LLMUpdateOptions) Params() (jsonutils.JSONObject, error) { + dict, err := options.StructToParams(o) + if err != nil { + return nil, err + } + if o.PreferredModel != "" { + spec := &api.LLMSpec{ + Ollama: nil, + Vllm: &api.LLMSpecVllm{PreferredModel: o.PreferredModel}, + Dify: nil, + } + dict.Set("llm_spec", jsonutils.Marshal(spec)) + } + return dict, nil +} + type LLMDeleteOptions struct { options.BaseIdOptions } diff --git a/scripts/sync_llm_images.sh b/scripts/sync_llm_images.sh index 6c47dec1e4..3981f5b233 100644 --- a/scripts/sync_llm_images.sh +++ b/scripts/sync_llm_images.sh @@ -8,7 +8,7 @@ set -euo pipefail if [ $# -ne 1 ]; then echo "用法: $0 " - echo "例如: $0 crpi-nf3abu98o8qf9y2x.cn-beijing.personal.cr.aliyuncs.com/eikoh" + echo "例如: $0 registry.cn-beijing.aliyuncs.com/cloudpods" exit 1 fi