feat(llm): init vllm as llm_container (#24338)

* feat(llm): init vllm as llm_container

* feat(vllm): fix save-instant-model for vllm

* feat(llm): support quick-model for vllm

* feat(vllm): support preferred-model in llm

* fix(llm): add ValidateSpec in llm_container_driver

* feat(vllm): auto-set vllm params
cwz_eikoh authored 2026-03-13 11:07:02 +08:00, committed by GitHub
parent 1fe46558ce
commit d3ab4af466
17 changed files with 930 additions and 67 deletions

View File

@@ -11,6 +11,7 @@ func init() {
cmd.BatchCreate(new(options.LLMCreateOptions))
cmd.List(new(options.LLMListOptions))
cmd.Show(new(options.LLMShowOptions))
cmd.Update(new(options.LLMUpdateOptions))
cmd.Delete(new(options.LLMDeleteOptions))
// cmd.Perform("change-model", new(options.LLMChangeModelOptions))
cmd.Perform("syncstatus", new(options.LLMIdOptions))

View File

@@ -19,6 +19,7 @@ const (
var (
LLM_IMAGE_TYPES = sets.NewString(
string(LLM_IMAGE_TYPE_OLLAMA),
string(LLM_IMAGE_TYPE_VLLM),
string(LLM_IMAGE_TYPE_DIFY),
string(LLM_IMAGE_TYPE_COMFYUI),
string(LLM_IMAGE_TYPE_OPENCLAW),

View File

@@ -82,6 +82,14 @@ type LLMCreateInput struct {
LLMSpec *LLMSpec `json:"llm_spec,omitempty"`
}
// LLMUpdateInput is the request body for updating an LLM (including llm_spec overrides).
type LLMUpdateInput struct {
apis.VirtualResourceBaseUpdateInput
InstantModelQuotaGb *int `json:"instant_model_quota_gb,omitempty"`
LLMSpec *LLMSpec `json:"llm_spec,omitempty"`
}
type LLMBaseListInput struct {
apis.VirtualResourceListInput
apis.EnabledResourceBaseListInput

View File

@@ -0,0 +1,88 @@
package llm
import "time"
const (
LLM_VLLM = "vllm"
LLM_VLLM_DEFAULT_PORT = 8000
LLM_VLLM_EXEC_PATH = "python3 -m vllm.entrypoints.openai.api_server"
LLM_VLLM_HF_ENDPOINT = "https://hf-mirror.com"
// Directory constants
LLM_VLLM_CACHE_DIR = "/root/.cache/huggingface"
LLM_VLLM_BASE_PATH = "/data/models"
LLM_VLLM_MODELS_PATH = "/data/models/huggingface"
// Health check
LLM_VLLM_HEALTH_CHECK_TIMEOUT = 120 * time.Second // 2 minutes
LLM_VLLM_HEALTH_CHECK_INTERVAL = 10 * time.Second // 10 seconds
// Default vLLM memory params when Python estimation fails (conservative to avoid OOM)
LLM_VLLM_DEFAULT_GPU_MEMORY_UTIL = 0.9
LLM_VLLM_DEFAULT_MAX_MODEL_LEN = 2048
LLM_VLLM_DEFAULT_MAX_NUM_SEQS = 1
// Prefixes for parsing resolveModelAndParams output line (KEY=value)
LLM_VLLM_RESOLVE_OUTPUT_PREFIX_GPU_UTIL = "GPU_MEMORY_UTIL="
LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_LEN = "MAX_MODEL_LEN="
LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_NUM_SEQ = "MAX_NUM_SEQS="
)
const (
// vllmEstimateParamsScript is a Python script run inside the container to estimate
// --gpu-memory-utilization, --max-model-len, and --max-num-seqs from GPU memory and model config.
// Args: sys.argv[1]=model path, sys.argv[2]=tensor_parallel_size.
// Prints one line: GPU_MEMORY_UTIL=0.9 MAX_MODEL_LEN=2624 MAX_NUM_SEQS=1 (eval-safe).
LLM_VLLM_ESTIMATE_PARAMS_SCRIPT = `
import sys, json, os
model_path = sys.argv[1] if len(sys.argv) > 1 else ""
tp = int(sys.argv[2]) if len(sys.argv) > 2 else 1
if not model_path or not os.path.isdir(model_path):
sys.exit(1)
config_path = os.path.join(model_path, "config.json")
if not os.path.isfile(config_path):
sys.exit(1)
with open(config_path) as f:
config = json.load(f)
def get_nested(d, *keys):
for k in keys:
d = d.get(k) if isinstance(d, dict) else None
if d is None:
return None
return d
num_layers = config.get("num_hidden_layers") or config.get("n_layer") or get_nested(config, "text_config", "num_hidden_layers") or 0
num_heads = config.get("num_attention_heads") or config.get("n_head") or get_nested(config, "text_config", "num_attention_heads") or 0
num_kv_heads = config.get("num_key_value_heads") or get_nested(config, "text_config", "num_key_value_heads") or num_heads
hidden_size = config.get("hidden_size") or config.get("n_embd") or get_nested(config, "text_config", "hidden_size") or 0
head_dim = hidden_size // num_heads if num_heads else (hidden_size // 64)
max_pos = config.get("max_position_embeddings") or config.get("n_positions") or get_nested(config, "text_config", "max_position_embeddings") or 4096
if not num_layers or not num_kv_heads or not head_dim:
sys.exit(1)
try:
import torch
total_mem = sum(torch.cuda.get_device_properties(i).total_memory for i in range(torch.cuda.device_count()))
except Exception:
sys.exit(1)
gpu_util = 0.9
activation_overhead = 2 * (1024**3)
num_params = config.get("num_parameters") or config.get("num_params") or get_nested(config, "text_config", "num_parameters")
if num_params is not None:
model_bytes = num_params * 2
else:
model_bytes = 12 * (1024**3)
available_kv = total_mem * gpu_util - model_bytes - activation_overhead
if available_kv <= 0:
available_kv = total_mem * 0.5
kv_per_token = num_layers * 2 * num_kv_heads * head_dim * 2
max_num_seqs = 4
max_model_len = int(available_kv / (kv_per_token * max_num_seqs))
max_model_len = max(1, min(max_model_len, max_pos))
if max_model_len < 256:
max_num_seqs = 1
max_model_len = int(available_kv / (kv_per_token * max_num_seqs))
max_model_len = max(1, min(max_model_len, max_pos))
print("GPU_MEMORY_UTIL=%s MAX_MODEL_LEN=%d MAX_NUM_SEQS=%d" % (gpu_util, max_model_len, max_num_seqs))
`
)
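
As a rough illustration of the estimation logic above, the sketch below redoes the script's arithmetic in Go for a hypothetical setup (one 24 GiB GPU, a 7B-parameter model stored in fp16, 32 layers, 8 KV heads, head dim 128, max_position_embeddings 32768). All of these numbers are illustrative assumptions, not values taken from this commit.

package main

import "fmt"

func main() {
	const gib = 1 << 30
	// Hypothetical hardware and model figures (assumptions for illustration).
	totalMem := 24.0 * gib // total GPU memory across devices
	numParams := 7e9       // 7B parameters, fp16 -> 2 bytes each
	numLayers, numKVHeads, headDim := 32.0, 8.0, 128.0
	maxPos := 32768.0

	gpuUtil := 0.9
	modelBytes := numParams * 2
	activationOverhead := 2.0 * gib
	availableKV := totalMem*gpuUtil - modelBytes - activationOverhead
	// Bytes of KV cache per token: layers * (K and V) * kv_heads * head_dim * 2 bytes (fp16).
	kvPerToken := numLayers * 2 * numKVHeads * headDim * 2
	maxNumSeqs := 4.0
	maxModelLen := availableKV / (kvPerToken * maxNumSeqs)
	if maxModelLen > maxPos {
		maxModelLen = maxPos
	}
	// With these inputs: roughly 6.6 GiB left for KV cache, ~13k tokens per sequence.
	fmt.Printf("GPU_MEMORY_UTIL=%v MAX_MODEL_LEN=%d MAX_NUM_SEQS=%d\n",
		gpuUtil, int(maxModelLen), int(maxNumSeqs))
}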

View File

@@ -41,7 +41,7 @@ func (b *baseDriver) StartLLM(ctx context.Context, userCred mcclient.TokenCreden
return nil
}
func (b *baseDriver) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
func (b *baseDriver) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), &input.LLMImageId)
if err != nil {
return nil, errors.Wrapf(err, "validate image_id %s", input.LLMImageId)
@@ -67,7 +67,7 @@ func (b *baseDriver) ValidateCreateData(ctx context.Context, userCred mcclient.T
return input, nil
}
func (b *baseDriver) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
func (b *baseDriver) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
llmImageId := input.LLMImageId
if llmImageId != "" {
imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), &llmImageId)
@@ -111,5 +111,16 @@ func MatchContainerToUpdateByName(ctr *computeapi.SContainer, podCtrs []*compute
}
func (b *baseDriver) MatchContainerToUpdate(ctr *computeapi.SContainer, podCtrs []*computeapi.PodContainerCreateInput) (*computeapi.PodContainerCreateInput, error) {
if len(podCtrs) == 1 {
return podCtrs[0], nil
}
return MatchContainerToUpdateByName(ctr, podCtrs)
}
func (b *baseDriver) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
return input, nil
}
func (b *baseDriver) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
return input, nil
}

View File

@@ -37,6 +37,36 @@ func (d *dify) GetSpec(sku *models.SLLMSku) interface{} {
return sku.LLMSpec.Dify
}
// mergeDifySpecInto merges src into dst. If fillEmpty is true, only fills dst when dst is empty; otherwise overwrites dst when src is non-empty.
func mergeDifySpecInto(dst, src *api.LLMSpecDify, fillEmpty bool) {
if dst == nil || src == nil {
return
}
mergeStr := func(dstPtr *string, srcVal string) {
if fillEmpty {
if *dstPtr == "" && srcVal != "" {
*dstPtr = srcVal
}
} else {
if srcVal != "" {
*dstPtr = srcVal
}
}
}
mergeStr(&dst.PostgresImageId, src.PostgresImageId)
mergeStr(&dst.RedisImageId, src.RedisImageId)
mergeStr(&dst.NginxImageId, src.NginxImageId)
mergeStr(&dst.DifyApiImageId, src.DifyApiImageId)
mergeStr(&dst.DifyPluginImageId, src.DifyPluginImageId)
mergeStr(&dst.DifyWebImageId, src.DifyWebImageId)
mergeStr(&dst.DifySandboxImageId, src.DifySandboxImageId)
mergeStr(&dst.DifySSRFImageId, src.DifySSRFImageId)
mergeStr(&dst.DifyWeaviateImageId, src.DifyWeaviateImageId)
if (fillEmpty && len(dst.CustomizedEnvs) == 0 && len(src.CustomizedEnvs) > 0) || (!fillEmpty && len(src.CustomizedEnvs) > 0) {
dst.CustomizedEnvs = src.CustomizedEnvs
}
}
// mergeDify merges llm and sku Dify specs; llm takes priority, use sku when llm is nil or zero.
func mergeDify(llm, sku *api.LLMSpecDify) *api.LLMSpecDify {
if llm != nil && !llm.IsZero() {
@@ -88,14 +118,60 @@ func (d *dify) GetPrimaryContainer(ctx context.Context, llm *models.SLLM, contai
return nil, errors.Error("api container not found")
}
func (d *dify) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
func (d *dify) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
if input.LLMSpec == nil || input.LLMSpec.Dify == nil {
return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU requires llm_spec with type dify and image ids")
}
if input.MountedModels != nil {
return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models")
}
difySpec := input.LLMSpec.Dify
// Reuse ValidateLLMCreateSpec to normalize/validate LLMSpec.
spec, err := d.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
if err != nil {
return nil, err
}
input.LLMSpec = spec
if input.LLMSpec != nil && input.LLMSpec.Dify != nil {
input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId
}
return input, nil
}
func (d *dify) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
if input.MountedModels != nil {
return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models")
}
if input.LLMSpec == nil {
return input, nil
}
// Reuse ValidateLLMUpdateSpec by treating current SKU spec as the "current llm spec".
fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
spec, err := d.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
if err != nil {
return nil, err
}
input.LLMSpec = spec
// if dify_api_image_id is set, use it as the primary image id
if input.LLMSpec != nil && input.LLMSpec.Dify != nil && input.LLMSpec.Dify.DifyApiImageId != "" {
input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId
}
return input, nil
}
// ValidateLLMCreateSpec implements ILLMContainerDriver. Validates image ids and merges empty fields from SKU spec.
func (d *dify) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
if input == nil || input.Dify == nil {
return input, nil
}
difySpec := input.Dify
// Merge empty fields from SKU so the stored spec is complete
if sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Dify != nil {
mergeDifySpecInto(difySpec, sku.LLMSpec.Dify, true)
}
// Validate non-empty image ids
for _, imgId := range []*string{&difySpec.PostgresImageId, &difySpec.RedisImageId, &difySpec.NginxImageId, &difySpec.DifyApiImageId, &difySpec.DifyPluginImageId, &difySpec.DifyWebImageId, &difySpec.DifySandboxImageId, &difySpec.DifySSRFImageId, &difySpec.DifyWeaviateImageId} {
if *imgId == "" {
continue
@@ -105,63 +181,68 @@ func (d *dify) ValidateCreateData(ctx context.Context, userCred mcclient.TokenCr
return nil, errors.Wrapf(err, "validate image_id %s", *imgId)
}
img := imgObj.(*models.SLLMImage)
if img.LLMType != input.LLMType {
return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type %s", *imgId, input.LLMType)
if img.LLMType != string(api.LLM_CONTAINER_DIFY) {
return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type dify", *imgId)
}
*imgId = img.Id
}
input.LLMImageId = difySpec.DifyApiImageId
return input, nil
return &api.LLMSpec{Dify: difySpec}, nil
}
func (d *dify) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
if input.MountedModels != nil {
return nil, errors.Wrap(httperrors.ErrInputParameter, "dify SKU does not support mounted models")
// ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges input with current LLM spec (non-empty overwrites); validates image ids.
func (d *dify) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
if input == nil || input.Dify == nil {
return input, nil
}
if input.LLMSpec == nil || input.LLMSpec.Dify == nil {
return nil, nil
}
currentSpec := d.GetSpec(sku)
if currentSpec == nil {
return nil, nil
}
updated := *currentSpec.(*api.LLMSpecDify)
difySpec := input.LLMSpec.Dify
mergeStr := func(dst *string, src string) {
if src != "" {
*dst = src
// Start from current LLM spec, or SKU spec as base
var base *api.LLMSpecDify
if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Dify != nil {
b := *llm.LLMSpec.Dify
base = &b
if llm.LLMSpec.Dify.CustomizedEnvs != nil {
base.CustomizedEnvs = make([]*api.DifyCustomizedEnv, len(llm.LLMSpec.Dify.CustomizedEnvs))
copy(base.CustomizedEnvs, llm.LLMSpec.Dify.CustomizedEnvs)
}
} else if llm != nil {
sku, err := llm.GetLLMSku(llm.LLMSkuId)
if err == nil && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Dify != nil {
b := *sku.LLMSpec.Dify
base = &b
if sku.LLMSpec.Dify.CustomizedEnvs != nil {
base.CustomizedEnvs = make([]*api.DifyCustomizedEnv, len(sku.LLMSpec.Dify.CustomizedEnvs))
copy(base.CustomizedEnvs, sku.LLMSpec.Dify.CustomizedEnvs)
}
}
}
mergeStr(&updated.PostgresImageId, difySpec.PostgresImageId)
mergeStr(&updated.RedisImageId, difySpec.RedisImageId)
mergeStr(&updated.NginxImageId, difySpec.NginxImageId)
mergeStr(&updated.DifyApiImageId, difySpec.DifyApiImageId)
mergeStr(&updated.DifyPluginImageId, difySpec.DifyPluginImageId)
mergeStr(&updated.DifyWebImageId, difySpec.DifyWebImageId)
mergeStr(&updated.DifySandboxImageId, difySpec.DifySandboxImageId)
mergeStr(&updated.DifySSRFImageId, difySpec.DifySSRFImageId)
mergeStr(&updated.DifyWeaviateImageId, difySpec.DifyWeaviateImageId)
if len(difySpec.CustomizedEnvs) > 0 {
updated.CustomizedEnvs = difySpec.CustomizedEnvs
if base == nil {
base = &api.LLMSpecDify{}
}
for _, imgId := range []*string{&updated.PostgresImageId, &updated.RedisImageId, &updated.NginxImageId, &updated.DifyApiImageId, &updated.DifyPluginImageId, &updated.DifyWebImageId, &updated.DifySandboxImageId, &updated.DifySSRFImageId, &updated.DifyWeaviateImageId} {
if *imgId != "" {
imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), imgId)
if err != nil {
return nil, errors.Wrapf(err, "validate image_id %s", *imgId)
}
img := imgObj.(*models.SLLMImage)
if img.LLMType != sku.LLMType {
return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type %s", *imgId, sku.LLMType)
}
*imgId = img.GetId()
mergeDifySpecInto(base, input.Dify, false)
// Validate non-empty image ids
for _, imgId := range []*string{&base.PostgresImageId, &base.RedisImageId, &base.NginxImageId, &base.DifyApiImageId, &base.DifyPluginImageId, &base.DifyWebImageId, &base.DifySandboxImageId, &base.DifySSRFImageId, &base.DifyWeaviateImageId} {
if *imgId == "" {
continue
}
imgObj, err := validators.ValidateModel(ctx, userCred, models.GetLLMImageManager(), imgId)
if err != nil {
return nil, errors.Wrapf(err, "validate image_id %s", *imgId)
}
img := imgObj.(*models.SLLMImage)
if img.LLMType != string(api.LLM_CONTAINER_DIFY) {
return nil, errors.Wrapf(httperrors.ErrInvalidStatus, "image %s is not of type dify", *imgId)
}
*imgId = img.Id
}
// if dify_api_image_id is set, use it as the primary image id
if input.LLMSpec.Dify.DifyApiImageId != "" {
input.LLMImageId = input.LLMSpec.Dify.DifyApiImageId
return &api.LLMSpec{Dify: base}, nil
}
// GetContainerSpec is required by ILLMContainerDriver but not used for Dify; pod creation uses GetContainerSpecs. Return the first container so the interface is satisfied.
func (d *dify) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
specs := d.GetContainerSpecs(ctx, llm, image, sku, props, devices, diskId)
if len(specs) == 0 {
return nil
}
return input, nil
return specs[0]
}
// GetContainerSpecs returns all Dify pod containers (postgres, redis, api, worker, nginx, etc.). Uses effective spec (llm + sku merged by driver).
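
The merge helper above is used in both directions: on create, empty fields in the requested spec are filled from the SKU (fillEmpty=true); on update, non-empty fields in the request overwrite the current spec (fillEmpty=false). A minimal standalone sketch of that behavior, using a trimmed-down stand-in struct rather than the real api.LLMSpecDify:

package main

import "fmt"

type difySpec struct{ NginxImageId, RedisImageId string }

// merge copies src into dst field by field: with fillEmpty it only fills blank
// dst fields, otherwise non-empty src fields overwrite dst.
func merge(dst, src *difySpec, fillEmpty bool) {
	set := func(dst *string, src string) {
		if fillEmpty {
			if *dst == "" && src != "" {
				*dst = src
			}
		} else if src != "" {
			*dst = src
		}
	}
	set(&dst.NginxImageId, src.NginxImageId)
	set(&dst.RedisImageId, src.RedisImageId)
}

func main() {
	// Create path: the request only sets nginx, the SKU fills the rest (fillEmpty=true).
	req := &difySpec{NginxImageId: "nginx-new"}
	sku := &difySpec{NginxImageId: "nginx-sku", RedisImageId: "redis-sku"}
	merge(req, sku, true)
	fmt.Println(*req) // {nginx-new redis-sku}

	// Update path: only non-empty request fields overwrite the current spec (fillEmpty=false).
	cur := &difySpec{NginxImageId: "nginx-old", RedisImageId: "redis-old"}
	upd := &difySpec{RedisImageId: "redis-new"}
	merge(cur, upd, false)
	fmt.Println(*cur) // {nginx-old redis-new}
}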

View File

@@ -0,0 +1,578 @@
package llm_container
import (
"context"
"fmt"
"net/http"
"path"
"strconv"
"strings"
"time"
"yunion.io/x/log"
"yunion.io/x/pkg/errors"
commonapi "yunion.io/x/onecloud/pkg/apis"
computeapi "yunion.io/x/onecloud/pkg/apis/compute"
api "yunion.io/x/onecloud/pkg/apis/llm"
"yunion.io/x/onecloud/pkg/llm/models"
"yunion.io/x/onecloud/pkg/mcclient"
)
func init() {
models.RegisterLLMContainerDriver(newVLLM())
}
type vllm struct {
baseDriver
}
func newVLLM() models.ILLMContainerDriver {
return &vllm{baseDriver: newBaseDriver(api.LLM_CONTAINER_VLLM)}
}
// escapeShellSingleQuoted escapes s for use inside a single-quoted shell string (each ' becomes '\'').
func escapeShellSingleQuoted(s string) string {
return strings.ReplaceAll(s, "'", "'\\''")
}
func (v *vllm) GetSpec(sku *models.SLLMSku) interface{} {
if sku == nil || sku.LLMType != string(api.LLM_CONTAINER_VLLM) || sku.LLMSpec == nil || sku.LLMSpec.Vllm == nil {
return nil
}
return sku.LLMSpec.Vllm
}
func (v *vllm) GetEffectiveSpec(llm *models.SLLM, sku *models.SLLMSku) interface{} {
var skuSpec *api.LLMSpecVllm
if s := v.GetSpec(sku); s != nil {
skuSpec = s.(*api.LLMSpecVllm)
}
if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
if llm.LLMSpec.Vllm.PreferredModel != "" {
out := *llm.LLMSpec.Vllm
return &out
}
// llm explicitly present but empty -> fall back to sku default
}
if skuSpec != nil {
out := *skuSpec
return &out
}
return nil
}
func (v *vllm) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
input, err := v.baseDriver.ValidateLLMSkuCreateData(ctx, userCred, input)
if err != nil {
return nil, err
}
// Reuse ValidateLLMCreateSpec; ensure llm_spec.vllm always exists for vLLM SKU.
spec, err := v.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
if err != nil {
return nil, err
}
if spec == nil {
spec = &api.LLMSpec{Vllm: &api.LLMSpecVllm{}}
} else if spec.Vllm == nil {
spec.Vllm = &api.LLMSpecVllm{}
}
input.LLMSpec = spec
return input, nil
}
func (v *vllm) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
input, err := v.baseDriver.ValidateLLMSkuUpdateData(ctx, userCred, sku, input)
if err != nil {
return nil, err
}
if input.LLMSpec == nil {
return input, nil
}
// Reuse ValidateLLMUpdateSpec by treating current SKU spec as the "current llm spec".
fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
spec, err := v.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
if err != nil {
return nil, err
}
input.LLMSpec = spec
if input.LLMSpec != nil && input.LLMSpec.Vllm == nil {
input.LLMSpec.Vllm = &api.LLMSpecVllm{}
}
return input, nil
}
// ValidateLLMCreateSpec implements ILLMContainerDriver. Merges preferred_model from the SKU when it is empty in the input.
func (v *vllm) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
if input == nil {
return nil, nil
}
preferred := ""
if input.Vllm != nil {
preferred = input.Vllm.PreferredModel
}
if preferred == "" && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
preferred = sku.LLMSpec.Vllm.PreferredModel
}
return &api.LLMSpec{Vllm: &api.LLMSpecVllm{PreferredModel: preferred}}, nil
}
// ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges preferred_model with the current LLM spec; only a non-empty input value overwrites it.
func (v *vllm) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
if input == nil || input.Vllm == nil {
return input, nil
}
current := ""
if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
current = llm.LLMSpec.Vllm.PreferredModel
}
preferred := input.Vllm.PreferredModel
if preferred == "" {
preferred = current
}
return &api.LLMSpec{Vllm: &api.LLMSpecVllm{PreferredModel: preferred}}, nil
}
func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
// Container entrypoint only keeps the container alive; vLLM is started by StartLLM via exec.
startScript := `mkdir -p ` + api.LLM_VLLM_MODELS_PATH + ` && exec sleep infinity`
envs := []*commonapi.ContainerKeyValue{
{
Key: "HUGGING_FACE_HUB_CACHE",
Value: api.LLM_VLLM_CACHE_DIR,
},
{
Key: "HF_ENDPOINT",
Value: api.LLM_VLLM_HF_ENDPOINT,
},
// // Fix Error 803
// {
// Key: "LD_LIBRARY_PATH",
// Value: "/lib64:/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}",
// },
// // Fix Error 803
// {
// Key: "LD_PRELOAD",
// Value: "/lib/libcuda.so.1 /lib/libnvidia-ptxjitcompiler.so.1 /lib/libnvidia-gpucomp.so",
// },
}
spec := computeapi.ContainerSpec{
ContainerSpec: commonapi.ContainerSpec{
Image: image.ToContainerImage(),
ImageCredentialId: image.CredentialId,
Command: []string{"/bin/sh", "-c"},
Args: []string{startScript},
EnableLxcfs: true,
AlwaysRestart: true,
Envs: envs,
},
}
// GPU Devices
if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
for i := range *sku.Devices {
index := i
spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
IsolatedDevice: &computeapi.ContainerIsolatedDevice{
Index: &index,
},
})
}
} else if len(devices) > 0 {
for i := range devices {
spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
IsolatedDevice: &computeapi.ContainerIsolatedDevice{
Id: devices[i].Id,
},
})
}
}
// Volume Mounts
diskIndex := 0
postOverlays, err := llm.GetMountedModelsPostOverlay()
if err != nil {
log.Errorf("GetMountedModelsPostOverlay failed %s", err)
}
ctrVols := []*commonapi.ContainerVolumeMount{
{
Disk: &commonapi.ContainerVolumeMountDisk{
SubDirectory: api.LLM_VLLM,
Index: &diskIndex,
PostOverlay: postOverlays,
},
Type: commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
MountPath: api.LLM_VLLM_BASE_PATH,
ReadOnly: false,
Propagation: commonapi.MOUNTPROPAGATION_PROPAGATION_HOST_TO_CONTAINER,
},
{
// Mount cache dir to save HF cache
Disk: &commonapi.ContainerVolumeMountDisk{
SubDirectory: "cache",
Index: &diskIndex,
},
Type: commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
MountPath: "/root/.cache",
ReadOnly: false,
},
}
spec.VolumeMounts = append(spec.VolumeMounts, ctrVols...)
return &computeapi.PodContainerCreateInput{
ContainerSpec: spec,
}
}
func (v *vllm) GetContainerSpecs(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) []*computeapi.PodContainerCreateInput {
return []*computeapi.PodContainerCreateInput{
v.GetContainerSpec(ctx, llm, image, sku, props, devices, diskId),
}
}
func (v *vllm) GetLLMUrl(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) (string, error) {
// Similar logic to Ollama to determine URL
server, err := llm.GetServer(ctx)
if err != nil {
return "", errors.Wrap(err, "get server")
}
networkType := llm.NetworkType
if networkType == string(computeapi.NETWORK_TYPE_GUEST) {
if len(llm.LLMIp) == 0 {
return "", errors.Error("LLM IP is empty for guest network")
}
return fmt.Sprintf("http://%s:%d", llm.LLMIp, api.LLM_VLLM_DEFAULT_PORT), nil
} else {
// hostlocal
if len(server.HostAccessIp) == 0 {
return "", errors.Error("host access IP is empty")
}
// For the hostlocal network type, assume the default port is reachable on the host access IP;
// per-instance port mapping, if any, is handled elsewhere.
// NOTE: ollama.go queries AccessInfo for this; the logic here is simplified.
return fmt.Sprintf("http://%s:%d", server.HostAccessIp, api.LLM_VLLM_DEFAULT_PORT), nil
}
}
// StartLLM starts the vLLM server inside the container via exec, then waits for the health endpoint to be ready.
func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) error {
lc, err := llm.GetLLMContainer()
if err != nil {
return errors.Wrap(err, "get llm container")
}
sku, err := llm.GetLLMSku(llm.LLMSkuId)
if err != nil {
return errors.Wrap(err, "get llm sku")
}
tensorParallelSize := 1
if sku.Devices != nil && len(*sku.Devices) > 0 {
tensorParallelSize = len(*sku.Devices)
}
swapSpaceGiB := (sku.Memory * 1) / (2 * 1024)
if swapSpaceGiB < 1 {
swapSpaceGiB = 1
}
preferredPath := ""
if eff := v.GetEffectiveSpec(llm, sku); eff != nil {
if preferred := eff.(*api.LLMSpecVllm).PreferredModel; preferred != "" {
preferredPath = path.Join(api.LLM_VLLM_MODELS_PATH, preferred)
}
}
resolved, err := v.resolveModelAndParams(ctx, lc.CmpId, preferredPath, tensorParallelSize)
if err != nil {
return err
}
if resolved == nil {
return nil // no model
}
modelEscaped := escapeShellSingleQuoted(resolved.ModelPath)
startCmd := fmt.Sprintf(
`nohup %s --model '%s' --served-model-name "$(basename '%s')" --port %d \
--tensor-parallel-size %d --swap-space %d --enable-prefix-caching \
--gpu-memory-utilization %s --max-model-len %d --max-num-seqs %d \
> /tmp/vllm.log 2>&1 &`,
api.LLM_VLLM_EXEC_PATH,
modelEscaped,
modelEscaped,
api.LLM_VLLM_DEFAULT_PORT,
tensorParallelSize,
swapSpaceGiB,
resolved.GpuUtil,
resolved.MaxModelLen,
resolved.MaxNumSeqs,
)
_, err = exec(ctx, lc.CmpId, startCmd, 30)
if err != nil {
log.Errorf("vLLM start failed, exec command: %s", startCmd)
return errors.Wrapf(err, "exec start vLLM, command: %s", startCmd)
}
cmd := startCmd
// Wait for health endpoint
baseURL, err := v.GetLLMUrl(ctx, userCred, llm)
if err != nil {
return errors.Wrap(err, "get llm url for health check")
}
healthURL := strings.TrimSuffix(baseURL, "/") + "/health"
deadline := time.Now().Add(api.LLM_VLLM_HEALTH_CHECK_TIMEOUT)
for time.Now().Before(deadline) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
if err != nil {
return errors.Wrap(err, "new health check request")
}
resp, err := http.DefaultClient.Do(req)
if err == nil {
resp.Body.Close()
if resp.StatusCode == http.StatusOK {
return nil
}
}
select {
case <-ctx.Done():
return errors.Wrap(ctx.Err(), "context done while waiting for vLLM")
case <-time.After(api.LLM_VLLM_HEALTH_CHECK_INTERVAL):
// continue
}
}
// Optionally read last lines of /tmp/vllm.log for better error message
logTail, _ := exec(ctx, lc.CmpId, "tail -n 20 /tmp/vllm.log 2>/dev/null || true", 5)
if logTail != "" {
return errors.Errorf("vLLM health check timeout after %v, exec command: %s, last log: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd, strings.TrimSpace(logTail))
}
return errors.Errorf("vLLM health check timeout after %v, exec command: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd)
}
// ILLMContainerInstantApp implementation
func (v *vllm) GetProbedInstantModelsExt(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, mdlIds ...string) (map[string]api.LLMInternalInstantMdlInfo, error) {
lc, err := llm.GetLLMContainer()
if err != nil {
return nil, errors.Wrap(err, "get llm container")
}
// List directories in models path
cmd := fmt.Sprintf("du -sk %s/*/", api.LLM_VLLM_MODELS_PATH)
output, err := exec(ctx, lc.CmpId, cmd, 10)
if err != nil {
// If du fails (likely no model directory yet), return empty
return make(map[string]api.LLMInternalInstantMdlInfo), nil
}
modelsMap := make(map[string]api.LLMInternalInstantMdlInfo)
lines := strings.Split(strings.TrimSpace(output), "\n")
for _, line := range lines {
fields := strings.Fields(line)
if len(fields) < 2 {
continue
}
// Size is in KB
sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
fullPath := fields[1]
name := path.Base(fullPath)
if name == "" {
continue
}
// We treat the directory name as the model name
// For vLLM, name usually implies "organization/model" if downloaded from HF, but here we just list local dirs
modelsMap[name] = api.LLMInternalInstantMdlInfo{
Name: name,
ModelId: name,
Tag: "latest",
Size: sizeKB * 1024,
}
}
return modelsMap, nil
}
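
To make the du parsing above concrete, here is a small standalone sketch with one fabricated du -sk output line; the path and size are illustrative assumptions, not real output from a container.

package main

import (
	"fmt"
	"path"
	"strconv"
	"strings"
)

func main() {
	// One fabricated line of "du -sk /data/models/huggingface/*/" output:
	// size in KB, a tab, then the directory path.
	line := "14680064\t/data/models/huggingface/opt-125m/"
	fields := strings.Fields(line)
	sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
	name := path.Base(fields[1]) // trailing slash is dropped, leaving "opt-125m"
	fmt.Printf("model=%s size=%d bytes\n", name, sizeKB*1024)
}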
func (v *vllm) DetectModelPaths(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, pkgInfo api.LLMInternalInstantMdlInfo) ([]string, error) {
lc, err := llm.GetLLMContainer()
if err != nil {
return nil, errors.Wrap(err, "get llm container")
}
modelPath := path.Join(api.LLM_VLLM_MODELS_PATH, pkgInfo.Name)
checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST' || echo 'MISSING'", modelPath)
output, err := exec(ctx, lc.CmpId, checkCmd, 10)
if err != nil {
return nil, errors.Wrap(err, "failed to check file existence")
}
if !strings.Contains(output, "EXIST") {
return nil, errors.Errorf("model directory %s missing", modelPath)
}
return []string{modelPath}, nil
}
func (v *vllm) GetImageInternalPathMounts(sApp *models.SInstantModel) map[string]string {
// Map host paths to container paths
// For vLLM simple volume mount, this might be 1:1 or based on the base path
res := make(map[string]string)
for _, mount := range sApp.Mounts {
relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
res[relPath] = path.Join(api.LLM_VLLM, relPath)
}
return res
}
func (v *vllm) GetSaveDirectories(sApp *models.SInstantModel) (string, []string, error) {
var filteredMounts []string
for _, mount := range sApp.Mounts {
if strings.HasPrefix(mount, api.LLM_VLLM_BASE_PATH) {
relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
filteredMounts = append(filteredMounts, relPath)
}
}
return "", filteredMounts, nil
}
func (v *vllm) ValidateMounts(mounts []string, mdlName string, mdlTag string) ([]string, error) {
return mounts, nil
}
func (v *vllm) CheckDuplicateMounts(errStr string, dupIndex int) string {
return "Duplicate mounts detected"
}
func (v *vllm) GetInstantModelIdByPostOverlay(postOverlay *commonapi.ContainerVolumeMountDiskPostOverlay, mdlNameToId map[string]string) string {
return ""
}
func (v *vllm) GetDirPostOverlay(dir api.LLMMountDirInfo) *commonapi.ContainerVolumeMountDiskPostOverlay {
uid := int64(1000)
gid := int64(1000)
ov := dir.ToOverlay()
ov.FsUser = &uid
ov.FsGroup = &gid
return &ov
}
func (v *vllm) PreInstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
lc, err := llm.GetLLMContainer()
if err != nil {
return errors.Wrap(err, "get llm container")
}
// Create base directory
cmd := fmt.Sprintf("mkdir -p %s", api.LLM_VLLM_MODELS_PATH)
_, err = exec(ctx, lc.CmpId, cmd, 10)
return err
}
func (v *vllm) InstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, dirs []string, mdlIds []string) error {
return nil
}
func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
// Optionally remove the model directory
// For safety, we might just log or leave it
return nil
}
func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
lc, err := llm.GetLLMContainer()
if err != nil {
return "", nil, errors.Wrap(err, "get llm container")
}
// Logic to download model inside the container
// modelName is expected to be like "facebook/opt-125m"
targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelName)
// Check if already exists
checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST'", targetDir)
out, _ := exec(ctx, lc.CmpId, checkCmd, 10)
if strings.Contains(out, "EXIST") {
log.Infof("Model %s already exists at %s", modelName, targetDir)
return modelName, []string{targetDir}, nil
}
// Try to use huggingface-cli
// Assuming container has internet access and tools
downloadCmd := fmt.Sprintf("mkdir -p %s && huggingface-cli download %s --local-dir %s --local-dir-use-symlinks False", targetDir, modelName, targetDir)
// If huggingface-cli is missing, try installing it (if pip available)
// fallback to pip install
fullCmd := fmt.Sprintf("if ! command -v huggingface-cli &> /dev/null; then pip install -U huggingface_hub; fi; %s", downloadCmd)
log.Infof("Downloading model %s with cmd: %s", modelName, fullCmd)
_, err = exec(ctx, lc.CmpId, fullCmd, 3600) // 1 hour timeout for large models
if err != nil {
return "", nil, errors.Wrapf(err, "failed to download model %s", modelName)
}
return modelName, []string{targetDir}, nil
}
// vllmResolveResult is the result of resolving model path and estimating vLLM memory params in the container.
type vllmResolveResult struct {
ModelPath string
GpuUtil string
MaxModelLen int
MaxNumSeqs int
}
// resolveModelAndParams runs one exec in the container to resolve the model path and estimate
// --gpu-memory-utilization, --max-model-len, --max-num-seqs. Returns (nil, nil) when no model is found.
func (v *vllm) resolveModelAndParams(ctx context.Context, containerId string, preferredPath string, tensorParallelSize int) (*vllmResolveResult, error) {
preferredEscaped := escapeShellSingleQuoted(preferredPath)
escapedScript := escapeShellSingleQuoted(strings.TrimSpace(api.LLM_VLLM_ESTIMATE_PARAMS_SCRIPT))
defaultGpuUtil := strconv.FormatFloat(float64(api.LLM_VLLM_DEFAULT_GPU_MEMORY_UTIL), 'f', -1, 64)
cmd := fmt.Sprintf(
`mkdir -p %s;
preferred='%s';
if [ -n "$preferred" ] && [ -d "$preferred" ]; then model="$preferred"; else model=$(ls -d %s/* 2>/dev/null | head -n 1); fi;
if [ -z "$model" ]; then echo "NO_MODEL"; exit 0; fi;
tp=%d;
vllm_out=$(python3 -c '%s' "$model" "$tp" 2>/dev/null) || true;
GPU_MEMORY_UTIL=%s; MAX_MODEL_LEN=%d; MAX_NUM_SEQS=%d;
[ -n "$vllm_out" ] && eval "$vllm_out";
printf '%%s\n' "$model";
printf 'GPU_MEMORY_UTIL=%%s MAX_MODEL_LEN=%%s MAX_NUM_SEQS=%%s\n' "$GPU_MEMORY_UTIL" "$MAX_MODEL_LEN" "$MAX_NUM_SEQS"`,
api.LLM_VLLM_MODELS_PATH,
preferredEscaped,
api.LLM_VLLM_MODELS_PATH,
tensorParallelSize,
escapedScript,
defaultGpuUtil,
api.LLM_VLLM_DEFAULT_MAX_MODEL_LEN,
api.LLM_VLLM_DEFAULT_MAX_NUM_SEQS,
)
out, err := exec(ctx, containerId, cmd, 30)
if err != nil {
return nil, errors.Wrapf(err, "exec resolve model and params")
}
out = strings.TrimSpace(out)
if out == "NO_MODEL" {
return nil, nil
}
lines := strings.SplitN(out, "\n", 2)
if len(lines) < 2 {
return nil, errors.Errorf("vLLM resolve output missing params line: %s", out)
}
res := &vllmResolveResult{
ModelPath: strings.TrimSpace(lines[0]),
GpuUtil: defaultGpuUtil,
MaxModelLen: api.LLM_VLLM_DEFAULT_MAX_MODEL_LEN,
MaxNumSeqs: api.LLM_VLLM_DEFAULT_MAX_NUM_SEQS,
}
for _, f := range strings.Fields(lines[1]) {
if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_GPU_UTIL); ok {
res.GpuUtil = val
} else if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_LEN); ok {
if n, e := strconv.Atoi(val); e == nil && n > 0 {
res.MaxModelLen = n
}
} else if val, ok := strings.CutPrefix(f, api.LLM_VLLM_RESOLVE_OUTPUT_PREFIX_MAX_NUM_SEQ); ok {
if n, e := strconv.Atoi(val); e == nil && n > 0 {
res.MaxNumSeqs = n
}
}
}
return res, nil
}
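
Putting StartLLM and resolveModelAndParams together: for a hypothetical resolved model at /data/models/huggingface/Qwen/Qwen2-7B on a single GPU with 8 GiB of swap space and resolved params GPU_MEMORY_UTIL=0.9 MAX_MODEL_LEN=13438 MAX_NUM_SEQS=4, the exec'd start command would look roughly like the string built below. All of these values are illustrative assumptions, not output from this code.

package main

import "fmt"

func main() {
	execPath := "python3 -m vllm.entrypoints.openai.api_server" // api.LLM_VLLM_EXEC_PATH
	model := "/data/models/huggingface/Qwen/Qwen2-7B"           // hypothetical resolved model path
	cmd := fmt.Sprintf(
		`nohup %s --model '%s' --served-model-name "$(basename '%s')" --port %d \
	--tensor-parallel-size %d --swap-space %d --enable-prefix-caching \
	--gpu-memory-utilization %s --max-model-len %d --max-num-seqs %d \
	> /tmp/vllm.log 2>&1 &`,
		execPath, model, model, 8000, 1, 8, "0.9", 13438, 4)
	fmt.Println(cmd)
}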

View File

@@ -92,6 +92,15 @@ func (man *SLLMManager) ValidateCreateData(ctx context.Context, userCred mcclien
input.LLMSkuId = lSku.Id
input.LLMImageId = lSku.GetLLMImageId()
if input.LLMSpec != nil {
drv := lSku.GetLLMContainerDriver()
spec, err := drv.ValidateLLMCreateSpec(ctx, userCred, lSku, input.LLMSpec)
if err != nil {
return input, errors.Wrap(err, "validate LLM create spec")
}
input.LLMSpec = spec
}
return input, nil
}
@@ -380,6 +389,29 @@ func (llm *SLLM) GetLLMSku(skuId string) (*SLLMSku, error) {
return sku.(*SLLMSku), nil
}
func (llm *SLLM) ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, input api.LLMUpdateInput) (api.LLMUpdateInput, error) {
var err error
input.VirtualResourceBaseUpdateInput, err = llm.SLLMBase.SVirtualResourceBase.ValidateUpdateData(ctx, userCred, query, input.VirtualResourceBaseUpdateInput)
if err != nil {
return input, errors.Wrap(err, "validate VirtualResourceBaseUpdateInput")
}
if input.LLMSpec == nil {
return input, nil
}
sku, err := llm.GetLLMSku(llm.LLMSkuId)
if err != nil {
return input, errors.Wrap(err, "fetch LLMSku")
}
drv := sku.GetLLMContainerDriver()
spec, err := drv.ValidateLLMUpdateSpec(ctx, userCred, llm, input.LLMSpec)
if err != nil {
return input, errors.Wrap(err, "validate LLM update spec")
}
input.LLMSpec = spec
return input, nil
}
func (llm *SLLM) GetLargeLanguageModelName(name string) (modelName string, modelTag string, err error) {
if name == "" {
return "", "", errors.Wrap(errors.ErrInvalidStatus, "model name is empty")

View File

@@ -43,7 +43,7 @@ func GetLLMBasePodCreateInput(
}
data.VcpuCount = skuBase.Cpu
data.VmemSize = skuBase.Memory + 1
data.VmemSize = skuBase.Memory
// data.Name = input.Name + "-" + seclib.RandomPassword(6)
data.Name = input.Name

View File

@@ -93,13 +93,16 @@ type ILLMContainerDriver interface {
GetEffectiveSpec(llm *SLLM, sku *SLLMSku) interface{}
// GetPrimaryImageId returns the primary image id for this SKU type (e.g. LLMImageId for ollama/vllm, DifyApiImageId for dify).
GetPrimaryImageId(sku *SLLMSku) string
// ValidateCreateData validates create input and returns the LLMSpec to store. Called by SKU manager after base validation.
ValidateCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *llm.LLMSkuCreateInput) (*llm.LLMSkuCreateInput, error)
// ValidateUpdateData validates update input, merges with current spec, and returns the LLMSpec to store. Called by SKU when LLMSpec is not nil.
ValidateUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSkuUpdateInput) (*llm.LLMSkuUpdateInput, error)
// ValidateLLMSkuCreateData validates create input and returns the LLMSpec to store. Called by SKU manager after base validation.
ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *llm.LLMSkuCreateInput) (*llm.LLMSkuCreateInput, error)
// ValidateLLMSkuUpdateData validates update input, merges with current spec, and returns the LLMSpec to store. Called by SKU when LLMSpec is not nil.
ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSkuUpdateInput) (*llm.LLMSkuUpdateInput, error)
MatchContainerToUpdate(ctr *computeapi.SContainer, podCtrs []*computeapi.PodContainerCreateInput) (*computeapi.PodContainerCreateInput, error)
ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *SLLMSku, input *llm.LLMSpec) (*llm.LLMSpec, error)
ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *SLLM, input *llm.LLMSpec) (*llm.LLMSpec, error)
ILLMContainerMCPAgent
}

View File

@@ -820,13 +820,31 @@ func (llm *SLLM) EnsureInstantModelsInstalled(ctx context.Context, userCred mccl
if err != nil {
return errors.Wrap(err, "FetchApps")
}
if len(mdlIds) == 0 {
return nil
}
instModels := make(map[string]SInstantModel)
if err := db.FetchModelObjectsByIds(GetInstantModelManager(), "id", mdlIds, &instModels); err != nil {
return errors.Wrap(err, "FetchModelObjectsByIds")
}
var errs []error
for _, mdlId := range mdlIds {
if _, ok := mdlMap[mdlId]; ok {
// probed
// errs = append(errs, errors.Wrap(errors.ErrInvalidStatus, pkg))
} else {
// not mounted and not probed
continue
}
instModel, ok := instModels[mdlId]
if !ok {
errs = append(errs, errors.Wrapf(errors.ErrNotFound, "mdlId %s", mdlId))
continue
}
found := false
for _, info := range mdlMap {
if info.ModelId == instModel.ModelId {
found = true
break
}
}
if !found {
errs = append(errs, errors.Wrapf(errors.ErrNotFound, "mdlId %s", mdlId))
}
}

View File

@@ -201,7 +201,7 @@ func (man *SLLMSkuManager) ValidateCreateData(ctx context.Context, userCred mccl
if err != nil {
return input, errors.Wrap(err, "get container driver")
}
input, err = drv.ValidateCreateData(ctx, userCred, input)
input, err = drv.ValidateLLMSkuCreateData(ctx, userCred, input)
if err != nil {
return input, errors.Wrap(err, "validate create input")
}
@@ -238,7 +238,7 @@ func (sku *SLLMSku) ValidateUpdateData(ctx context.Context, userCred mcclient.To
return input, nil
}
drv := sku.GetLLMContainerDriver()
updateInput, err := drv.ValidateUpdateData(ctx, userCred, sku, &input)
updateInput, err := drv.ValidateLLMSkuUpdateData(ctx, userCred, sku, &input)
if err != nil {
return input, errors.Wrap(err, "validate update spec")
}

View File

@@ -94,6 +94,7 @@ func (task *LLMCreateTask) OnLLMRefreshStatusComplete(ctx context.Context, llm *
mountedModels, err := llm.FetchMountedModelFullName()
if err != nil {
task.taskFailed(ctx, llm, errors.Wrap(err, "FetchMountedModelFullName"))
return
}
// Create the disk

View File

@@ -59,5 +59,10 @@ func (t *LLMStartTask) OnStartedFailed(ctx context.Context, llm *models.SLLM, er
}
func (t *LLMStartTask) OnStarted(ctx context.Context, llm *models.SLLM, reason jsonutils.JSONObject) {
err := llm.GetLLMContainerDriver().StartLLM(ctx, t.GetUserCred(), llm)
if err != nil {
t.taskFailed(ctx, llm, err.Error())
return
}
t.taskComplete(ctx, llm)
}

View File

@@ -18,7 +18,7 @@ func (o *LLMImageShowOptions) Params() (jsonutils.JSONObject, error) {
type LLMImageListOptions struct {
options.BaseListOptions
LLMType string `json:"llm_type" choices:"ollama|dify|comfyui" help:"filter by llm type"`
LLMType string `json:"llm_type" choices:"ollama|dify|comfyui|vllm" help:"filter by llm type"`
}
func (o *LLMImageListOptions) Params() (jsonutils.JSONObject, error) {
@@ -30,7 +30,7 @@ type LLMImageCreateOptions struct {
IMAGE_NAME string `json:"image_name"`
IMAGE_LABEL string `json:"image_label"`
CredentialId string `json:"credential_id"`
LLM_TYPE string `json:"llm_type" choices:"ollama|dify|comfyui" help:"llm type: ollama, comfyui or dify"`
LLM_TYPE string `json:"llm_type" choices:"ollama|dify|comfyui|vllm" help:"llm type: ollama, comfyui, vllm or dify"`
}
func (o *LLMImageCreateOptions) Params() (jsonutils.JSONObject, error) {
@@ -44,7 +44,7 @@ type LLMImageUpdateOptions struct {
ImageName string `json:"image_name"`
ImageLabel string `json:"image_label"`
CredentialId string `json:"credential_id"`
LlmType string `json:"llm_type" choices:"ollama|dify|vllm|comfyui" help:"llm type: ollama, comfyui or dify"`
LlmType string `json:"llm_type" choices:"ollama|dify|vllm|comfyui" help:"llm type: ollama, comfyui, vllm or dify"`
}
func (o *LLMImageUpdateOptions) GetId() string {

View File

@@ -68,11 +68,12 @@ type LLMBaseCreateOptions struct {
type LLMCreateOptions struct {
LLMBaseCreateOptions
LLM_SKU_ID string `help:"llm sku id or name" json:"llm_sku_id"`
LLM_SKU_ID string `help:"llm sku id or name" json:"llm_sku_id"`
PreferredModel string `help:"vLLM preferred model dir name under models path (e.g. Qwen/Qwen2-7B)" json:"-"`
}
func (o *LLMCreateOptions) Params() (jsonutils.JSONObject, error) {
params := jsonutils.Marshal(o)
params := jsonutils.Marshal(o).(*jsonutils.JSONDict)
if len(o.Net) > 0 {
nets := make([]*computeapi.NetworkConfig, 0)
@@ -83,7 +84,16 @@ func (o *LLMCreateOptions) Params() (jsonutils.JSONObject, error) {
}
nets = append(nets, net)
}
params.(*jsonutils.JSONDict).Add(jsonutils.Marshal(nets), "nets")
params.Add(jsonutils.Marshal(nets), "nets")
}
if o.PreferredModel != "" {
spec := &api.LLMSpec{
Ollama: nil,
Vllm: &api.LLMSpecVllm{PreferredModel: o.PreferredModel},
Dify: nil,
}
params.Set("llm_spec", jsonutils.Marshal(spec))
}
return params, nil
@@ -93,6 +103,32 @@ func (o *LLMCreateOptions) GetCountParam() int {
return o.Count
}
type LLMUpdateOptions struct {
options.BaseIdOptions
PreferredModel string `help:"vLLM preferred model dir name under models path (e.g. Qwen/Qwen2-7B); takes effect after pod recreate" json:"-"`
}
func (o *LLMUpdateOptions) GetId() string {
return o.ID
}
func (o *LLMUpdateOptions) Params() (jsonutils.JSONObject, error) {
dict, err := options.StructToParams(o)
if err != nil {
return nil, err
}
if o.PreferredModel != "" {
spec := &api.LLMSpec{
Ollama: nil,
Vllm: &api.LLMSpecVllm{PreferredModel: o.PreferredModel},
Dify: nil,
}
dict.Set("llm_spec", jsonutils.Marshal(spec))
}
return dict, nil
}
type LLMDeleteOptions struct {
options.BaseIdOptions
}
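
For reference, the request body these options produce when the PreferredModel option is set looks roughly like the JSON printed below. The vllm and preferred_model field names are assumptions based on the project's usual snake_case tags; they are not shown in this diff.

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed-down stand-ins for api.LLMSpec / api.LLMSpecVllm; the json tags here
// are assumed, not taken from this diff.
type llmSpecVllm struct {
	PreferredModel string `json:"preferred_model,omitempty"`
}

type llmSpec struct {
	Vllm *llmSpecVllm `json:"vllm,omitempty"`
}

func main() {
	body := map[string]interface{}{
		"llm_sku_id": "my-vllm-sku", // hypothetical SKU id or name
		"llm_spec":   llmSpec{Vllm: &llmSpecVllm{PreferredModel: "Qwen/Qwen2-7B"}},
	}
	out, _ := json.Marshal(body)
	fmt.Println(string(out))
	// {"llm_sku_id":"my-vllm-sku","llm_spec":{"vllm":{"preferred_model":"Qwen/Qwen2-7B"}}}
}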

View File

@@ -8,7 +8,7 @@ set -euo pipefail
if [ $# -ne 1 ]; then
echo "用法: $0 <targetRegistry>"
echo "例如: $0 crpi-nf3abu98o8qf9y2x.cn-beijing.personal.cr.aliyuncs.com/eikoh"
echo "例如: $0 registry.cn-beijing.aliyuncs.com/cloudpods"
exit 1
fi