Support prefixes and loading multiple local files

Former-commit-id: 6672e09836ed0103693a381ece010377bd0ef4f8
Author: hiyouga
Date: 2023-06-26 15:32:40 +08:00
Parent: 83346e86af
Commit: 3aa1ca66e0

3 changed files with 84 additions and 39 deletions


@@ -34,6 +34,21 @@ async def lifespan(app: FastAPI): # collects GPU memory

app = FastAPI(lifespan=lifespan)

class ModelCard(BaseModel):
    id: str
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "owner"
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None

class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelCard] = []

class ChatMessage(BaseModel):
    role: Literal["user", "assistant", "system"]
    content: str
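As a side note (not part of the diff), these Pydantic models serialize directly into the OpenAI-compatible payload returned by /v1/models. A minimal standalone sketch, assuming Pydantic v1 as the project used at the time; the printed structure is illustrative only:

import time
from typing import List, Optional

from pydantic import BaseModel, Field

class ModelCard(BaseModel):
    id: str
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "owner"
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None

class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelCard] = []

# Serializing a ModelList yields the OpenAI-style model listing,
# e.g. {"object": "list", "data": [{"id": "gpt-3.5-turbo", ...}]}
print(ModelList(data=[ModelCard(id="gpt-3.5-turbo")]).dict())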
@@ -73,6 +88,13 @@ class ChatCompletionResponse(BaseModel):
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))

@app.get("/v1/models", response_model=ModelList)
async def list_models():
    global model_args
    model_card = ModelCard(id="gpt-3.5-turbo")
    return ModelList(data=[model_card])

@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    global model, tokenizer, source_prefix, generating_args
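For context, the two routes above follow the OpenAI REST conventions, so any OpenAI-style client can talk to the demo server. A minimal client-side sketch (not part of the commit), assuming the server is reachable at http://localhost:8000; host, port, and payload values are illustrative only:

import requests

base_url = "http://localhost:8000"  # assumed address of the API demo server

# List the single model card exposed by /v1/models.
print(requests.get(f"{base_url}/v1/models").json())

# Send an OpenAI-style chat request to /v1/chat/completions.
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hello!"}],
}
print(requests.post(f"{base_url}/v1/chat/completions", json=payload).json())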