app.py
from transformers import set_seed, AutoModelForCausalLM, AutoTokenizer
import torch
import random
import numpy as np
import inferless
from typing import Optional
from pydantic import BaseModel, Field
from prompts import PY_USER_PROMPT, PY_SYSTEM_PROMPT, JS_USER_PROMPT, JS_SYSTEM_PROMPT
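
# NOTE: prompts.py is not shown in this file. Based on the .format() call in
# infer() below, it presumably defines the four imported prompt constants,
# with each *_USER_PROMPT containing a {code_content} placeholder.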


@inferless.request
class RequestObjects(BaseModel):
    # Request schema; defaults make every field optional at call time.
    code_content: str = Field(default="def hello(arg1,arg2):")
    code_language: str = Field(default="python")
    temperature: Optional[float] = 0.1
    top_p: Optional[float] = 0.9
    max_new_tokens: Optional[int] = 4096
    do_sample: Optional[bool] = True


@inferless.response
class ResponseObjects(BaseModel):
    generated_text: str = Field(default="Generated text will appear here")


class InferlessPythonModel:
    def set_seed(self, SEED):
        # Seed every RNG involved (Python, NumPy, PyTorch CPU/CUDA, and
        # transformers) so generations are reproducible across runs.
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)
        torch.cuda.manual_seed_all(SEED)
        set_seed(SEED)

    def initialize(self):
        SEED = 12896654
        self.set_seed(SEED)
        # Load the instruct-tuned coder model; device_map="auto" places the
        # weights on available GPU(s), torch_dtype="auto" uses the checkpoint dtype.
        model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"
        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Map each supported language to its (system prompt, user prompt) pair.
        self.LANG_PROMPTS = {
            "python": (PY_SYSTEM_PROMPT, PY_USER_PROMPT),
            "javascript": (JS_SYSTEM_PROMPT, JS_USER_PROMPT),
        }

    def infer(self, inputs: RequestObjects) -> ResponseObjects:
        # Unsupported languages raise a KeyError here by design.
        SYSTEM_PROMPT, USER_PROMPT = self.LANG_PROMPTS[inputs.code_language.lower()]
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT.format(code_content=inputs.code_content)},
        ]
        # Render the chat messages into the model's prompt format, leaving the
        # assistant turn open for generation.
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=inputs.max_new_tokens,
            temperature=inputs.temperature,
            do_sample=inputs.do_sample,
            top_p=inputs.top_p,
        )
        # Strip the prompt tokens so only the newly generated completion is decoded.
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return ResponseObjects(generated_text=response)

    def finalize(self):
        # Release the model reference so GPU memory can be reclaimed on teardown.
        self.model = None
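

# --- Hypothetical local smoke test (an assumption, not part of the Inferless
# runtime, which calls initialize/infer/finalize itself). Running it requires
# a GPU, the model weights, and a prompts.py defining the imported constants. ---
if __name__ == "__main__":
    model = InferlessPythonModel()
    model.initialize()
    result = model.infer(RequestObjects(code_content="def add(a, b):", code_language="python"))
    print(result.generated_text)
    model.finalize()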