1 file changed: +5 −5 lines changed
@@ -1,24 +1,24 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import LlamaForCausalLM, LlamaTokenizer
 
 MAX_NEW_TOKENS = 128
-model_name = 'decapoda-research/llama-7b-hf'
+model_name = 'meta-llama/Llama-2-7b-hf'
 
 text = 'Hamburg is in which country?\n'
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = LlamaTokenizer.from_pretrained(model_name)
 input_ids = tokenizer(text, return_tensors="pt").input_ids
 
-free_in_GB = int(torch.cuda.mem_get_info()[0]/1024**3)
 max_memory = f'{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB'
 
 n_gpus = torch.cuda.device_count()
 max_memory = {i: max_memory for i in range(n_gpus)}
 
-model = AutoModelForCausalLM.from_pretrained(
+model = LlamaForCausalLM.from_pretrained(
     model_name,
     device_map='auto',
     load_in_8bit=True,
     max_memory=max_memory
 )
+
 generated_ids = model.generate(input_ids, max_length=MAX_NEW_TOKENS)
 print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
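Note on the change: the old decapoda-research/llama-7b-hf checkpoint no longer loads cleanly with current transformers releases, and meta-llama/Llama-2-7b-hf is a gated repository, so running the updated script assumes you have accepted Meta's license on the Hub and are authenticated (for example via huggingface-cli login). Recent transformers versions also deprecate the bare load_in_8bit=True kwarg in favour of an explicit BitsAndBytesConfig. The snippet below is a minimal sketch of the same 8-bit load under that API; it is an illustration, not part of the diff above.

import torch
from transformers import BitsAndBytesConfig, LlamaForCausalLM, LlamaTokenizer

model_name = 'meta-llama/Llama-2-7b-hf'  # gated repo; requires Hub authentication

# Same per-GPU budget as the script: free memory on GPU 0, minus ~2 GB of headroom.
free_in_gb = int(torch.cuda.mem_get_info()[0] / 1024**3)
max_memory = {i: f'{free_in_gb - 2}GB' for i in range(torch.cuda.device_count())}

tokenizer = LlamaTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    # Explicit quantization config replaces the deprecated load_in_8bit=True kwarg.
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    max_memory=max_memory,
)

Generation is unaffected: model.generate(input_ids, max_length=MAX_NEW_TOKENS) behaves the same against the quantized model. One caveat carried over from the script: max_length caps prompt plus completion at 128 tokens, whereas max_new_tokens=MAX_NEW_TOKENS would match what the variable's name suggests.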