update-readme
README.md CHANGED
@@ -37,7 +37,7 @@ license: llama3
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 device = "cuda"
-path = "
+path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
 model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
                                trust_remote_code=True, torch_dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
@@ -83,10 +83,17 @@ for i in range(K):
     attribute = attributes[top_obj_dims[example_index, i].item()]
     coeff = top_obj_coeffs[example_index, i].item()
     print(f"{attribute}: {round(coeff,5)}")
+
+# Float16
 # code-complexity: 0.19922
 # helpsteer-verbosity: -0.10864
 # ultrafeedback-instruction_following: 0.07861
 
+# 4bit
+# code-complexity: 0.19043
+# helpsteer-verbosity: -0.11304
+# ultrafeedback-instruction_following: 0.08203
+
 # The actual rewards of this example from the HelpSteer dataset
 # are [3,3,4,2,2] for the five helpsteer objectives:
 # helpfulness, correctness, coherence, complexity, verbosity
@@ -94,7 +101,8 @@ for i in range(K):
 # original reward space to compare with the ground truth
 helpsteer_rewards_pred = multi_obj_rewards[0, :5] * 5 - 0.5
 print(helpsteer_rewards_pred)
-# [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+# float16 [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+# 4bit [1.7754, 1.9316, 3.4062, 1.2773, 1.8438]
 ```
 
 ## Easy to use Pipeline
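The tensors used in the hunks above (`multi_obj_rewards`, `top_obj_dims`, `top_obj_coeffs`, `attributes`) are defined in a part of the README this diff does not touch. Below is a minimal sketch of how they can be produced, continuing from the loading code in the first hunk; the output attribute names `output.rewards` and `output.gating_output`, the `model.reward_transform_matrix` buffer, and the example messages follow the upstream RLHFlow/ArmoRM-Llama3-8B-v0.1 usage and should be treated as assumptions here.

```python
# Sketch (not part of the diff): scoring one prompt/response pair.
# Assumes the custom ArmoRM model class exposes per-objective rewards as
# `output.rewards` and gating weights as `output.gating_output`; verify
# these names against the model's remote code before relying on them.
import torch

messages = [
    {"role": "user", "content": "What are some synonyms for the word 'beautiful'?"},
    {"role": "assistant", "content": "Stunning, gorgeous, lovely, elegant, pretty."},
]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

with torch.no_grad():
    output = model(input_ids)
    # Per-objective reward predictions, roughly in [0, 1]
    multi_obj_rewards = output.rewards.cpu().float()
    # Gating weights that mix the objectives into one preference score
    gating_output = output.gating_output.cpu().float()

# Mixing coefficient per objective; the K largest are what the loop prints
obj_transform = model.reward_transform_matrix.data.cpu().float()
multi_obj_coeffs = gating_output @ obj_transform.T
K, example_index = 3, 0
top_obj_coeffs, top_obj_dims = torch.topk(multi_obj_coeffs, K, dim=1)
```

`top_obj_dims` then indexes into `attributes`, the list of reward-objective names defined earlier in the README.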