update-readme
README.md CHANGED
@@ -37,7 +37,7 @@ license: llama3
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 device = "cuda"
-path = "
+path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
 model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
                                trust_remote_code=True, torch_dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
@@ -83,10 +83,17 @@ for i in range(K):
     attribute = attributes[top_obj_dims[example_index, i].item()]
     coeff = top_obj_coeffs[example_index, i].item()
     print(f"{attribute}: {round(coeff,5)}")
+
+# Float16
 # code-complexity: 0.19922
 # helpsteer-verbosity: -0.10864
 # ultrafeedback-instruction_following: 0.07861
 
+# 4bit
+# code-complexity: 0.19043
+# helpsteer-verbosity: -0.11304
+# ultrafeedback-instruction_following: 0.08203
+
 # The actual rewards of this example from the HelpSteer dataset
 # are [3,3,4,2,2] for the five helpsteer objectives:
 # helpfulness, correctness, coherence, complexity, verbosity
@@ -94,7 +101,8 @@ for i in range(K):
 # original reward space to compare with the ground truth
 helpsteer_rewards_pred = multi_obj_rewards[0, :5] * 5 - 0.5
 print(helpsteer_rewards_pred)
-# [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+# float16 [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+# 4bit [1.7754, 1.9316, 3.4062, 1.2773, 1.8438]
 ```
 
 ## Easy to use Pipeline
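The tensors used in the hunks above (`multi_obj_rewards`, `top_obj_dims`, `top_obj_coeffs`, `attributes`) are defined in a part of the README this diff does not touch. Below is a minimal sketch of how they can be produced, continuing from the loading code in the first hunk; the output attribute names `output.rewards` and `output.gating_output`, the `model.reward_transform_matrix` buffer, and the example messages follow the upstream RLHFlow/ArmoRM-Llama3-8B-v0.1 usage and should be treated as assumptions here.

```python
# Sketch (not part of the diff): scoring one prompt/response pair.
# Assumes the custom ArmoRM model class exposes per-objective rewards as
# `output.rewards` and gating weights as `output.gating_output`; verify
# these names against the model's remote code before relying on them.
import torch

messages = [
    {"role": "user", "content": "What are some synonyms for the word 'beautiful'?"},
    {"role": "assistant", "content": "Stunning, gorgeous, lovely, elegant, pretty."},
]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

with torch.no_grad():
    output = model(input_ids)
    # Per-objective reward predictions, roughly in [0, 1]
    multi_obj_rewards = output.rewards.cpu().float()
    # Gating weights that mix the objectives into one preference score
    gating_output = output.gating_output.cpu().float()

# Mixing coefficient per objective; the K largest are what the loop prints
obj_transform = model.reward_transform_matrix.data.cpu().float()
multi_obj_coeffs = gating_output @ obj_transform.T
K, example_index = 3, 0
top_obj_coeffs, top_obj_dims = torch.topk(multi_obj_coeffs, K, dim=1)
```

`top_obj_dims` then indexes into `attributes`, the list of reward-objective names defined earlier in the README.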