Spaces:
Running
Running
Mike Ferchak committed on
Commit ·
b500f71
1
Parent(s): 6303c45
pyright and .gitignore
Browse files
- .gitignore +75 -0
- app.py +44 -29
.gitignore
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables and secrets
|
| 2 |
+
.env
|
| 3 |
+
.env.*
|
| 4 |
+
!.env.example
|
| 5 |
+
*.env
|
| 6 |
+
|
| 7 |
+
# Claude Code related
|
| 8 |
+
.claude/
|
| 9 |
+
.clauderc
|
| 10 |
+
.claude-*
|
| 11 |
+
claude_*
|
| 12 |
+
.clinerules
|
| 13 |
+
CLAUDE.md
|
| 14 |
+
|
| 15 |
+
# Python
|
| 16 |
+
__pycache__/
|
| 17 |
+
*.py[cod]
|
| 18 |
+
*$py.class
|
| 19 |
+
*.so
|
| 20 |
+
.Python
|
| 21 |
+
build/
|
| 22 |
+
develop-eggs/
|
| 23 |
+
dist/
|
| 24 |
+
downloads/
|
| 25 |
+
eggs/
|
| 26 |
+
.eggs/
|
| 27 |
+
lib/
|
| 28 |
+
lib64/
|
| 29 |
+
parts/
|
| 30 |
+
sdist/
|
| 31 |
+
var/
|
| 32 |
+
wheels/
|
| 33 |
+
*.egg-info/
|
| 34 |
+
.installed.cfg
|
| 35 |
+
*.egg
|
| 36 |
+
MANIFEST
|
| 37 |
+
|
| 38 |
+
# Virtual environments
|
| 39 |
+
venv/
|
| 40 |
+
ENV/
|
| 41 |
+
env/
|
| 42 |
+
.venv
|
| 43 |
+
|
| 44 |
+
# IDEs, editor swap/backup files, and OS metadata
|
| 45 |
+
.vscode/
|
| 46 |
+
.idea/
|
| 47 |
+
*.swp
|
| 48 |
+
*.swo
|
| 49 |
+
*~
|
| 50 |
+
.DS_Store
|
| 51 |
+
|
| 52 |
+
# Jupyter Notebook
|
| 53 |
+
.ipynb_checkpoints
|
| 54 |
+
|
| 55 |
+
# pytest and coverage reports
|
| 56 |
+
.pytest_cache/
|
| 57 |
+
.coverage
|
| 58 |
+
htmlcov/
|
| 59 |
+
|
| 60 |
+
# Gradio
|
| 61 |
+
flagged/
|
| 62 |
+
gradio_cached_examples/
|
| 63 |
+
|
| 64 |
+
# Logs
|
| 65 |
+
*.log
|
| 66 |
+
|
| 67 |
+
# Database
|
| 68 |
+
*.db
|
| 69 |
+
*.sqlite
|
| 70 |
+
*.sqlite3
|
| 71 |
+
|
| 72 |
+
# Temporary files
|
| 73 |
+
tmp/
|
| 74 |
+
temp/
|
| 75 |
+
*.tmp
|
app.py
CHANGED
|
@@ -50,7 +50,7 @@ class ChatBot:
|
|
| 50 |
api_key=os.environ.get("SGLANG_API_KEY"),
|
| 51 |
)
|
| 52 |
self.alinia_client = httpx.AsyncClient(
|
| 53 |
-
base_url=os.environ.get("ALINIA_API_URL", "https://api.alinia.ai"),
|
| 54 |
headers={"Authorization": f"Bearer {os.environ.get('ALINIA_API_KEY', '')}"},
|
| 55 |
)
|
| 56 |
|
|
@@ -133,7 +133,9 @@ def _format_nested_html(data: Any) -> str:
|
|
| 133 |
elif isinstance(value, float):
|
| 134 |
score_pct = f"{value * 100:.2f}%"
|
| 135 |
color = "red" if value > 0.7 else "orange" if value > 0.3 else "green"
|
| 136 |
-
html +=
|
|
|
|
|
|
|
| 137 |
elif isinstance(value, bool):
|
| 138 |
html += f"<li>{key_str}: {value}</li>"
|
| 139 |
else:
|
|
@@ -220,7 +222,7 @@ async def bot_response_fn(
|
|
| 220 |
unguarded_history[-1][1] = "▌"
|
| 221 |
|
| 222 |
messages_for_moderation = [{"role": "user", "content": user_message}]
|
| 223 |
-
|
| 224 |
# Call v1 moderation if adversarial v1 is enabled
|
| 225 |
if security_enabled and adversarial_checkbox:
|
| 226 |
moderation_api_response = await chatbot.moderate_chat_history(
|
|
@@ -228,7 +230,9 @@ async def bot_response_fn(
|
|
| 228 |
)
|
| 229 |
result = moderation_api_response.get("result", {})
|
| 230 |
recommendation = moderation_api_response.get("recommendation", {})
|
| 231 |
-
user_input_blocked =
|
|
|
|
|
|
|
| 232 |
|
| 233 |
user_input_moderation_result = ModerationResultData(
|
| 234 |
flagged=result.get("flagged", False),
|
|
@@ -247,33 +251,33 @@ async def bot_response_fn(
|
|
| 247 |
# Build detection config for v2 - only adversarial
|
| 248 |
detection_config_v2 = {
|
| 249 |
"safety": False,
|
| 250 |
-
"security": {
|
| 251 |
-
"adversarial": adversarial_threshold_2
|
| 252 |
-
}
|
| 253 |
}
|
| 254 |
-
|
| 255 |
moderation_api_response_v2 = await chatbot.moderate_chat_history(
|
| 256 |
messages_for_moderation, detection_config_v2, model_version="20251105"
|
| 257 |
)
|
| 258 |
print(f"[DEBUG] V2 User Input Response: {moderation_api_response_v2}")
|
| 259 |
-
|
| 260 |
result_v2 = moderation_api_response_v2.get("result", {})
|
| 261 |
recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
|
| 262 |
-
|
| 263 |
# Extract adversarial v2 probability from security->adversarial
|
| 264 |
v2_probability = 0.0
|
| 265 |
category_details_v2 = result_v2.get("category_details", {})
|
| 266 |
print(f"[DEBUG] V2 category_details: {category_details_v2}")
|
| 267 |
-
|
| 268 |
if "security" in category_details_v2:
|
| 269 |
v2_probability = category_details_v2["security"].get("adversarial", 0.0)
|
| 270 |
print(f"[DEBUG] V2 probability extracted: {v2_probability}")
|
| 271 |
-
|
| 272 |
# Add to categories as adversarial_v2
|
| 273 |
if "security" not in user_input_moderation_result.categories:
|
| 274 |
user_input_moderation_result.categories["security"] = {}
|
| 275 |
-
user_input_moderation_result.categories["security"]["adversarial_v2"] =
|
| 276 |
-
|
|
|
|
|
|
|
| 277 |
# Check if v2 flagged content
|
| 278 |
v2_flagged = v2_probability > adversarial_threshold_2
|
| 279 |
if v2_flagged or recommendation_v2.get("action") == "block":
|
|
@@ -308,8 +312,14 @@ async def bot_response_fn(
|
|
| 308 |
unguarded_text += token
|
| 309 |
unguarded_history[-1][1] = unguarded_text
|
| 310 |
guarded_history[-1][1] = unguarded_text
|
| 311 |
-
yield
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
)
|
| 314 |
|
| 315 |
# Step 3: Moderate bot response
|
|
@@ -317,12 +327,12 @@ async def bot_response_fn(
|
|
| 317 |
if moderate_user_input:
|
| 318 |
messages_for_moderation.append({"role": "user", "content": user_message})
|
| 319 |
messages_for_moderation.append({"role": "assistant", "content": unguarded_text})
|
| 320 |
-
|
| 321 |
# Call v1 moderation if adversarial v1 is enabled
|
| 322 |
is_blocked = False
|
| 323 |
categories = {}
|
| 324 |
flagged = False
|
| 325 |
-
|
| 326 |
if security_enabled and adversarial_checkbox:
|
| 327 |
moderation_api_response = await chatbot.moderate_chat_history(
|
| 328 |
messages_for_moderation, detection_config
|
|
@@ -339,33 +349,31 @@ async def bot_response_fn(
|
|
| 339 |
# Build detection config for v2 - only adversarial
|
| 340 |
detection_config_v2 = {
|
| 341 |
"safety": False,
|
| 342 |
-
"security": {
|
| 343 |
-
"adversarial": adversarial_threshold_2
|
| 344 |
-
}
|
| 345 |
}
|
| 346 |
-
|
| 347 |
moderation_api_response_v2 = await chatbot.moderate_chat_history(
|
| 348 |
messages_for_moderation, detection_config_v2, model_version="20251105"
|
| 349 |
)
|
| 350 |
print(f"[DEBUG] V2 Bot Response: {moderation_api_response_v2}")
|
| 351 |
-
|
| 352 |
result_v2 = moderation_api_response_v2.get("result", {})
|
| 353 |
recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
|
| 354 |
-
|
| 355 |
# Extract adversarial v2 probability from security->adversarial
|
| 356 |
v2_probability = 0.0
|
| 357 |
category_details_v2 = result_v2.get("category_details", {})
|
| 358 |
print(f"[DEBUG] V2 category_details: {category_details_v2}")
|
| 359 |
-
|
| 360 |
if "security" in category_details_v2:
|
| 361 |
v2_probability = category_details_v2["security"].get("adversarial", 0.0)
|
| 362 |
print(f"[DEBUG] V2 probability extracted: {v2_probability}")
|
| 363 |
-
|
| 364 |
# Add to categories as adversarial_v2
|
| 365 |
if "security" not in categories:
|
| 366 |
categories["security"] = {}
|
| 367 |
categories["security"]["adversarial_v2"] = v2_probability
|
| 368 |
-
|
| 369 |
# Check if v2 flagged content
|
| 370 |
v2_flagged = v2_probability > adversarial_threshold_2
|
| 371 |
if v2_flagged or recommendation_v2.get("action") == "block":
|
|
@@ -585,7 +593,12 @@ def create_demo() -> gr.Blocks:
|
|
| 585 |
)
|
| 586 |
|
| 587 |
# Master toggle sync
|
| 588 |
-
safety_checkboxes = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
security_checkboxes = [adversarial_checkbox, adversarial_checkbox_2]
|
| 590 |
|
| 591 |
def sync_subs_to_master(*subs):
|
|
@@ -608,7 +621,9 @@ def create_demo() -> gr.Blocks:
|
|
| 608 |
sync_master_to_subs, [safety_enabled, *safety_checkboxes], safety_checkboxes
|
| 609 |
)
|
| 610 |
security_enabled.change(
|
| 611 |
-
sync_master_to_subs,
|
|
|
|
|
|
|
| 612 |
)
|
| 613 |
|
| 614 |
return demo
|
|
|
|
| 50 |
api_key=os.environ.get("SGLANG_API_KEY"),
|
| 51 |
)
|
| 52 |
self.alinia_client = httpx.AsyncClient(
|
| 53 |
+
base_url=os.environ.get("ALINIA_API_URL", "https://staging.api.alinia.ai"),
|
| 54 |
headers={"Authorization": f"Bearer {os.environ.get('ALINIA_API_KEY', '')}"},
|
| 55 |
)
|
| 56 |
|
|
|
|
| 133 |
elif isinstance(value, float):
|
| 134 |
score_pct = f"{value * 100:.2f}%"
|
| 135 |
color = "red" if value > 0.7 else "orange" if value > 0.3 else "green"
|
| 136 |
+
html += (
|
| 137 |
+
f'<li>{key_str}: <span style="color: {color};">{score_pct}</span></li>'
|
| 138 |
+
)
|
| 139 |
elif isinstance(value, bool):
|
| 140 |
html += f"<li>{key_str}: {value}</li>"
|
| 141 |
else:
|
|
|
|
| 222 |
unguarded_history[-1][1] = "▌"
|
| 223 |
|
| 224 |
messages_for_moderation = [{"role": "user", "content": user_message}]
|
| 225 |
+
|
| 226 |
# Call v1 moderation if adversarial v1 is enabled
|
| 227 |
if security_enabled and adversarial_checkbox:
|
| 228 |
moderation_api_response = await chatbot.moderate_chat_history(
|
|
|
|
| 230 |
)
|
| 231 |
result = moderation_api_response.get("result", {})
|
| 232 |
recommendation = moderation_api_response.get("recommendation", {})
|
| 233 |
+
user_input_blocked = (
|
| 234 |
+
recommendation.get("action") == "block" and block_content
|
| 235 |
+
)
|
| 236 |
|
| 237 |
user_input_moderation_result = ModerationResultData(
|
| 238 |
flagged=result.get("flagged", False),
|
|
|
|
| 251 |
# Build detection config for v2 - only adversarial
|
| 252 |
detection_config_v2 = {
|
| 253 |
"safety": False,
|
| 254 |
+
"security": {"adversarial": adversarial_threshold_2},
|
|
|
|
|
|
|
| 255 |
}
|
| 256 |
+
|
| 257 |
moderation_api_response_v2 = await chatbot.moderate_chat_history(
|
| 258 |
messages_for_moderation, detection_config_v2, model_version="20251105"
|
| 259 |
)
|
| 260 |
print(f"[DEBUG] V2 User Input Response: {moderation_api_response_v2}")
|
| 261 |
+
|
| 262 |
result_v2 = moderation_api_response_v2.get("result", {})
|
| 263 |
recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
|
| 264 |
+
|
| 265 |
# Extract adversarial v2 probability from security->adversarial
|
| 266 |
v2_probability = 0.0
|
| 267 |
category_details_v2 = result_v2.get("category_details", {})
|
| 268 |
print(f"[DEBUG] V2 category_details: {category_details_v2}")
|
| 269 |
+
|
| 270 |
if "security" in category_details_v2:
|
| 271 |
v2_probability = category_details_v2["security"].get("adversarial", 0.0)
|
| 272 |
print(f"[DEBUG] V2 probability extracted: {v2_probability}")
|
| 273 |
+
|
| 274 |
# Add to categories as adversarial_v2
|
| 275 |
if "security" not in user_input_moderation_result.categories:
|
| 276 |
user_input_moderation_result.categories["security"] = {}
|
| 277 |
+
user_input_moderation_result.categories["security"]["adversarial_v2"] = (
|
| 278 |
+
v2_probability
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
# Check if v2 flagged content
|
| 282 |
v2_flagged = v2_probability > adversarial_threshold_2
|
| 283 |
if v2_flagged or recommendation_v2.get("action") == "block":
|
|
|
|
| 312 |
unguarded_text += token
|
| 313 |
unguarded_history[-1][1] = unguarded_text
|
| 314 |
guarded_history[-1][1] = unguarded_text
|
| 315 |
+
yield (
|
| 316 |
+
unguarded_history,
|
| 317 |
+
guarded_history,
|
| 318 |
+
(
|
| 319 |
+
moderation_html
|
| 320 |
+
if moderate_user_input
|
| 321 |
+
else "<p>Moderating response...</p>"
|
| 322 |
+
),
|
| 323 |
)
|
| 324 |
|
| 325 |
# Step 3: Moderate bot response
|
|
|
|
| 327 |
if moderate_user_input:
|
| 328 |
messages_for_moderation.append({"role": "user", "content": user_message})
|
| 329 |
messages_for_moderation.append({"role": "assistant", "content": unguarded_text})
|
| 330 |
+
|
| 331 |
# Call v1 moderation if adversarial v1 is enabled
|
| 332 |
is_blocked = False
|
| 333 |
categories = {}
|
| 334 |
flagged = False
|
| 335 |
+
|
| 336 |
if security_enabled and adversarial_checkbox:
|
| 337 |
moderation_api_response = await chatbot.moderate_chat_history(
|
| 338 |
messages_for_moderation, detection_config
|
|
|
|
| 349 |
# Build detection config for v2 - only adversarial
|
| 350 |
detection_config_v2 = {
|
| 351 |
"safety": False,
|
| 352 |
+
"security": {"adversarial": adversarial_threshold_2},
|
|
|
|
|
|
|
| 353 |
}
|
| 354 |
+
|
| 355 |
moderation_api_response_v2 = await chatbot.moderate_chat_history(
|
| 356 |
messages_for_moderation, detection_config_v2, model_version="20251105"
|
| 357 |
)
|
| 358 |
print(f"[DEBUG] V2 Bot Response: {moderation_api_response_v2}")
|
| 359 |
+
|
| 360 |
result_v2 = moderation_api_response_v2.get("result", {})
|
| 361 |
recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
|
| 362 |
+
|
| 363 |
# Extract adversarial v2 probability from security->adversarial
|
| 364 |
v2_probability = 0.0
|
| 365 |
category_details_v2 = result_v2.get("category_details", {})
|
| 366 |
print(f"[DEBUG] V2 category_details: {category_details_v2}")
|
| 367 |
+
|
| 368 |
if "security" in category_details_v2:
|
| 369 |
v2_probability = category_details_v2["security"].get("adversarial", 0.0)
|
| 370 |
print(f"[DEBUG] V2 probability extracted: {v2_probability}")
|
| 371 |
+
|
| 372 |
# Add to categories as adversarial_v2
|
| 373 |
if "security" not in categories:
|
| 374 |
categories["security"] = {}
|
| 375 |
categories["security"]["adversarial_v2"] = v2_probability
|
| 376 |
+
|
| 377 |
# Check if v2 flagged content
|
| 378 |
v2_flagged = v2_probability > adversarial_threshold_2
|
| 379 |
if v2_flagged or recommendation_v2.get("action") == "block":
|
|
|
|
| 593 |
)
|
| 594 |
|
| 595 |
# Master toggle sync
|
| 596 |
+
safety_checkboxes = [
|
| 597 |
+
hate_checkbox,
|
| 598 |
+
sexual_checkbox,
|
| 599 |
+
violence_checkbox,
|
| 600 |
+
wrongdoing_checkbox,
|
| 601 |
+
]
|
| 602 |
security_checkboxes = [adversarial_checkbox, adversarial_checkbox_2]
|
| 603 |
|
| 604 |
def sync_subs_to_master(*subs):
|
|
|
|
| 621 |
sync_master_to_subs, [safety_enabled, *safety_checkboxes], safety_checkboxes
|
| 622 |
)
|
| 623 |
security_enabled.change(
|
| 624 |
+
sync_master_to_subs,
|
| 625 |
+
[security_enabled, *security_checkboxes],
|
| 626 |
+
security_checkboxes,
|
| 627 |
)
|
| 628 |
|
| 629 |
return demo
|