Mike Ferchak committed on
Commit
b500f71
·
1 Parent(s): 6303c45

pyright and .gitignore

Browse files
Files changed (2) hide show
  1. .gitignore +75 -0
  2. app.py +44 -29
.gitignore ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables and secrets
2
+ .env
3
+ .env.*
4
+ !.env.example
5
+ *.env
6
+
7
+ # Claude Code related
8
+ .claude/
9
+ .clauderc
10
+ .claude-*
11
+ claude_*
12
+ .clinerules
13
+ CLAUDE.md
14
+
15
+ # Python
16
+ __pycache__/
17
+ *.py[cod]
18
+ *$py.class
19
+ *.so
20
+ .Python
21
+ build/
22
+ develop-eggs/
23
+ dist/
24
+ downloads/
25
+ eggs/
26
+ .eggs/
27
+ lib/
28
+ lib64/
29
+ parts/
30
+ sdist/
31
+ var/
32
+ wheels/
33
+ *.egg-info/
34
+ .installed.cfg
35
+ *.egg
36
+ MANIFEST
37
+
38
+ # Virtual environments
39
+ venv/
40
+ ENV/
41
+ env/
42
+ .venv
43
+
44
+ # IDEs
45
+ .vscode/
46
+ .idea/
47
+ *.swp
48
+ *.swo
49
+ *~
50
+ .DS_Store
51
+
52
+ # Jupyter Notebook
53
+ .ipynb_checkpoints
54
+
55
+ # pytest
56
+ .pytest_cache/
57
+ .coverage
58
+ htmlcov/
59
+
60
+ # Gradio
61
+ flagged/
62
+ gradio_cached_examples/
63
+
64
+ # Logs
65
+ *.log
66
+
67
+ # Database
68
+ *.db
69
+ *.sqlite
70
+ *.sqlite3
71
+
72
+ # Temporary files
73
+ tmp/
74
+ temp/
75
+ *.tmp
app.py CHANGED
@@ -50,7 +50,7 @@ class ChatBot:
50
  api_key=os.environ.get("SGLANG_API_KEY"),
51
  )
52
  self.alinia_client = httpx.AsyncClient(
53
- base_url=os.environ.get("ALINIA_API_URL", "https://api.alinia.ai"),
54
  headers={"Authorization": f"Bearer {os.environ.get('ALINIA_API_KEY', '')}"},
55
  )
56
 
@@ -133,7 +133,9 @@ def _format_nested_html(data: Any) -> str:
133
  elif isinstance(value, float):
134
  score_pct = f"{value * 100:.2f}%"
135
  color = "red" if value > 0.7 else "orange" if value > 0.3 else "green"
136
- html += f'<li>{key_str}: <span style="color: {color};">{score_pct}</span></li>'
 
 
137
  elif isinstance(value, bool):
138
  html += f"<li>{key_str}: {value}</li>"
139
  else:
@@ -220,7 +222,7 @@ async def bot_response_fn(
220
  unguarded_history[-1][1] = "▌"
221
 
222
  messages_for_moderation = [{"role": "user", "content": user_message}]
223
-
224
  # Call v1 moderation if adversarial v1 is enabled
225
  if security_enabled and adversarial_checkbox:
226
  moderation_api_response = await chatbot.moderate_chat_history(
@@ -228,7 +230,9 @@ async def bot_response_fn(
228
  )
229
  result = moderation_api_response.get("result", {})
230
  recommendation = moderation_api_response.get("recommendation", {})
231
- user_input_blocked = recommendation.get("action") == "block" and block_content
 
 
232
 
233
  user_input_moderation_result = ModerationResultData(
234
  flagged=result.get("flagged", False),
@@ -247,33 +251,33 @@ async def bot_response_fn(
247
  # Build detection config for v2 - only adversarial
248
  detection_config_v2 = {
249
  "safety": False,
250
- "security": {
251
- "adversarial": adversarial_threshold_2
252
- }
253
  }
254
-
255
  moderation_api_response_v2 = await chatbot.moderate_chat_history(
256
  messages_for_moderation, detection_config_v2, model_version="20251105"
257
  )
258
  print(f"[DEBUG] V2 User Input Response: {moderation_api_response_v2}")
259
-
260
  result_v2 = moderation_api_response_v2.get("result", {})
261
  recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
262
-
263
  # Extract adversarial v2 probability from security->adversarial
264
  v2_probability = 0.0
265
  category_details_v2 = result_v2.get("category_details", {})
266
  print(f"[DEBUG] V2 category_details: {category_details_v2}")
267
-
268
  if "security" in category_details_v2:
269
  v2_probability = category_details_v2["security"].get("adversarial", 0.0)
270
  print(f"[DEBUG] V2 probability extracted: {v2_probability}")
271
-
272
  # Add to categories as adversarial_v2
273
  if "security" not in user_input_moderation_result.categories:
274
  user_input_moderation_result.categories["security"] = {}
275
- user_input_moderation_result.categories["security"]["adversarial_v2"] = v2_probability
276
-
 
 
277
  # Check if v2 flagged content
278
  v2_flagged = v2_probability > adversarial_threshold_2
279
  if v2_flagged or recommendation_v2.get("action") == "block":
@@ -308,8 +312,14 @@ async def bot_response_fn(
308
  unguarded_text += token
309
  unguarded_history[-1][1] = unguarded_text
310
  guarded_history[-1][1] = unguarded_text
311
- yield unguarded_history, guarded_history, (
312
- moderation_html if moderate_user_input else "<p>Moderating response...</p>"
 
 
 
 
 
 
313
  )
314
 
315
  # Step 3: Moderate bot response
@@ -317,12 +327,12 @@ async def bot_response_fn(
317
  if moderate_user_input:
318
  messages_for_moderation.append({"role": "user", "content": user_message})
319
  messages_for_moderation.append({"role": "assistant", "content": unguarded_text})
320
-
321
  # Call v1 moderation if adversarial v1 is enabled
322
  is_blocked = False
323
  categories = {}
324
  flagged = False
325
-
326
  if security_enabled and adversarial_checkbox:
327
  moderation_api_response = await chatbot.moderate_chat_history(
328
  messages_for_moderation, detection_config
@@ -339,33 +349,31 @@ async def bot_response_fn(
339
  # Build detection config for v2 - only adversarial
340
  detection_config_v2 = {
341
  "safety": False,
342
- "security": {
343
- "adversarial": adversarial_threshold_2
344
- }
345
  }
346
-
347
  moderation_api_response_v2 = await chatbot.moderate_chat_history(
348
  messages_for_moderation, detection_config_v2, model_version="20251105"
349
  )
350
  print(f"[DEBUG] V2 Bot Response: {moderation_api_response_v2}")
351
-
352
  result_v2 = moderation_api_response_v2.get("result", {})
353
  recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
354
-
355
  # Extract adversarial v2 probability from security->adversarial
356
  v2_probability = 0.0
357
  category_details_v2 = result_v2.get("category_details", {})
358
  print(f"[DEBUG] V2 category_details: {category_details_v2}")
359
-
360
  if "security" in category_details_v2:
361
  v2_probability = category_details_v2["security"].get("adversarial", 0.0)
362
  print(f"[DEBUG] V2 probability extracted: {v2_probability}")
363
-
364
  # Add to categories as adversarial_v2
365
  if "security" not in categories:
366
  categories["security"] = {}
367
  categories["security"]["adversarial_v2"] = v2_probability
368
-
369
  # Check if v2 flagged content
370
  v2_flagged = v2_probability > adversarial_threshold_2
371
  if v2_flagged or recommendation_v2.get("action") == "block":
@@ -585,7 +593,12 @@ def create_demo() -> gr.Blocks:
585
  )
586
 
587
  # Master toggle sync
588
- safety_checkboxes = [hate_checkbox, sexual_checkbox, violence_checkbox, wrongdoing_checkbox]
 
 
 
 
 
589
  security_checkboxes = [adversarial_checkbox, adversarial_checkbox_2]
590
 
591
  def sync_subs_to_master(*subs):
@@ -608,7 +621,9 @@ def create_demo() -> gr.Blocks:
608
  sync_master_to_subs, [safety_enabled, *safety_checkboxes], safety_checkboxes
609
  )
610
  security_enabled.change(
611
- sync_master_to_subs, [security_enabled, *security_checkboxes], security_checkboxes
 
 
612
  )
613
 
614
  return demo
 
50
  api_key=os.environ.get("SGLANG_API_KEY"),
51
  )
52
  self.alinia_client = httpx.AsyncClient(
53
+ base_url=os.environ.get("ALINIA_API_URL", "https://staging.api.alinia.ai"),
54
  headers={"Authorization": f"Bearer {os.environ.get('ALINIA_API_KEY', '')}"},
55
  )
56
 
 
133
  elif isinstance(value, float):
134
  score_pct = f"{value * 100:.2f}%"
135
  color = "red" if value > 0.7 else "orange" if value > 0.3 else "green"
136
+ html += (
137
+ f'<li>{key_str}: <span style="color: {color};">{score_pct}</span></li>'
138
+ )
139
  elif isinstance(value, bool):
140
  html += f"<li>{key_str}: {value}</li>"
141
  else:
 
222
  unguarded_history[-1][1] = "▌"
223
 
224
  messages_for_moderation = [{"role": "user", "content": user_message}]
225
+
226
  # Call v1 moderation if adversarial v1 is enabled
227
  if security_enabled and adversarial_checkbox:
228
  moderation_api_response = await chatbot.moderate_chat_history(
 
230
  )
231
  result = moderation_api_response.get("result", {})
232
  recommendation = moderation_api_response.get("recommendation", {})
233
+ user_input_blocked = (
234
+ recommendation.get("action") == "block" and block_content
235
+ )
236
 
237
  user_input_moderation_result = ModerationResultData(
238
  flagged=result.get("flagged", False),
 
251
  # Build detection config for v2 - only adversarial
252
  detection_config_v2 = {
253
  "safety": False,
254
+ "security": {"adversarial": adversarial_threshold_2},
 
 
255
  }
256
+
257
  moderation_api_response_v2 = await chatbot.moderate_chat_history(
258
  messages_for_moderation, detection_config_v2, model_version="20251105"
259
  )
260
  print(f"[DEBUG] V2 User Input Response: {moderation_api_response_v2}")
261
+
262
  result_v2 = moderation_api_response_v2.get("result", {})
263
  recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
264
+
265
  # Extract adversarial v2 probability from security->adversarial
266
  v2_probability = 0.0
267
  category_details_v2 = result_v2.get("category_details", {})
268
  print(f"[DEBUG] V2 category_details: {category_details_v2}")
269
+
270
  if "security" in category_details_v2:
271
  v2_probability = category_details_v2["security"].get("adversarial", 0.0)
272
  print(f"[DEBUG] V2 probability extracted: {v2_probability}")
273
+
274
  # Add to categories as adversarial_v2
275
  if "security" not in user_input_moderation_result.categories:
276
  user_input_moderation_result.categories["security"] = {}
277
+ user_input_moderation_result.categories["security"]["adversarial_v2"] = (
278
+ v2_probability
279
+ )
280
+
281
  # Check if v2 flagged content
282
  v2_flagged = v2_probability > adversarial_threshold_2
283
  if v2_flagged or recommendation_v2.get("action") == "block":
 
312
  unguarded_text += token
313
  unguarded_history[-1][1] = unguarded_text
314
  guarded_history[-1][1] = unguarded_text
315
+ yield (
316
+ unguarded_history,
317
+ guarded_history,
318
+ (
319
+ moderation_html
320
+ if moderate_user_input
321
+ else "<p>Moderating response...</p>"
322
+ ),
323
  )
324
 
325
  # Step 3: Moderate bot response
 
327
  if moderate_user_input:
328
  messages_for_moderation.append({"role": "user", "content": user_message})
329
  messages_for_moderation.append({"role": "assistant", "content": unguarded_text})
330
+
331
  # Call v1 moderation if adversarial v1 is enabled
332
  is_blocked = False
333
  categories = {}
334
  flagged = False
335
+
336
  if security_enabled and adversarial_checkbox:
337
  moderation_api_response = await chatbot.moderate_chat_history(
338
  messages_for_moderation, detection_config
 
349
  # Build detection config for v2 - only adversarial
350
  detection_config_v2 = {
351
  "safety": False,
352
+ "security": {"adversarial": adversarial_threshold_2},
 
 
353
  }
354
+
355
  moderation_api_response_v2 = await chatbot.moderate_chat_history(
356
  messages_for_moderation, detection_config_v2, model_version="20251105"
357
  )
358
  print(f"[DEBUG] V2 Bot Response: {moderation_api_response_v2}")
359
+
360
  result_v2 = moderation_api_response_v2.get("result", {})
361
  recommendation_v2 = moderation_api_response_v2.get("recommendation", {})
362
+
363
  # Extract adversarial v2 probability from security->adversarial
364
  v2_probability = 0.0
365
  category_details_v2 = result_v2.get("category_details", {})
366
  print(f"[DEBUG] V2 category_details: {category_details_v2}")
367
+
368
  if "security" in category_details_v2:
369
  v2_probability = category_details_v2["security"].get("adversarial", 0.0)
370
  print(f"[DEBUG] V2 probability extracted: {v2_probability}")
371
+
372
  # Add to categories as adversarial_v2
373
  if "security" not in categories:
374
  categories["security"] = {}
375
  categories["security"]["adversarial_v2"] = v2_probability
376
+
377
  # Check if v2 flagged content
378
  v2_flagged = v2_probability > adversarial_threshold_2
379
  if v2_flagged or recommendation_v2.get("action") == "block":
 
593
  )
594
 
595
  # Master toggle sync
596
+ safety_checkboxes = [
597
+ hate_checkbox,
598
+ sexual_checkbox,
599
+ violence_checkbox,
600
+ wrongdoing_checkbox,
601
+ ]
602
  security_checkboxes = [adversarial_checkbox, adversarial_checkbox_2]
603
 
604
  def sync_subs_to_master(*subs):
 
621
  sync_master_to_subs, [safety_enabled, *safety_checkboxes], safety_checkboxes
622
  )
623
  security_enabled.change(
624
+ sync_master_to_subs,
625
+ [security_enabled, *security_checkboxes],
626
+ security_checkboxes,
627
  )
628
 
629
  return demo