throgletworld committed on
Commit
3e58827
·
verified ·
1 Parent(s): 39dbbe7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -141,7 +141,27 @@ def analyze_audio(audio_path, threshold, progress=gr.Progress()):
141
  timeline_rows.append({"time": time_str, "detected": detected or ["Fluent"], "probs": probs})
142
 
143
  progress(0.75, desc="Transcribing ...")
144
- transcription = whisper_model.transcribe(audio_path).get("text", "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  progress(0.90, desc="Building report ...")
147
  total_stutters = sum(counts.values())
@@ -180,9 +200,9 @@ def analyze_audio(audio_path, threshold, progress=gr.Progress()):
180
 
181
  summary_md = "\n".join(summary_lines)
182
 
183
- tl_lines = ["| Time | Detected |", "|------|----------|"]
184
  for row in timeline_rows:
185
- tl_lines.append(f"| {row['time']} | {', '.join(row['detected'])} |")
186
  timeline_md = "\n".join(tl_lines)
187
 
188
  recs = ["## Recommendations\n"]
 
141
  timeline_rows.append({"time": time_str, "detected": detected or ["Fluent"], "probs": probs})
142
 
143
  progress(0.75, desc="Transcribing ...")
144
+ whisper_result = whisper_model.transcribe(audio_path, word_timestamps=True)
145
+ transcription = whisper_result.get("text", "").strip()
146
+
147
+ # Extract word-level timestamps from Whisper
148
+ word_timestamps_list = []
149
+ for seg in whisper_result.get("segments", []):
150
+ for w in seg.get("words", []):
151
+ word_timestamps_list.append({
152
+ "word": w["word"].strip(),
153
+ "start": w["start"],
154
+ "end": w["end"],
155
+ })
156
+
157
+ # Map words to each chunk's time range
158
+ for row in timeline_rows:
159
+ t_start, t_end = [float(x) for x in row["time"].replace("s", "").split("-")]
160
+ chunk_words = [
161
+ w["word"] for w in word_timestamps_list
162
+ if w["start"] >= t_start - 0.15 and w["end"] <= t_end + 0.15
163
+ ]
164
+ row["words"] = " ".join(chunk_words) if chunk_words else "—"
165
 
166
  progress(0.90, desc="Building report ...")
167
  total_stutters = sum(counts.values())
 
200
 
201
  summary_md = "\n".join(summary_lines)
202
 
203
+ tl_lines = ["| Time | Detected | Words Spoken |", "|------|----------|--------------|"]
204
  for row in timeline_rows:
205
+ tl_lines.append(f"| {row['time']} | {', '.join(row['detected'])} | {row.get('words', '—')} |")
206
  timeline_md = "\n".join(tl_lines)
207
 
208
  recs = ["## Recommendations\n"]