Upload app.py
Browse files
app.py
CHANGED
|
@@ -141,7 +141,27 @@ def analyze_audio(audio_path, threshold, progress=gr.Progress()):
|
|
| 141 |
timeline_rows.append({"time": time_str, "detected": detected or ["Fluent"], "probs": probs})
|
| 142 |
|
| 143 |
progress(0.75, desc="Transcribing ...")
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
progress(0.90, desc="Building report ...")
|
| 147 |
total_stutters = sum(counts.values())
|
|
@@ -180,9 +200,9 @@ def analyze_audio(audio_path, threshold, progress=gr.Progress()):
|
|
| 180 |
|
| 181 |
summary_md = "\n".join(summary_lines)
|
| 182 |
|
| 183 |
-
tl_lines = ["| Time | Detected |", "|------|----------|"]
|
| 184 |
for row in timeline_rows:
|
| 185 |
-
tl_lines.append(f"| {row['time']} | {', '.join(row['detected'])} |")
|
| 186 |
timeline_md = "\n".join(tl_lines)
|
| 187 |
|
| 188 |
recs = ["## Recommendations\n"]
|
|
|
|
| 141 |
timeline_rows.append({"time": time_str, "detected": detected or ["Fluent"], "probs": probs})
|
| 142 |
|
| 143 |
progress(0.75, desc="Transcribing ...")
|
| 144 |
+
whisper_result = whisper_model.transcribe(audio_path, word_timestamps=True)
|
| 145 |
+
transcription = whisper_result.get("text", "").strip()
|
| 146 |
+
|
| 147 |
+
# Extract word-level timestamps from Whisper
|
| 148 |
+
word_timestamps_list = []
|
| 149 |
+
for seg in whisper_result.get("segments", []):
|
| 150 |
+
for w in seg.get("words", []):
|
| 151 |
+
word_timestamps_list.append({
|
| 152 |
+
"word": w["word"].strip(),
|
| 153 |
+
"start": w["start"],
|
| 154 |
+
"end": w["end"],
|
| 155 |
+
})
|
| 156 |
+
|
| 157 |
+
# Map words to each chunk's time range
|
| 158 |
+
for row in timeline_rows:
|
| 159 |
+
t_start, t_end = [float(x) for x in row["time"].replace("s", "").split("-")]
|
| 160 |
+
chunk_words = [
|
| 161 |
+
w["word"] for w in word_timestamps_list
|
| 162 |
+
if w["start"] >= t_start - 0.15 and w["end"] <= t_end + 0.15
|
| 163 |
+
]
|
| 164 |
+
row["words"] = " ".join(chunk_words) if chunk_words else "—"
|
| 165 |
|
| 166 |
progress(0.90, desc="Building report ...")
|
| 167 |
total_stutters = sum(counts.values())
|
|
|
|
| 200 |
|
| 201 |
summary_md = "\n".join(summary_lines)
|
| 202 |
|
| 203 |
+
tl_lines = ["| Time | Detected | Words Spoken |", "|------|----------|--------------|"]
|
| 204 |
for row in timeline_rows:
|
| 205 |
+
tl_lines.append(f"| {row['time']} | {', '.join(row['detected'])} | {row.get('words', '—')} |")
|
| 206 |
timeline_md = "\n".join(tl_lines)
|
| 207 |
|
| 208 |
recs = ["## Recommendations\n"]
|