CyrineElghali committed on
Commit
49dfdf6
·
verified ·
1 Parent(s): a8bfaa0

Create pdfutils.py

Browse files
Files changed (1) hide show
  1. pdfutils.py +117 -0
pdfutils.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from reportlab.lib.pagesizes import A4, landscape
2
+ from reportlab.lib.units import inch
3
+ from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
4
+ from reportlab.lib import colors
5
+ from reportlab.lib.styles import getSampleStyleSheet
6
+ import tempfile
7
+ import streamlit as st
8
+ import json
9
+
10
+
11
def _strip_heading_marker(line):
    """Return *line* without its leading run of ``#`` heading markers."""
    # Strip only the '#' run, then surrounding whitespace, so heading text
    # that itself begins with '#' (e.g. "# #1 Course") is preserved.
    return line.lstrip("#").strip()


def _split_table_row(line):
    """Split a markdown table line into stripped cells, keeping empty cells."""
    inner = line.strip()
    # Drop only the outer pipes; inner empty cells must stay in place so the
    # remaining cells keep their column position.
    if inner.startswith("|"):
        inner = inner[1:]
    if inner.endswith("|"):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split("|")]


def _is_separator_row(cells):
    """Return True when every cell is a delimiter such as ``---`` or ``:--:``."""
    return bool(cells) and all(
        cell and set(cell) <= {"-", ":"} and "-" in cell for cell in cells
    )


def process_markdown(markdown_text: str) -> str:
    """
    Extract the syllabus title and the per-module tables from markdown text.

    The first line starting with ``"# "`` becomes the syllabus title. Every
    line containing ``"Module: "`` starts a new section; within a section the
    first table line is taken as the header row and the following table lines
    as data rows. Delimiter rows (``|---|---|``, ``| --- | :---: |``, ...) are
    skipped. Table lines that appear before the first module line are not
    emitted.

    Args:
        markdown_text (str): Markdown source to parse.
    Returns:
        str: JSON array (indent=4) of objects with the keys
        ``syllabus_title``, ``title``, ``headers`` and ``table``.
    """
    result = []
    syllabus_title = None
    title = None
    headers = []
    table_data = []

    for raw_line in markdown_text.strip().splitlines():
        line = raw_line.strip()

        if line.startswith("# ") and not syllabus_title:  # Syllabus title (only once)
            syllabus_title = _strip_heading_marker(line)

        elif "Module: " in line:  # Section title
            # Flush the previous section before starting a new one.
            if title:
                result.append({"syllabus_title": syllabus_title, "title": title,
                               "headers": headers, "table": table_data})
            title = _strip_heading_marker(line)
            headers = []
            table_data = []

        elif "|" in line:
            cells = _split_table_row(line)
            if _is_separator_row(cells):
                continue
            if not headers:
                # A line made only of pipes carries no header text; wait for
                # the next table line instead of recording blank headers.
                if any(cells):
                    headers = cells
            else:
                table_data.append(cells)

    if title:
        result.append({"syllabus_title": syllabus_title, "title": title,
                       "headers": headers, "table": table_data})

    return json.dumps(result, indent=4)
48
+
49
def create_pdf(json_string):
    """
    Generate a PDF from a JSON string containing titles and tables.

    The JSON must decode to a list of objects with the optional keys
    ``syllabus_title``, ``title``, ``headers`` and ``table``. The first
    non-empty ``syllabus_title`` is rendered once at the top; each item then
    contributes its title and, when it has table rows, a table whose first
    (repeated) row is the header row. A fixed footer paragraph is appended.

    Args:
        json_string (str): JSON string containing extracted markdown data.
    Returns:
        str: Filepath of the generated PDF (a temp file that is not deleted
        automatically; the caller owns it).
    Raises:
        ValueError: If ``json_string`` is not valid JSON.
    """
    # Validate the payload before touching the filesystem so invalid input
    # does not leave an orphaned temp file behind.
    try:
        data = json.loads(json_string)
    except json.JSONDecodeError as exc:
        raise ValueError("Invalid JSON string provided.") from exc

    # Only the unique path is needed; close the handle right away so it is
    # not leaked and the document builder can open the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        filepath = temp_file.name

    page_size = landscape(A4)
    doc = SimpleDocTemplate(filepath, pagesize=page_size, rightMargin=inch, leftMargin=inch,
                            topMargin=inch, bottomMargin=inch)
    elements = []
    styles = getSampleStyleSheet()

    # Loop-invariant: usable width between the left and right margins.
    available_width = page_size[0] - (doc.leftMargin + doc.rightMargin)

    syllabus_title_added = False

    for item in data:
        if not syllabus_title_added:
            syllabus_title = item.get("syllabus_title", "")
            if syllabus_title:
                elements.append(Paragraph(syllabus_title, styles['h1']))
                elements.append(Spacer(1, 12))
                elements.append(Spacer(1, 12))
                syllabus_title_added = True

        title = item.get("title", "")
        headers = item.get("headers") or []
        table_data = item.get("table") or []

        if title:
            elements.append(Paragraph(title, styles['h1']))
            elements.append(Spacer(1, 12))

        if table_data:
            # Header row first; build new lists so the parsed data is not mutated.
            rows = [list(headers)] + [list(row) for row in table_data]

            # Size the table by its widest row so the number of column widths
            # always matches the number of cells per row, then pad short rows.
            num_columns = max(1, max(len(row) for row in rows))
            col_widths = [available_width / num_columns] * num_columns
            rows = [row + [""] * (num_columns - len(row)) for row in rows]

            wrapped_data = [
                [
                    Paragraph(
                        ("" if cell is None else str(cell)).replace('<br>', '<br/>'),
                        styles['Normal'],
                    )
                    for cell in row
                ]
                for row in rows
            ]

            table = Table(wrapped_data, colWidths=col_widths, repeatRows=1)
            style = TableStyle([
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('GRID', (0, 0), (-1, -1), 1, colors.black),
                ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                ('WORDWRAP', (0, 0), (-1, -1), 'ON')
            ])
            table.setStyle(style)
            elements.append(table)
            elements.append(Spacer(1, 12))

    footer = Paragraph("Generated using the OBE Syllabus Maker created by the WVSU AI Dev Team (c) 2025.", styles['Normal'])
    elements.append(footer)

    doc.build(elements)
    return filepath