#!/usr/bin/env python
"""
consolidate_theses.py - Consolidates all HTML thesis files into a single HTML file
with clear separation between different documents
"""
import html
import os
import re
# Directory scanned when the caller does not pass ``parent_dir``.
_DEFAULT_PARENT_DIR = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3"

# Thesis files merged (in this order) when the caller does not pass ``html_files``.
_DEFAULT_HTML_FILES = (
    "Lesson_ SQLite Database Implementation.html",
    "Presentaion_School Schedule Assistant Bot _ Student Project.html",
    "Professional_Thesis_Scheduler_Bot.html",
    "Scheduler Bot_ Telegram & CSV Database.html",
    "Student Database Search System _ Beginner's Guide.html",
    "Thesis_ Intelligent School Schedule Management System_23_Jan_2026.html",
    "Thesis_AI7_Building_A_ Scheduler_Bot_A Student Project.html",
)

# NOTE(review): this is a deliberately minimal page skeleton. If the project
# has its own header/footer markup or stylesheet, substitute it here.
_PAGE_HEADER = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Consolidated Theses</title>
</head>
<body>
"""
_PAGE_FOOTER = """</body>
</html>
"""

# Compiled once at import time; every pattern is case-insensitive because
# HTML tag names are (e.g. <BODY> and <body> are the same element).
_DOCTYPE_RE = re.compile(r'<!DOCTYPE[^>]*>', re.IGNORECASE)
_BODY_OPEN_RE = re.compile(r'<body\b[^>]*>', re.IGNORECASE)
_BODY_CLOSE_RE = re.compile(r'</body\s*>', re.IGNORECASE)
_HEAD_RE = re.compile(r'<head\b[^>]*>.*?</head\s*>', re.DOTALL | re.IGNORECASE)
_HTML_TAG_RE = re.compile(r'</?html\b[^>]*>', re.IGNORECASE)


def _extract_body(content):
    """Return the part of an HTML document that belongs inside ``<body>``."""
    content = _DOCTYPE_RE.sub('', content)

    opening = _BODY_OPEN_RE.search(content)
    if opening:
        inner = content[opening.end():]
        # Cut at the LAST closing tag so body text that merely mentions
        # "</body>" earlier on is not truncated.
        closing = None
        for closing in _BODY_CLOSE_RE.finditer(inner):
            pass
        if closing is not None:
            return inner[:closing.start()]
        # Unclosed body: keep everything, minus a trailing </html>.
        return _HTML_TAG_RE.sub('', inner)

    # No <body> element: fall back to dropping the <head> section and the
    # <html> wrapper. A stray </body> is removed as well, because it would
    # otherwise terminate the consolidated page early.
    content = _HEAD_RE.sub('', content, count=1)
    content = _BODY_CLOSE_RE.sub('', content)
    return _HTML_TAG_RE.sub('', content)


def consolidate_html_theses(parent_dir=None, html_files=None,
                            output_file="consolidated_theses.html"):
    """Merge several HTML thesis files into one HTML page.

    Each input file that exists is reduced to its body content and wrapped
    in its own titled section, so the documents stay visually separated.
    Files that do not exist are skipped. A file that exists but cannot be
    read gets a section containing the error message instead of its content.

    Args:
        parent_dir: Directory containing the input files. Defaults to the
            project directory hard-coded in ``_DEFAULT_PARENT_DIR``.
        html_files: Iterable of file names (relative to ``parent_dir``) to
            merge, in order. Defaults to ``_DEFAULT_HTML_FILES``.
        output_file: Path of the page to write. A relative path is resolved
            against the current working directory.

    Returns:
        None. The result is written to ``output_file`` and a two-line
        summary is printed.
    """
    if parent_dir is None:
        parent_dir = _DEFAULT_PARENT_DIR
    if html_files is None:
        html_files = _DEFAULT_HTML_FILES

    # Collect fragments and join once at the end instead of growing a
    # string with repeated ``+=``.
    parts = [_PAGE_HEADER]
    included = 0

    for filename in html_files:
        filepath = os.path.join(parent_dir, filename)
        if not os.path.exists(filepath):
            continue
        included += 1

        # The file name (without extension) doubles as the section title;
        # it is escaped because names may contain '&' or quotes.
        title = html.escape(os.path.splitext(filename)[0])
        parts.append(
            '<section class="document">\n'
            f'<h2 class="document-title">{title}</h2>\n'
            '<div class="document-content">\n'
        )

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error processing {filename}: {str(e)}")
            # Escape the message: it can contain the path and other
            # characters that are not safe to drop into markup verbatim.
            parts.append(
                f"<p>Error reading this document: {html.escape(str(e))}</p>\n"
            )
        else:
            parts.append(_extract_body(content))

        # Close the document content div and its section.
        parts.append('\n</div>\n</section>\n')

    parts.append(_PAGE_FOOTER)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(parts))

    print(f"Consolidated HTML thesis document created: {output_file}")
    print(f"Included {included} documents in the consolidated file")
if __name__ == "__main__":
    # Build the consolidated page only when this file is executed as a
    # script; importing the module must not touch the filesystem.
    consolidate_html_theses()