This commit is contained in:
2026-02-05 10:15:09 +03:00
parent 2427fce842
commit 67241a5ed0
33 changed files with 13147 additions and 154 deletions

View File

@@ -0,0 +1,182 @@
#!/usr/bin/env python
"""
combine_thesis_html.py - Combines all HTML files in the Thesis materials directory
into the main thesis document
"""
import os
import re
from bs4 import BeautifulSoup
def combine_html_files(
    thesis_dir="/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials",
    main_file="Thesis_ Intelligent School Schedule Management System.html",
):
    """Append the body elements of every other HTML file in a directory to a main file.

    Every ``*.html`` file in ``thesis_dir`` except ``main_file`` is parsed with
    BeautifulSoup; the element children of its ``<body>`` (text nodes directly
    under ``<body>`` are not copied) are moved into a styled ``<div>`` that is
    appended to the body of the main document. The main file is then
    overwritten in place with the prettified result.

    Note: the output overwrites ``main_file`` itself, so running the function
    again appends the additional sections a second time.

    Args:
        thesis_dir: Directory that holds the main file and the files to merge.
        main_file: File name (inside ``thesis_dir``) of the document that
            receives the merged content.

    Returns:
        None. Progress is reported with ``print``.
    """
    main_file_path = os.path.join(thesis_dir, main_file)

    # os.listdir() returns entries in arbitrary order; sort so the appended
    # sections appear in the same order on every run and platform.
    html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))
    print(f"Found {len(html_files)} HTML files:")
    for index, name in enumerate(html_files, 1):
        print(f" {index}. {name}")

    with open(main_file_path, 'r', encoding='utf-8') as src:
        soup_main = BeautifulSoup(src.read(), 'html.parser')

    main_body = soup_main.find('body')
    if main_body is None:
        # No <body> in the main document: create one at the END of <html>
        # (or of the document), so it follows <head> and any existing content
        # instead of being placed in front of them.
        main_body = soup_main.new_tag('body')
        container = soup_main.html if soup_main.html is not None else soup_main
        container.append(main_body)

    # Visual separator between the original document and the merged material.
    separator = soup_main.new_tag('hr')
    separator['style'] = 'margin: 40px 0; border: 2px solid #4a6fa5;'
    main_body.append(separator)

    appendix_heading = soup_main.new_tag('h2')
    appendix_heading.string = 'Additional Thesis Materials'
    appendix_heading['style'] = 'color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;'
    main_body.append(appendix_heading)

    for filename in html_files:
        if filename == main_file:  # Never merge the main file into itself
            continue
        print(f"Processing {filename}...")
        file_path = os.path.join(thesis_dir, filename)
        with open(file_path, 'r', encoding='utf-8') as src:
            soup_additional = BeautifulSoup(src.read(), 'html.parser')

        # One wrapper <div> per source file, labelled with the file name.
        section_div = soup_main.new_tag('div')
        section_div['class'] = 'additional-section'
        section_div['style'] = 'margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;'

        section_heading = soup_main.new_tag('h3')
        section_heading.string = f'Content from: {filename}'
        section_heading['style'] = 'color: #4a6fa5; margin-top: 0;'
        section_div.append(section_heading)

        additional_body = soup_additional.find('body')
        if additional_body is not None:
            # Snapshot the children before moving them: extract() removes the
            # node from the body's child list, and mutating that list while
            # iterating over it directly makes the loop skip elements.
            for child in list(additional_body.children):
                if child.name:  # Only copy actual elements, not text nodes
                    section_div.append(child.extract())
        else:
            # No <body> tag: take the whole parsed document as the section content.
            section_div.append(soup_additional)

        main_body.append(section_div)

    # prettify() already returns a str, so it can be written directly.
    with open(main_file_path, 'w', encoding='utf-8') as dst:
        dst.write(soup_main.prettify())
    print(f"All HTML files have been combined into {main_file}")
    print(f"Combined file saved at: {main_file_path}")
def combine_html_files_basic(
    thesis_dir="/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials",
    main_file="Thesis_ Intelligent School Schedule Management System.html",
):
    """Merge HTML files into the main file using only string and regex operations.

    This is the fallback used when BeautifulSoup is not installed. For every
    ``*.html`` file in ``thesis_dir`` other than ``main_file`` it strips the
    doctype, the ``<html>``/``<body>`` tags and the whole ``<head>`` section,
    wraps what is left in a styled ``<div>`` and inserts all sections just
    before the last ``</body>`` of the main file (or before the last
    ``</html>``, or at the very end when neither closing tag exists). The
    main file is overwritten in place.

    Args:
        thesis_dir: Directory that holds the main file and the files to merge.
        main_file: File name (inside ``thesis_dir``) of the document that
            receives the merged content.

    Returns:
        None. Progress is reported with ``print``.
    """
    import html  # function-scope import: only this fallback needs html.escape

    main_file_path = os.path.join(thesis_dir, main_file)

    # Sorted so the section order does not depend on the arbitrary order in
    # which os.listdir() returns directory entries.
    html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))

    with open(main_file_path, 'r', encoding='utf-8') as src:
        main_content = src.read()

    # Insert before the last closing body tag, else before the last closing
    # html tag, else append at the end. The search is case-insensitive so it
    # agrees with the tag-stripping patterns below (e.g. "</BODY>" is found).
    insert_pos = len(main_content)
    for closing_tag in (r'</body\s*>', r'</html\s*>'):
        hits = list(re.finditer(closing_tag, main_content, flags=re.IGNORECASE))
        if hits:
            insert_pos = hits[-1].start()
            break

    # Document-wrapper markup removed from each merged file, applied in this
    # order. The \b after the tag name keeps "<head" from also matching
    # "<header ...>".
    wrapper_patterns = [
        re.compile(r'<!DOCTYPE[^>]*>', re.IGNORECASE),
        re.compile(r'<html\b[^>]*>|</html\s*>', re.IGNORECASE),
        re.compile(r'<head\b[^>]*>.*?</head\s*>', re.DOTALL | re.IGNORECASE),
        re.compile(r'<body\b[^>]*>|</body\s*>', re.IGNORECASE),
    ]

    parts = ['\n\n<!-- Additional Thesis Materials -->\n<hr style="margin: 40px 0; border: 2px solid #4a6fa5;">\n<h2 style="color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;">Additional Thesis Materials</h2>\n\n']

    for filename in html_files:
        if filename == main_file:  # Never merge the main file into itself
            continue
        print(f"Processing {filename}...")
        with open(os.path.join(thesis_dir, filename), 'r', encoding='utf-8') as src:
            content = src.read()
        for pattern in wrapper_patterns:
            content = pattern.sub('', content)

        parts.append('\n<div class="additional-section" style="margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;">\n')
        # The file name is plain text, not markup: escape it so characters
        # such as "&" or "<" in a name cannot break the generated HTML.
        parts.append(f'<h3 style="color: #4a6fa5; margin-top: 0;">Content from: {html.escape(filename)}</h3>\n')
        parts.append(content)
        parts.append('\n</div>\n')

    additional_content = ''.join(parts)
    combined_content = main_content[:insert_pos] + additional_content + main_content[insert_pos:]

    with open(main_file_path, 'w', encoding='utf-8') as dst:
        dst.write(combined_content)
    print(f"All HTML files have been combined into {main_file}")
    print(f"Combined file saved at: {main_file_path}")


if __name__ == "__main__":
    # NOTE(review): the file-level `from bs4 import BeautifulSoup` runs before
    # this guard, so when bs4 is missing the script currently fails at import
    # time and never reaches the fallback below. That import has to be made
    # lazy/optional for the fallback to become reachable.
    try:
        import bs4  # noqa: F401 - imported only to probe availability
    except ImportError:
        print("BeautifulSoup4 library is required for this script.")
        print("Install it with: pip install beautifulsoup4")
        print("Creating a basic combination without BeautifulSoup...")
        combine_html_files_basic()
    else:
        # Kept outside the try body so an ImportError raised while combining
        # is not mistaken for "bs4 is not installed".
        combine_html_files()