Added
This commit is contained in:
182
scheduler_bots/combine_thesis_html.py
Normal file
182
scheduler_bots/combine_thesis_html.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
combine_thesis_html.py - Combines all HTML files in the Thesis materials directory
|
||||
into the main thesis document
|
||||
"""
|
||||
|
||||
import os
import re

# bs4 is optional: the __main__ guard below falls back to a regex-based
# combiner when BeautifulSoup is unavailable, so a missing dependency must
# not abort the whole module at import time.
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None
def combine_html_files():
    """Merge every other .html file in the thesis directory into the main
    thesis document using BeautifulSoup.

    Each secondary file's <body> children are moved into a styled wrapper
    <div> that is appended to the main document's <body>, preceded by a
    separator <hr> and an "Additional Thesis Materials" heading.  The
    combined result overwrites the main file in place.
    """
    # Directory containing the HTML files.
    thesis_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"

    # Main file that receives the appended content.
    main_file = "Thesis_ Intelligent School Schedule Management System.html"
    main_file_path = os.path.join(thesis_dir, main_file)

    # Sort for a deterministic section order (os.listdir order is arbitrary).
    html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))

    print(f"Found {len(html_files)} HTML files:")
    for i, name in enumerate(html_files, 1):
        print(f"  {i}. {name}")

    # Parse the main document.
    with open(main_file_path, 'r', encoding='utf-8') as f:
        soup_main = BeautifulSoup(f.read(), 'html.parser')

    # Ensure there is a <body> to append to.
    main_body = soup_main.find('body')
    if not main_body:
        main_body = soup_main.new_tag('body')
        if soup_main.html:
            soup_main.html.insert(0, main_body)
        else:
            soup_main.insert(0, main_body)

    # Visual separator before the appended material.
    separator = soup_main.new_tag('hr')
    separator['style'] = 'margin: 40px 0; border: 2px solid #4a6fa5;'
    main_body.append(separator)

    # Heading for the appended content.
    appendix_heading = soup_main.new_tag('h2')
    appendix_heading.string = 'Additional Thesis Materials'
    appendix_heading['style'] = 'color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;'
    main_body.append(appendix_heading)

    # Append each secondary file as its own section.
    for filename in html_files:
        if filename == main_file:  # Skip the main file itself.
            continue

        print(f"Processing {filename}...")
        file_path = os.path.join(thesis_dir, filename)

        with open(file_path, 'r', encoding='utf-8') as f:
            soup_additional = BeautifulSoup(f.read(), 'html.parser')

        # Styled wrapper for this file's content.
        section_div = soup_main.new_tag('div')
        section_div['class'] = 'additional-section'
        section_div['style'] = 'margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;'

        section_heading = soup_main.new_tag('h3')
        section_heading.string = f'Content from: {filename}'
        section_heading['style'] = 'color: #4a6fa5; margin-top: 0;'
        section_div.append(section_heading)

        additional_body = soup_additional.find('body')
        if additional_body:
            # Snapshot children before extracting: extract() mutates the
            # underlying list, and iterating it live skips elements.
            for child in list(additional_body.children):
                if child.name:  # Only move real elements, not bare text nodes.
                    section_div.append(child.extract())
        else:
            # No <body> tag: take the whole parsed document.
            section_div.append(soup_additional)

        main_body.append(section_div)

    # prettify() already returns str; write it back over the main file.
    with open(main_file_path, 'w', encoding='utf-8') as f:
        f.write(soup_main.prettify())

    print(f"All HTML files have been combined into {main_file}")
    print(f"Combined file saved at: {main_file_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Prefer the BeautifulSoup-based combiner; fall back to a plain
    # regex-based combination when bs4 is not installed.
    # NOTE(review): bs4 is also imported at module level above, so on a
    # machine without bs4 the ImportError can fire before this guard is
    # reached — confirm the top-level import is optional if the fallback
    # path matters.
    try:
        import bs4
        combine_html_files()
    except ImportError:
        print("BeautifulSoup4 library is required for this script.")
        print("Install it with: pip install beautifulsoup4")

        print("Creating a basic combination without BeautifulSoup...")

        thesis_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"
        main_file = "Thesis_ Intelligent School Schedule Management System.html"
        main_file_path = os.path.join(thesis_dir, main_file)

        # Sort for a deterministic section order (os.listdir order is arbitrary).
        html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))

        with open(main_file_path, 'r', encoding='utf-8') as f:
            main_content = f.read()

        # Insert just before </body>; failing that, before </html>;
        # failing that, append at end of file.
        body_close_pos = main_content.rfind('</body>')
        if body_close_pos != -1:
            insert_pos = body_close_pos
        else:
            html_close_pos = main_content.rfind('</html>')
            insert_pos = html_close_pos if html_close_pos != -1 else len(main_content)

        # Header for the appended material (mirrors the bs4 path's markup).
        additional_content = '\n\n<!-- Additional Thesis Materials -->\n<hr style="margin: 40px 0; border: 2px solid #4a6fa5;">\n<h2 style="color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;">Additional Thesis Materials</h2>\n\n'

        for filename in html_files:
            if filename == main_file:  # Skip the main file itself.
                continue

            print(f"Processing {filename}...")
            file_path = os.path.join(thesis_dir, filename)

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Strip document chrome so only body-level markup remains.
            content = re.sub(r'<!DOCTYPE[^>]*>', '', content, flags=re.IGNORECASE)
            content = re.sub(r'<html[^>]*>|</html>', '', content, flags=re.IGNORECASE)
            content = re.sub(r'<head[^>]*>.*?</head>', '', content, flags=re.DOTALL | re.IGNORECASE)
            content = re.sub(r'<body[^>]*>|</body>', '', content, flags=re.IGNORECASE)

            # Wrap in the same styled section the bs4 path produces.
            section_content = '\n<div class="additional-section" style="margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;">\n'
            section_content += f'<h3 style="color: #4a6fa5; margin-top: 0;">Content from: {filename}</h3>\n'
            section_content += content
            section_content += '\n</div>\n'

            additional_content += section_content

        # Splice the collected sections into the main document.
        combined_content = main_content[:insert_pos] + additional_content + main_content[insert_pos:]

        with open(main_file_path, 'w', encoding='utf-8') as f:
            f.write(combined_content)

        print(f"All HTML files have been combined into {main_file}")
        print(f"Combined file saved at: {main_file_path}")
|
||||
Reference in New Issue
Block a user