Added
This commit is contained in:
182
scheduler_bots/combine_thesis_html.py
Normal file
182
scheduler_bots/combine_thesis_html.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
combine_thesis_html.py - Combines all HTML files in the Thesis materials directory
|
||||
into the main thesis document
|
||||
"""
|
||||
|
||||
import os
import re

# bs4 is optional: the __main__ guard below falls back to a regex-based
# combiner when BeautifulSoup is unavailable, so a missing dependency must
# not abort the whole module at import time.
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None
def combine_html_files():
    """Merge every other .html file in the thesis directory into the main
    thesis document using BeautifulSoup.

    Each secondary file's <body> children are moved into a styled wrapper
    <div> that is appended to the main document's <body>, preceded by a
    separator <hr> and an "Additional Thesis Materials" heading.  The
    combined result overwrites the main file in place.
    """
    # Directory containing the HTML files.
    thesis_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"

    # Main file that receives the appended content.
    main_file = "Thesis_ Intelligent School Schedule Management System.html"
    main_file_path = os.path.join(thesis_dir, main_file)

    # Sort for a deterministic section order (os.listdir order is arbitrary).
    html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))

    print(f"Found {len(html_files)} HTML files:")
    for i, name in enumerate(html_files, 1):
        print(f"  {i}. {name}")

    # Parse the main document.
    with open(main_file_path, 'r', encoding='utf-8') as f:
        soup_main = BeautifulSoup(f.read(), 'html.parser')

    # Ensure there is a <body> to append to.
    main_body = soup_main.find('body')
    if not main_body:
        main_body = soup_main.new_tag('body')
        if soup_main.html:
            soup_main.html.insert(0, main_body)
        else:
            soup_main.insert(0, main_body)

    # Visual separator before the appended material.
    separator = soup_main.new_tag('hr')
    separator['style'] = 'margin: 40px 0; border: 2px solid #4a6fa5;'
    main_body.append(separator)

    # Heading for the appended content.
    appendix_heading = soup_main.new_tag('h2')
    appendix_heading.string = 'Additional Thesis Materials'
    appendix_heading['style'] = 'color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;'
    main_body.append(appendix_heading)

    # Append each secondary file as its own section.
    for filename in html_files:
        if filename == main_file:  # Skip the main file itself.
            continue

        print(f"Processing {filename}...")
        file_path = os.path.join(thesis_dir, filename)

        with open(file_path, 'r', encoding='utf-8') as f:
            soup_additional = BeautifulSoup(f.read(), 'html.parser')

        # Styled wrapper for this file's content.
        section_div = soup_main.new_tag('div')
        section_div['class'] = 'additional-section'
        section_div['style'] = 'margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;'

        section_heading = soup_main.new_tag('h3')
        section_heading.string = f'Content from: {filename}'
        section_heading['style'] = 'color: #4a6fa5; margin-top: 0;'
        section_div.append(section_heading)

        additional_body = soup_additional.find('body')
        if additional_body:
            # Snapshot children before extracting: extract() mutates the
            # underlying list, and iterating it live skips elements.
            for child in list(additional_body.children):
                if child.name:  # Only move real elements, not bare text nodes.
                    section_div.append(child.extract())
        else:
            # No <body> tag: take the whole parsed document.
            section_div.append(soup_additional)

        main_body.append(section_div)

    # prettify() already returns str; write it back over the main file.
    with open(main_file_path, 'w', encoding='utf-8') as f:
        f.write(soup_main.prettify())

    print(f"All HTML files have been combined into {main_file}")
    print(f"Combined file saved at: {main_file_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Prefer the BeautifulSoup-based combiner; fall back to a plain
    # regex-based combination when bs4 is not installed.
    # NOTE(review): bs4 is also imported at module level above, so on a
    # machine without bs4 the ImportError can fire before this guard is
    # reached — confirm the top-level import is optional if the fallback
    # path matters.
    try:
        import bs4
        combine_html_files()
    except ImportError:
        print("BeautifulSoup4 library is required for this script.")
        print("Install it with: pip install beautifulsoup4")

        print("Creating a basic combination without BeautifulSoup...")

        thesis_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3/scheduler_bots/Thesis materials"
        main_file = "Thesis_ Intelligent School Schedule Management System.html"
        main_file_path = os.path.join(thesis_dir, main_file)

        # Sort for a deterministic section order (os.listdir order is arbitrary).
        html_files = sorted(f for f in os.listdir(thesis_dir) if f.endswith('.html'))

        with open(main_file_path, 'r', encoding='utf-8') as f:
            main_content = f.read()

        # Insert just before </body>; failing that, before </html>;
        # failing that, append at end of file.
        body_close_pos = main_content.rfind('</body>')
        if body_close_pos != -1:
            insert_pos = body_close_pos
        else:
            html_close_pos = main_content.rfind('</html>')
            insert_pos = html_close_pos if html_close_pos != -1 else len(main_content)

        # Header for the appended material (mirrors the bs4 path's markup).
        additional_content = '\n\n<!-- Additional Thesis Materials -->\n<hr style="margin: 40px 0; border: 2px solid #4a6fa5;">\n<h2 style="color: #2c3e50; margin-top: 40px; border-bottom: 2px solid #4a6fa5; padding-bottom: 10px;">Additional Thesis Materials</h2>\n\n'

        for filename in html_files:
            if filename == main_file:  # Skip the main file itself.
                continue

            print(f"Processing {filename}...")
            file_path = os.path.join(thesis_dir, filename)

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Strip document chrome so only body-level markup remains.
            content = re.sub(r'<!DOCTYPE[^>]*>', '', content, flags=re.IGNORECASE)
            content = re.sub(r'<html[^>]*>|</html>', '', content, flags=re.IGNORECASE)
            content = re.sub(r'<head[^>]*>.*?</head>', '', content, flags=re.DOTALL | re.IGNORECASE)
            content = re.sub(r'<body[^>]*>|</body>', '', content, flags=re.IGNORECASE)

            # Wrap in the same styled section the bs4 path produces.
            section_content = '\n<div class="additional-section" style="margin: 30px 0; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #fafafa;">\n'
            section_content += f'<h3 style="color: #4a6fa5; margin-top: 0;">Content from: {filename}</h3>\n'
            section_content += content
            section_content += '\n</div>\n'

            additional_content += section_content

        # Splice the collected sections into the main document.
        combined_content = main_content[:insert_pos] + additional_content + main_content[insert_pos:]

        with open(main_file_path, 'w', encoding='utf-8') as f:
            f.write(combined_content)

        print(f"All HTML files have been combined into {main_file}")
        print(f"Combined file saved at: {main_file_path}")
|
||||
Reference in New Issue
Block a user