Added

2026-02-05 10:15:09 +03:00
parent 2427fce842
commit 67241a5ed0
33 changed files with 13147 additions and 154 deletions
--- a/scheduler_bots/consolidate_theses.py
+++ b/scheduler_bots/consolidate_theses.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+"""
+consolidate_theses.py - Consolidates all HTML thesis files into a single HTML file
+with clear separation between different documents
+"""
+
+import os
+import re
+
+
+def consolidate_html_theses():
+    # Define the parent directory containing HTML files
+    parent_dir = "/Users/home/YandexDisk/TECHNOLYCEUM/ict/Year/2025/ai/ai7/ai7-m3"
+    
+    # List of HTML thesis files to consolidate
+    html_files = [
+        "Lesson_ SQLite Database Implementation.html",
+        "Presentaion_School Schedule Assistant Bot _ Student Project.html",
+        "Professional_Thesis_Scheduler_Bot.html",
+        "Scheduler Bot_ Telegram & CSV Database.html",
+        "Student Database Search System _ Beginner's Guide.html",
+        "Thesis_ Intelligent School Schedule Management System_23_Jan_2026.html",
+        "Thesis_AI7_Building_A_ Scheduler_Bot_A Student Project.html"
+    ]
+    
+    # Output file
+    output_file = "consolidated_theses.html"
+    
+    # Start building the consolidated HTML
+    consolidated_html = """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Consolidated Thesis Documents</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            margin: 20px;
+            background-color: #f9f9f9;
+            line-height: 1.6;
+        }
+        .document-separator {
+            page-break-before: always;
+            border-top: 3px solid #333;
+            margin: 30px 0;
+        }
+        .document-header {
+            background-color: #e9ecef;
+            padding: 15px;
+            border-radius: 5px;
+            margin-bottom: 20px;
+            border-left: 4px solid #007bff;
+        }
+        .document-title {
+            color: #2c3e50;
+            font-size: 24px;
+            margin: 0;
+        }
+        .document-source {
+            color: #6c757d;
+            font-size: 14px;
+            margin-top: 5px;
+        }
+        .document-content {
+            background-color: white;
+            padding: 20px;
+            border-radius: 5px;
+            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+            margin-bottom: 30px;
+        }
+        .toc {
+            background-color: #f8f9fa;
+            padding: 20px;
+            border-radius: 5px;
+            margin-bottom: 30px;
+            border-left: 4px solid #28a745;
+        }
+        .toc h2 {
+            color: #2c3e50;
+            margin-top: 0;
+        }
+        .toc ul {
+            list-style-type: decimal;
+            padding-left: 20px;
+        }
+        .toc li {
+            margin-bottom: 8px;
+        }
+        .toc a {
+            text-decoration: none;
+            color: #007bff;
+        }
+        .toc a:hover {
+            text-decoration: underline;
+        }
+        h1 {
+            color: #343a40;
+            border-bottom: 2px solid #007bff;
+            padding-bottom: 10px;
+        }
+        .footer {
+            text-align: center;
+            margin-top: 30px;
+            padding: 15px;
+            color: #6c757d;
+            font-size: 12px;
+            border-top: 1px solid #dee2e6;
+        }
+    </style>
+</head>
+<body>
+    <h1>Consolidated Thesis Collection</h1>
+    <div class="toc">
+        <h2>Table of Contents</h2>
+        <ul>
+"""
+    
+    # Add links to each document in the TOC
+    for i, filename in enumerate(html_files, 1):
+        doc_title = os.path.splitext(filename)[0].replace('_', ' ')
+        consolidated_html += f'            <li><a href="#doc-{i}">{i}. {doc_title}</a></li>\n'
+    
+    # Close TOC section
+    consolidated_html += """        </ul>
+    </div>
+"""
+    
+    # Process each HTML file
+    for i, filename in enumerate(html_files, 1):
+        filepath = os.path.join(parent_dir, filename)
+        
+        if not os.path.exists(filepath):
+            print(f"File not found: {filename}")
+            continue
+        
+        print(f"Processing {filename}...")
+        
+        # Add document separator and header
+        doc_title = os.path.splitext(filename)[0].replace('_', ' ')
+        consolidated_html += f"""    <div class="document-separator" id="doc-{i}"></div>
+    <div class="document-header">
+        <h2 class="document-title">{i}. {doc_title}</h2>
+        <div class="document-source">Source file: {filename}</div>
+    </div>
+    <div class="document-content">
+"""
+        
+        # Read the HTML file and extract content
+        try:
+            with open(filepath, 'r', encoding='utf-8') as f:
+                content = f.read()
+                
+            # Remove HTML and HEAD tags, keeping only BODY content
+            # First remove the DOCTYPE declaration if present
+            content = re.sub(r'<!DOCTYPE[^>]*>', '', content, flags=re.IGNORECASE)
+            
+            # Remove HTML tags and everything outside the body
+            body_start = content.find('<body')
+            if body_start != -1:
+                body_start = content.find('>', body_start) + 1
+                body_end = content.rfind('</body>')
+                if body_end != -1:
+                    content = content[body_start:body_end]
+            
+            # If no body tags found, try to remove head section
+            if body_start == -1 or body_end == -1:
+                head_match = re.search(r'<head[^>]*>.*?</head>', content, re.DOTALL | re.IGNORECASE)
+                if head_match:
+                    content = content.replace(head_match.group(0), '')
+                
+                # Remove html tags if present
+                content = re.sub(r'<html[^>]*>|</html>', '', content, flags=re.IGNORECASE)
+            
+            # Add the content to the consolidated HTML
+            consolidated_html += content
+            
+        except Exception as e:
+            print(f"Error processing {filename}: {str(e)}")
+            consolidated_html += f"<p><em>Error reading this document: {str(e)}</em></p>"
+        
+        # Close the document content div
+        consolidated_html += """    </div>
+"""
+    
+    # Add footer
+    consolidated_html += """    <div class="footer">
+        <p>Consolidated from multiple thesis documents | Generated automatically</p>
+    </div>
+</body>
+</html>"""
+    
+    # Write the consolidated HTML to file
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(consolidated_html)
+    
+    print(f"Consolidated HTML thesis document created: {output_file}")
+    print(f"Included {len([f for f in html_files if os.path.exists(os.path.join(parent_dir, f))])} documents in the consolidated file")
+
+
+# Run the consolidation only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    consolidate_html_theses()