<!DOCTYPE html>
<!-- saved from url=(0070)file:///Users/home/Downloads/deepseek_html_20251204_5e1c1b.html#slide1 -->
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>PDF to CSV Challenge: School Schedule Transformation</title>
<link rel="stylesheet" href="./PDF to CSV Challenge_ School Schedule Transformation_files/all.min.css">
<style>
    /* ---------- Reset and page shell ---------- */
    * {
        margin: 0;
        padding: 0;
        box-sizing: border-box;
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }

    body {
        background-color: #f5f7fa;
        color: #333;
        line-height: 1.6;
        overflow-x: hidden;
    }

    /* Full-height scroller; each .slide snaps to its top edge. */
    .slides-container {
        width: 100%;
        height: 100vh;
        overflow-y: auto;
        scroll-behavior: smooth;
        scroll-snap-type: y mandatory;
    }

    .slide {
        /* Containing block for the absolutely positioned .lesson-tag and
           .slide-number badges. Without it every badge is positioned against
           the viewport and all slides' badges stack in the same corners. */
        position: relative;
        min-height: 100vh;
        padding: 60px 40px;
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
        scroll-snap-align: start;
        max-width: 900px;
        margin: 0 auto;
    }

    .slide-content {
        width: 100%;
        background-color: white;
        border-radius: 15px;
        padding: 40px;
        box-shadow: 0 10px 30px rgba(0, 0, 0, 0.08);
    }

    /* ---------- Per-slide badges (positioned against .slide) ---------- */
    .slide-number {
        position: absolute;
        top: 20px;
        right: 20px;
        background-color: #3498db;
        color: white;
        width: 40px;
        height: 40px;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        font-weight: bold;
    }

    .lesson-tag {
        position: absolute;
        top: 20px;
        left: 20px;
        background-color: #9b59b6;
        color: white;
        padding: 6px 15px;
        border-radius: 20px;
        font-size: 0.9rem;
        font-weight: bold;
    }

    /* ---------- Typography ---------- */
    h1 {
        font-size: 2.5rem;
        color: #2c3e50;
        margin-bottom: 20px;
        text-align: center;
    }

    h2 {
        font-size: 2rem;
        color: #2c3e50;
        margin-bottom: 20px;
        border-bottom: 3px solid #3498db;
        padding-bottom: 10px;
    }

    h3 {
        font-size: 1.5rem;
        color: #2c3e50;
        margin-bottom: 15px;
    }

    .icon-large {
        font-size: 3rem;
        color: #3498db;
        margin-bottom: 20px;
        text-align: center;
        display: block;
    }

    .intro-text {
        font-size: 1.2rem;
        text-align: center;
        margin-bottom: 30px;
        color: #555;
    }

    /* ---------- Download buttons ---------- */
    .download-buttons {
        display: flex;
        flex-wrap: wrap;
        justify-content: center;
        gap: 20px;
        margin: 30px 0;
    }

    .download-btn {
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        width: 160px;
        padding: 20px 15px;
        background-color: #f8f9fa;
        border-radius: 10px;
        text-decoration: none;
        color: #2c3e50;
        transition: all 0.3s ease;
        border: 2px solid #e0e0e0;
    }

    .download-btn:hover {
        transform: translateY(-5px);
        box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1);
        border-color: #3498db;
    }

    .download-btn i {
        font-size: 2rem;
        margin-bottom: 10px;
    }

    .btn-pdf i {
        color: #e74c3c;
    }

    .btn-csv i {
        color: #27ae60;
    }

    .btn-instructions i {
        color: #9b59b6;
    }

    /* ---------- Lesson plan ---------- */
    .time-indicator {
        display: inline-block;
        background-color: #f39c12;
        color: white;
        padding: 8px 16px;
        border-radius: 20px;
        font-weight: bold;
        margin-bottom: 20px;
        text-align: center;
    }

    .lesson-card {
        background-color: #f8f9fa;
        border-radius: 10px;
        padding: 25px;
        margin-bottom: 25px;
        border-left: 5px solid #3498db;
    }

    .lesson-card.lesson2 {
        border-left-color: #9b59b6;
    }

    .timeline {
        margin-left: 20px;
        margin-top: 15px;
    }

    .timeline li {
        margin-bottom: 10px;
        position: relative;
        padding-left: 25px;
    }

    .timeline li:before {
        content: "→";
        position: absolute;
        left: 0;
        color: #3498db;
        font-weight: bold;
    }

    /* ---------- Content boxes ---------- */
    .data-preview {
        background-color: #2c3e50;
        color: #ecf0f1;
        padding: 20px;
        border-radius: 8px;
        overflow-x: auto;
        margin-top: 15px;
        font-family: 'Courier New', monospace;
        font-size: 0.9rem;
        max-height: 300px;
        overflow-y: auto;
    }

    .tip-box {
        background-color: #e1f5fe;
        border-radius: 8px;
        padding: 20px;
        margin: 25px 0;
        border-left: 5px solid #03a9f4;
    }

    .tip-box i {
        color: #03a9f4;
        margin-right: 10px;
    }

    .challenge-button {
        display: block;
        width: 200px;
        margin: 30px auto;
        background: linear-gradient(to right, #3498db, #2c3e50);
        color: white;
        border: none;
        padding: 15px 30px;
        border-radius: 50px;
        font-size: 1.1rem;
        font-weight: bold;
        cursor: pointer;
        transition: all 0.3s ease;
        box-shadow: 0 4px 10px rgba(52, 152, 219, 0.3);
    }

    .challenge-button:hover {
        transform: translateY(-3px);
        box-shadow: 0 6px 15px rgba(52, 152, 219, 0.4);
    }

    .tool-list {
        display: flex;
        flex-wrap: wrap;
        gap: 15px;
        margin-top: 20px;
    }

    .tool-item {
        background-color: #f0f7ff;
        padding: 15px;
        border-radius: 8px;
        flex: 1;
        min-width: 200px;
        text-align: center;
    }

    .tool-item i {
        color: #3498db;
        font-size: 1.5rem;
        margin-bottom: 10px;
    }

    .step-list {
        margin-top: 20px;
    }

    .step-item {
        display: flex;
        align-items: flex-start;
        margin-bottom: 25px;
    }

    .step-number {
        background-color: #3498db;
        color: white;
        width: 36px;
        height: 36px;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        font-weight: bold;
        margin-right: 15px;
        flex-shrink: 0;
    }

    /* ---------- Fixed prev/next controls ---------- */
    .navigation {
        position: fixed;
        bottom: 30px;
        right: 30px;
        display: flex;
        gap: 10px;
        z-index: 100;
    }

    .nav-btn {
        width: 50px;
        height: 50px;
        border-radius: 50%;
        background-color: #3498db;
        color: white;
        border: none;
        font-size: 1.2rem;
        cursor: pointer;
        display: flex;
        align-items: center;
        justify-content: center;
        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
        transition: all 0.3s ease;
    }

    .nav-btn:hover {
        background-color: #2980b9;
        transform: scale(1.1);
    }

    .question-list {
        margin-left: 20px;
        margin-top: 15px;
    }

    .question-list li {
        margin-bottom: 10px;
        padding-left: 10px;
    }

    /* ---------- Small screens ---------- */
    @media (max-width: 768px) {
        .slide {
            padding: 40px 20px;
        }

        .slide-content {
            padding: 25px;
        }

        h1 {
            font-size: 2rem;
        }

        h2 {
            font-size: 1.7rem;
        }

        .download-btn {
            width: 140px;
            padding: 15px 10px;
        }
    }
</style>
</head>
<body>
<div class="slides-container" id="slidesContainer">
<!-- Slide 1: Title -->
<div class="slide" id="slide1">
    <div class="slide-content">
        <div class="lesson-tag">2 Lessons | 80 minutes</div>
        <div class="slide-number">1</div>
        <i class="fas fa-exchange-alt icon-large"></i>
        <h1>Data Transformation Challenge</h1>
        <p class="intro-text">Convert a school schedule from PDF format to structured CSV data</p>

        <div class="time-indicator">
            <i class="fas fa-clock"></i> Two 40-minute lessons
        </div>

        <div class="tip-box">
            <p><i class="fas fa-info-circle"></i> <strong>Real-world skill:</strong> PDF data extraction is a common task in data analysis, administrative work, and automation projects.</p>
        </div>

        <!-- Clicks on these links are intercepted by the page script, which
             generates the files in the browser. -->
        <div class="download-buttons">
            <a href="Schedule.pdf" class="download-btn btn-pdf" id="downloadPdf">
                <i class="fas fa-file-pdf"></i>
                <span>Schedule.pdf</span>
                <small>Input file</small>
            </a>

            <!-- href is a plain "#": the saved copy had an absolute file:///
                 path to the author's Downloads folder baked in. -->
            <a href="#" class="download-btn btn-csv" id="downloadCsv">
                <i class="fas fa-file-csv"></i>
                <span>Template.csv</span>
                <small>Target format</small>
            </a>
        </div>

        <p style="text-align: center; margin-top: 20px;">
            Download both files before starting the challenge
        </p>
    </div>
</div>
<!-- Slide 2: Lesson Plan -->
<div class="slide" id="slide2">
    <div class="slide-content">
        <div class="lesson-tag">Lesson Plan</div>
        <div class="slide-number">2</div>
        <h2><i class="fas fa-chalkboard-teacher"></i> Two-Lesson Structure</h2>

        <!-- Lesson 1: 10 + 15 + 10 + 5 = 40 minutes -->
        <div class="lesson-card">
            <h3><i class="fas fa-search"></i> Lesson 1: Analysis &amp; Extraction</h3>
            <p><strong>Focus:</strong> Understanding the data and planning the extraction</p>
            <ul class="timeline">
                <li><strong>10 min</strong> - Introduction to PDF data extraction</li>
                <li><strong>15 min</strong> - Analyze Schedule.pdf structure</li>
                <li><strong>10 min</strong> - Choose tools and methods</li>
                <li><strong>5 min</strong> - Begin data extraction</li>
            </ul>
        </div>

        <!-- Lesson 2: 10 + 15 + 10 + 5 = 40 minutes -->
        <div class="lesson-card lesson2">
            <h3><i class="fas fa-laptop-code"></i> Lesson 2: Transformation &amp; Validation</h3>
            <p><strong>Focus:</strong> Cleaning data and creating the final CSV</p>
            <ul class="timeline">
                <li><strong>10 min</strong> - Review and clean extracted data</li>
                <li><strong>15 min</strong> - Transform to CSV format</li>
                <li><strong>10 min</strong> - Validate against template</li>
                <li><strong>5 min</strong> - Discussion and reflection</li>
            </ul>
        </div>

        <div class="tip-box">
            <p><i class="fas fa-lightbulb"></i> <strong>Pro tip:</strong> Take notes during Lesson 1 about the PDF structure. This will save time in Lesson 2.</p>
        </div>
    </div>
</div>
<!-- Slide 3: The Challenge -->
<div class="slide" id="slide3">
    <div class="slide-content">
        <div class="lesson-tag">Challenge Overview</div>
        <div class="slide-number">3</div>
        <h2><i class="fas fa-bullseye"></i> The Challenge</h2>

        <h3>Your Mission:</h3>
        <p>Transform unstructured schedule data from a PDF into a structured CSV file.</p>

        <!-- The four stages of the exercise, in order -->
        <div class="step-list">
            <div class="step-item">
                <div class="step-number">1</div>
                <div>
                    <h4>Extract</h4>
                    <p>Get data out of the PDF file using Python libraries or tools</p>
                </div>
            </div>

            <div class="step-item">
                <div class="step-number">2</div>
                <div>
                    <h4>Clean</h4>
                    <p>Organize the messy, unstructured text into logical groups</p>
                </div>
            </div>

            <div class="step-item">
                <div class="step-number">3</div>
                <div>
                    <h4>Transform</h4>
                    <p>Convert the data to match the CSV template format</p>
                </div>
            </div>

            <div class="step-item">
                <div class="step-number">4</div>
                <div>
                    <h4>Validate</h4>
                    <p>Check that your CSV matches the expected structure</p>
                </div>
            </div>
        </div>

        <!-- The page script attaches a click handler to #startChallenge -->
        <button class="challenge-button" id="startChallenge">
            <i class="fas fa-play-circle"></i> Begin Challenge
        </button>
    </div>
</div>
<!-- Slide 4: Input - Schedule.pdf -->
<div class="slide" id="slide4">
    <div class="slide-content">
        <div class="lesson-tag">Input File</div>
        <div class="slide-number">4</div>
        <h2><i class="fas fa-file-pdf"></i> Input: Schedule.pdf</h2>
        <p>This PDF contains unstructured school schedule data with:</p>

        <ul style="margin-left: 20px; margin-top: 15px;">
            <li>Days of week in Russian (Пн, Вт, Ср, Чт, Пт)</li>
            <li>Time slots (1-13 with specific times)</li>
            <li>Class information (subject, class, room)</li>
            <li>Teacher name at the bottom</li>
        </ul>

        <h3 style="margin-top: 25px;">PDF Content Preview:</h3>
        <!-- Abridged excerpt of the text the page script serves as Schedule.pdf -->
        <div class="data-preview">
            01.09.2025
            aSc Расписание
            6A/6B ICT B24 Ict1
            2А/2В/2С Maths B24 E5
            7C/7D ICT B24 Ict1
            ...
            Пн Вт Ср Чт Пт
            1 9:00 - 9:40
            2 10:00 - 10:40
            ...
            Учитель Bob Santos
        </div>

        <div class="tip-box">
            <p><i class="fas fa-exclamation-triangle"></i> <strong>Challenge:</strong> The data is unstructured - you'll need to find patterns to extract it correctly.</p>
        </div>
    </div>
</div>
<!-- Slide 5: Output - CSV Template -->
<div class="slide" id="slide5">
    <div class="slide-content">
        <div class="lesson-tag">Output File</div>
        <div class="slide-number">5</div>
        <h2><i class="fas fa-file-csv"></i> Output: Template.csv</h2>
        <p>Your goal is to create a CSV file matching this structure:</p>

        <ul style="margin-left: 20px; margin-top: 15px; margin-bottom: 20px;">
            <li>First row: Column headers (Day, time slots)</li>
            <li>Each row: A day of the week (Monday-Friday)</li>
            <li>Cells: Class information or empty if no class</li>
            <li>Multi-line cells for detailed class info</li>
        </ul>

        <h3>CSV Structure Preview:</h3>
        <!-- Abridged excerpt of the template the page script serves as schedule_template.csv -->
        <div class="data-preview">
            Day,1 (9:00-9:40),2 (10:00-10:40),3 (11:00-11:40)...
            Monday,,"Subject: Maths Class: 2А/2В/2С E5
            Room: B24",,,"Subject: ICT Class: 6A/6B Room: B24"...
            Tuesday,"Subject: Технотрек Class: 7A/7B/7C/7D/7E Room: B24, B02"...
        </div>

        <div class="tip-box">
            <p><i class="fas fa-code"></i> <strong>Note:</strong> Notice how class information is formatted as "Subject: ... Class: ... Room: ..."</p>
        </div>
    </div>
</div>
<!-- Slide 6: Tools & Libraries -->
<div class="slide" id="slide6">
    <div class="slide-content">
        <div class="lesson-tag">Tools</div>
        <div class="slide-number">6</div>
        <h2><i class="fas fa-tools"></i> Recommended Tools</h2>
        <p>Choose from these options for the data extraction:</p>

        <!-- Row 1: Python PDF libraries -->
        <div class="tool-list">
            <div class="tool-item">
                <i class="fab fa-python"></i>
                <h4>PyPDF2</h4>
                <p>Basic PDF text extraction</p>
            </div>

            <div class="tool-item">
                <i class="fas fa-file-pdf"></i>
                <h4>pdfplumber</h4>
                <p>Advanced table extraction</p>
            </div>

            <div class="tool-item">
                <i class="fas fa-table"></i>
                <h4>tabula-py</h4>
                <p>Extract tables from PDF</p>
            </div>
        </div>

        <!-- Row 2: data cleaning and non-code options -->
        <div class="tool-list">
            <div class="tool-item">
                <i class="fas fa-database"></i>
                <h4>pandas</h4>
                <p>Data cleaning &amp; CSV export</p>
            </div>

            <div class="tool-item">
                <i class="fas fa-mouse-pointer"></i>
                <h4>Tabula (GUI)</h4>
                <p>Visual table extraction tool</p>
            </div>

            <div class="tool-item">
                <i class="fas fa-check-circle"></i>
                <h4>Manual</h4>
                <p>Copy-paste &amp; clean in spreadsheet</p>
            </div>
        </div>

        <div class="tip-box">
            <p><i class="fas fa-lightbulb"></i> <strong>Suggestion:</strong> Start with pdfplumber for Python or Tabula GUI if you're new to PDF extraction.</p>
        </div>
    </div>
</div>
<!-- Slide 7: Key Considerations -->
<div class="slide" id="slide7">
    <div class="slide-content">
        <div class="lesson-tag">Tips</div>
        <div class="slide-number">7</div>
        <h2><i class="fas fa-key"></i> Key Considerations</h2>

        <h3>Important Details to Notice:</h3>
        <div class="step-list">
            <div class="step-item">
                <div class="step-number">1</div>
                <div>
                    <h4>Russian to English</h4>
                    <p>Convert Пн, Вт, Ср, Чт, Пт to Monday, Tuesday, Wednesday, Thursday, Friday</p>
                </div>
            </div>

            <div class="step-item">
                <div class="step-number">2</div>
                <div>
                    <h4>Time Slots</h4>
                    <p>Match class information to the correct time slots (1-13 with specific times)</p>
                </div>
            </div>

            <div class="step-item">
                <div class="step-number">3</div>
                <div>
                    <h4>Formatting</h4>
                    <p>Follow the exact format: "Subject: ... Class: ... Room: ..." in CSV cells</p>
                </div>
            </div>

            <div class="step-item">
                <div class="step-number">4</div>
                <div>
                    <h4>Empty Cells</h4>
                    <p>Leave cells empty for time slots with no classes</p>
                </div>
            </div>
        </div>

        <h3 style="margin-top: 30px;">Common Challenges:</h3>
        <ul style="margin-left: 20px; margin-top: 15px;">
            <li>Handling multi-room assignments (e.g., "B24,B02")</li>
            <li>Dealing with split classes (e.g., "6A/6B")</li>
            <li>Identifying which classes belong to which time slots</li>
            <li>Managing multi-line cells in the CSV</li>
        </ul>
    </div>
</div>
<!-- Slide 8: Discussion Questions -->
<div class="slide" id="slide8">
    <div class="slide-content">
        <div class="lesson-tag">Reflection</div>
        <div class="slide-number">8</div>
        <h2><i class="fas fa-question-circle"></i> Discussion Questions</h2>

        <p>After completing the challenge, consider these questions:</p>

        <!-- A real list element: <li> is only valid inside <ul>/<ol>/<menu>.
             <ul> keeps the bullet markers the items rendered with before. -->
        <ul class="question-list">
            <li>What was the most challenging part of extracting data from the PDF?</li>
            <li>How did you handle the Russian day abbreviations?</li>
            <li>What pattern recognition strategies worked best?</li>
            <li>How would you validate that all data was extracted correctly?</li>
            <li>If the PDF format changed next semester, how could you make your solution more flexible?</li>
            <li>What real-world applications can you think of for PDF data extraction skills?</li>
        </ul>

        <div class="tip-box" style="margin-top: 30px;">
            <p><i class="fas fa-graduation-cap"></i> <strong>Learning outcome:</strong> This challenge develops problem-solving, pattern recognition, and data transformation skills applicable to many real-world scenarios.</p>
        </div>

        <div class="download-buttons" style="margin-top: 30px;">
            <!-- href is a plain "#": the saved copy had an absolute file:///
                 path baked in. The click is intercepted by the page script. -->
            <a href="#" class="download-btn btn-instructions" id="downloadInstructions">
                <i class="fas fa-file-alt"></i>
                <span>Instructions</span>
                <small>Detailed guide</small>
            </a>
        </div>
    </div>
</div>
<!-- Navigation buttons -->
<!-- Fixed-position prev/next controls. The page script toggles their
     `display` between "none" and "flex" at the first/last slide. The buttons
     contain only an icon, so aria-label supplies the accessible name. -->
<div class="navigation">
    <button type="button" class="nav-btn" id="prevBtn" aria-label="Previous slide" style="display: none;">
        <i class="fas fa-chevron-up" aria-hidden="true"></i>
    </button>
    <button type="button" class="nav-btn" id="nextBtn" aria-label="Next slide" style="display: flex;">
        <i class="fas fa-chevron-down" aria-hidden="true"></i>
    </button>
</div>
</div>

<script>
// File contents for download

// Text served when the "Schedule.pdf" button is clicked. It is a plain-text
// dump of the schedule (class entries first, then the day header, the
// numbered time slots and the teacher line), one entry per line.
// The literal must contain nothing but the schedule lines: every character
// between the backticks ends up in the downloaded file.
const pdfContent = `01.09.2025
aSc Расписание
6A/6B ICT B24 Ict1
2А/2В/2С Maths B24 E5
7C/7D ICT B24 Ict1
7A/7B ICT B24 Ict1
10AВ C ICT (6) B24 Семинар
7A/7B/7C/7D/7E Технотрек B24,B02 AI Core Track
6A/6B/6C/6D Технотрек B24,B22 AI Core Track
11A ICT (7) B24 Семинар
8ABC D ICT (1) B24 Семинар
9ABC ICT (3) B24 Семинар
6C/6D ICT B24 Ict1
1А /1В/1C Maths B24 E6
1А /1В/1C Maths B24 E5
7A/7B/7C/7D/7E Технотрек B24,B22 AI Core Track
7A/7B/7C/7D/7E Технотрек B24,B29 AI Core Track
2А/2В/2С Maths B24 E6
10AВ C ICT (6) B24 Семинар
11A ICT (7) B24 Семинар
6A/6B/6C/6D Технотрек B24,B33 AI Core Track
6A/6B/6C/6D Технотрек B24,B33 AI Core Track
10AВ C ICT (6) B24 Семинар
11A ICT (7) B24 Семинар
Обед 1-4 Обед 5-7 Обед 8-11
Пн Вт Ср Чт Пт
1 9:00 - 9:40
2 10:00 - 10:40
3 11:00 - 11:40
4 11:50 - 12:30
5 12:40 - 13:20
Обед 1-4 12:40 - 13:20
6 13:30 - 14:10
Обед 5-7 13:30 - 14:10
7 14:20 - 15:00
Обед 8-11 14:20 - 15:00
8 15:20 - 16:00
9 16:15 - 16:55
10 17:05 - 17:45
11 17:55 - 18:35
12 18:45 - 19:20
13 19:20 - 20:00
Учитель Bob Santos`;
// Text served when the "Template.csv" button is clicked (downloaded as
// schedule_template.csv). Layout: a header row ("Day" plus the 13 time
// slots), then one record per weekday, Monday to Friday.
// Some cells are quoted and span two physical lines ("... E5<newline>Room:
// B24"). The newline inside the quotes is part of the cell value, so no
// other text may appear between those two lines.
const csvContent = `Day,1 (9:00-9:40),2 (10:00-10:40),3 (11:00-11:40),4 (11:50-12:30),5 (12:40-13:20),6 (13:30-14:10),7 (14:20-15:00),8 (15:20-16:00),9 (16:15-16:55),10 (17:05-17:45),11 (17:55-18:35),12 (18:45-19:20),13 (19:20-20:00),,,,
Monday,,"Subject: Maths Class: 2А/2В/2С E5
Room: B24",,,"Subject: ICT Class: 6A/6B Room: B24",,"Subject: ICT Class: 7C/7D Room: B24","Subject: ICT Class: 10ABC Room: B24","Subject: ICT Class: 7A/7B Room: B24",,,,,,,,
Tuesday,"Subject: Технотрек Class: 7A/7B/7C/7D/7E Room: B24, B02","Subject: Технотрек Class: 6A/6B/6C/6D Room: B24",,,,,,"Subject: ICT Class: 9ABC Room: B24","Subject: ICT Class: 8ABC Room: B24","Subject: ICT Class: 11A Room: B24",,,,,,,
Wednesday,,,,"Subject: ICT Class: 6C/6D Room: B24",,"Subject: Maths Class: 1А/1В/1С E6
Room: B24",,,,,,,,,,,
Thursday,,"Subject: Технотрек Class: 7A/7B/7C/7D/7E Room: B24","Subject: Технотрек Class: 7A/7B/7C/7D/7E Room: B24","Subject: Maths Class: 1А/1В/1С E6
Room: B24",,,"Subject: Maths Class: 2А/2В/2С E6 Room: B24",,"Subject: ICT Class: 10ABC Room: B24","Subject: ICT Class: 11A Room: B24",,,,,,,
Friday,,,"Subject: Технотрек Class: 6A/6B/6C/6D Room: B24","Subject: Технотрек Class: 6A/6B/6C/6D Room: B24",,"Subject: ICT Class: 10ABC Room: B24",,,"Subject: ICT Class: 11A Room: B24",,,,,,,,`;
// Text served when the "Instructions" button is clicked: a plain-text
// handout covering both lessons. Every character between the backticks is
// written to the downloaded file. The template is referred to as
// schedule_template.csv throughout, which is the name the CSV download uses.
const instructionsContent = `DATA TRANSFORMATION CHALLENGE
Schedule PDF to CSV Conversion

CHALLENGE OBJECTIVE:
Transform the unstructured schedule data in Schedule.pdf into a structured CSV file that matches schedule_template.csv.

LESSON 1: ANALYSIS & EXTRACTION (40 minutes)
1. Download Schedule.pdf and schedule_template.csv
2. Open Schedule.pdf and examine its structure
3. Identify:
- Days of week (Пн, Вт, Ср, Чт, Пт = Monday-Friday)
- Time slots (1-13 with specific times)
- Class information patterns
4. Choose your extraction method:
- Python with PyPDF2 or pdfplumber
- Tabula GUI tool
- Manual copy-paste
5. Begin extracting data from the PDF

LESSON 2: TRANSFORMATION & VALIDATION (40 minutes)
1. Clean your extracted data
2. Organize into days and time slots
3. Format data to match CSV template:
- "Subject: [subject] Class: [class] Room: [room]"
- Multi-line cells where needed
- Empty cells for no classes
4. Convert Russian days to English
5. Validate against schedule_template.csv
6. Save your final CSV file

KEY FORMATTING REQUIREMENTS:
- First row: Headers (Day, time slots)
- Rows: Monday through Friday
- Cells: Either empty or formatted class info
- Multi-room: "Room: B24,B02"
- Multi-class: "Class: 6A/6B"

VALIDATION CHECKLIST:
✓ All 5 weekdays present
✓ All 13 time slots as columns
✓ Russian days converted to English
✓ Class info matches template format
✓ Empty cells for time slots with no classes
✓ Multi-line cells formatted correctly

TOOLS & RESOURCES:
- Python libraries: PyPDF2, pdfplumber, pandas
- GUI tool: Tabula (tabula.technology)
- Text editor or spreadsheet software

TIPS FOR SUCCESS:
1. Start by mapping the PDF structure on paper
2. Extract all text first, then organize
3. Test small sections before doing everything
4. Compare with template frequently
5. Ask for help if stuck on a pattern

Good luck with the challenge!`;
// Download functions

/**
 * Offer in-memory content to the user as a file download.
 *
 * Wraps `content` in a Blob, points a temporary `<a download>` element at an
 * object URL for that Blob, and clicks it programmatically.
 *
 * @param {string} filename - Name suggested to the browser for the saved file.
 * @param {BlobPart} content - Data to place in the file.
 * @param {string} contentType - MIME type recorded on the Blob.
 * @param {number} [revokeDelayMs=1000] - How long to keep the object URL
 *   alive after the click before releasing it.
 */
function downloadFile(filename, content, contentType, revokeDelayMs = 1000) {
    const blob = new Blob([content], { type: contentType });
    const url = URL.createObjectURL(blob);

    const link = document.createElement('a');
    link.href = url;
    link.download = filename;
    document.body.appendChild(link);

    try {
        link.click();
    } finally {
        // Runs even if click() throws, so the helper element never stays in the page.
        document.body.removeChild(link);
        // Revoking synchronously right after click() can cancel the download
        // in browsers that start reading the blob asynchronously, so the URL
        // is released on a timer instead.
        setTimeout(() => URL.revokeObjectURL(url), revokeDelayMs);
    }
}
// Set up download buttons
// Each button is an <a>; the handler cancels the link navigation and serves
// the corresponding in-memory text through downloadFile() instead.

// NOTE(review): pdfContent is plain text, so the file saved as Schedule.pdf
// is not a valid PDF and PDF tools (PyPDF2, pdfplumber, Tabula) will not open
// it. Filename and type are left unchanged here because producing a real PDF
// needs an actual PDF asset or generator. Confirm whether the link's own
// href ("Schedule.pdf") should be allowed to download a real file instead.
document.getElementById('downloadPdf').addEventListener('click', function (e) {
    e.preventDefault();
    downloadFile('Schedule.pdf', pdfContent, 'application/pdf');
});

// The explicit charset keeps the Cyrillic cell text intact when the blob is
// opened directly in a browser tab.
document.getElementById('downloadCsv').addEventListener('click', function (e) {
    e.preventDefault();
    downloadFile('schedule_template.csv', csvContent, 'text/csv;charset=utf-8');
});

// The instructions are plain text. Serving them as "application/pdf" with a
// .pdf name produced a file no PDF viewer can open, so they are delivered as
// a UTF-8 text file.
document.getElementById('downloadInstructions').addEventListener('click', function (e) {
    e.preventDefault();
    downloadFile('Challenge_Instructions.txt', instructionsContent, 'text/plain;charset=utf-8');
});
// Start challenge button
// Shows a blocking alert that recaps the first steps of the exercise.
document.getElementById('startChallenge').addEventListener('click', () => {
    const message = [
        'Challenge started! ',
        '',
        '1. Download Schedule.pdf and schedule_template.csv',
        '2. Begin with Lesson 1: Analyze the PDF structure',
        '3. You have two 40-minute lessons to complete this',
        '',
        'Good luck!',
    ].join('\n');
    alert(message);
});
// Slide navigation
// DOM handles and state shared by the navigation functions below.
const slidesContainer = document.querySelector('#slidesContainer'); // the scrolling element
const slides = document.querySelectorAll('.slide');                 // static NodeList, document order
const prevBtn = document.querySelector('#prevBtn');
const nextBtn = document.querySelector('#nextBtn');
let currentSlide = 0; // zero-based index into `slides`
/**
 * Bring the prev/next buttons and the URL fragment in line with `currentSlide`.
 *
 * - Hides the "previous" button on the first slide and the "next" button on
 *   the last one. The buttons use `display: flex` when visible.
 * - Records the slide in the URL as `#slideN` (1-based) for bookmarking.
 *
 * The fragment is written with `history.replaceState` rather than by
 * assigning `location.hash`: assignment adds a history entry for every slide
 * visited and makes the browser navigate to the fragment, which competes
 * with the smooth scroll started by the caller.
 */
function updateNavigation() {
    const isFirstSlide = currentSlide === 0;
    const isLastSlide = currentSlide === slides.length - 1;
    prevBtn.style.display = isFirstSlide ? 'none' : 'flex';
    nextBtn.style.display = isLastSlide ? 'none' : 'flex';

    // Update URL hash for bookmarking
    const targetHash = `#slide${currentSlide + 1}`;
    if (window.location.hash === targetHash) {
        return; // already recorded; nothing to rewrite
    }
    try {
        window.history.replaceState(null, '', targetHash);
    } catch (err) {
        // Some contexts refuse History API URL changes; fall back to the
        // plain assignment so bookmarking keeps working.
        window.location.hash = targetHash;
    }
}
/**
 * Make the slide at `index` current and scroll it into view.
 *
 * Does nothing unless `index` satisfies 0 <= index < slides.length.
 *
 * @param {number} index - Zero-based position in `slides`.
 */
function goToSlide(index) {
    const isValidIndex = index >= 0 && index < slides.length;
    if (!isValidIndex) {
        return;
    }

    currentSlide = index;
    const targetSlide = slides[currentSlide];
    targetSlide.scrollIntoView({ behavior: 'smooth' });
    updateNavigation();
}
/** Advance one slide; a no-op when the last slide is already current. */
function nextSlide() {
    const hasNext = currentSlide < slides.length - 1;
    if (hasNext) {
        goToSlide(currentSlide + 1);
    }
}
/** Go back one slide; a no-op when the first slide is already current. */
function prevSlide() {
    const hasPrevious = currentSlide > 0;
    if (hasPrevious) {
        goToSlide(currentSlide - 1);
    }
}
// Initialize navigation
// Adopt a "#slideN" deep link BEFORE the first updateNavigation() call.
// updateNavigation() writes the current slide into the URL fragment, so
// calling it with currentSlide still at 0 would replace a bookmarked
// "#slide5" with "#slide1" before the load handler gets to read it.
{
    const requestedSlide = Number.parseInt(window.location.hash.replace('#slide', ''), 10);
    // NaN (no or unparsable fragment) fails both comparisons and is ignored.
    if (requestedSlide >= 1 && requestedSlide <= slides.length) {
        currentSlide = requestedSlide - 1;
    }
}
updateNavigation();

// Event listeners for navigation buttons
prevBtn.addEventListener('click', prevSlide);
nextBtn.addEventListener('click', nextSlide);
// Keyboard navigation
//   ArrowDown / PageDown / Space -> next slide
//   ArrowUp / PageUp             -> previous slide
//   1-9                          -> jump to that slide, if it exists
document.addEventListener('keydown', function (e) {
    // Leave browser and OS shortcuts (Ctrl+1, Alt+ArrowUp, Cmd+ArrowDown, ...) alone.
    if (e.ctrlKey || e.metaKey || e.altKey) {
        return;
    }

    // Space activates a focused button or link; do not turn that key press
    // into a slide change as well.
    const focusedControl = e.target instanceof Element
        ? e.target.closest('a, button, input, select, textarea, [contenteditable]')
        : null;
    if (e.key === ' ' && focusedControl !== null) {
        return;
    }

    if (e.key === 'ArrowDown' || e.key === 'PageDown' || e.key === ' ') {
        e.preventDefault();
        nextSlide();
    } else if (e.key === 'ArrowUp' || e.key === 'PageUp') {
        e.preventDefault();
        prevSlide();
    } else if (/^[1-9]$/.test(e.key)) {
        // Bounded by the real slide count instead of a hard-coded '8'.
        const index = Number.parseInt(e.key, 10) - 1;
        if (index < slides.length) {
            e.preventDefault();
            goToSlide(index);
        }
    }
});
// Handle hash on page load
// Once the page has finished loading, scroll to the slide named by a
// "#slideN" fragment (N is 1-based). Fragments that do not yield a slide
// number in range are ignored.
window.addEventListener('load', function () {
    const hash = window.location.hash;
    if (!hash) {
        return;
    }

    // Explicit radix: the slide number is always decimal.
    const slideNum = Number.parseInt(hash.replace('#slide', ''), 10);
    if (!Number.isNaN(slideNum) && slideNum >= 1 && slideNum <= slides.length) {
        goToSlide(slideNum - 1);
    }
});
// Scroll detection for updating current slide
// Debounced: the check runs 100 ms after the most recent scroll event. It
// picks the slide whose top edge is nearest the top of the viewport and makes
// it current, provided that edge is within half a viewport height.
let scrollTimeout;
slidesContainer.addEventListener('scroll', function () {
    clearTimeout(scrollTimeout);
    scrollTimeout = setTimeout(function () {
        let nearestIndex = -1;
        let nearestTop;

        slides.forEach(function (slide, i) {
            const top = slide.getBoundingClientRect().top;
            // Strict "<" keeps the earliest slide when two are equally near.
            if (nearestIndex === -1 || Math.abs(top) < Math.abs(nearestTop)) {
                nearestIndex = i;
                nearestTop = top;
            }
        });

        const withinHalfViewport = nearestIndex !== -1
            && Math.abs(nearestTop) < window.innerHeight / 2;
        if (withinHalfViewport) {
            currentSlide = nearestIndex;
            updateNavigation();
        }
    }, 100);
});
</script>

</body></html>