From 258d837dadbc78e0fc3f2d2ff4b52545e7b55f64 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 21 May 2025 07:14:01 +0000
Subject: [PATCH] feat: Add initial pronunciation practice tool

This commit introduces a new web application designed to help you improve your English pronunciation.

Key features include:
-   **File Upload:** You can upload a TXT file containing English text and an MP3 audio file of your reading.
-   **Speech-to-Text (STT):** The backend uses Google Cloud Speech-to-Text (GCP STT) to transcribe your audio. This requires GCP credentials, supplied via `GOOGLE_APPLICATION_CREDENTIALS`.
-   **Text Comparison:** The original text is compared line by line with the transcribed text using `difflib.Differ` to identify discrepancies.
-   **Results Display:** The frontend shows the original text, the transcribed text, and a highlighted diff of the comparison.
-   **Issue Storage:** The lines that differ between the original and the transcribed text are stored in your session.
-   **Exercise Page:** A dedicated page (`/exercise`) displays these stored issues, providing targeted practice material.
-   **Backend & Frontend:** Implemented using Flask for the backend and vanilla JavaScript for the frontend.
-   **Unit Tests:** Basic unit tests for the Flask backend logic have been added, covering file handling, STT mocking, session management, and page rendering.

The tool is structured within the `pronunciation_practice_tool` directory, containing templates, static assets, upload handling, and tests.
---
 pronunciation_practice_tool/__init__.py       |   1 +
 pronunciation_practice_tool/app.py            | 147 ++++++++++++++++++
 pronunciation_practice_tool/requirements.txt  |   3 +
 pronunciation_practice_tool/static/app.js     | 112 +++++++++++++
 pronunciation_practice_tool/static/style.css  |  12 ++
 .../templates/exercise.html                   |  39 +++++
 .../templates/index.html                      |  27 ++++
 pronunciation_practice_tool/tests/__init__.py |   1 +
 pronunciation_practice_tool/tests/test_app.py | 123 +++++++++++++++
 pronunciation_practice_tool/uploads/.gitkeep  |   1 +
 10 files changed, 466 insertions(+)
 create mode 100644 pronunciation_practice_tool/__init__.py
 create mode 100644 pronunciation_practice_tool/app.py
 create mode 100644 pronunciation_practice_tool/requirements.txt
 create mode 100644 pronunciation_practice_tool/static/app.js
 create mode 100644 pronunciation_practice_tool/static/style.css
 create mode 100644 pronunciation_practice_tool/templates/exercise.html
 create mode 100644 pronunciation_practice_tool/templates/index.html
 create mode 100644 pronunciation_practice_tool/tests/__init__.py
 create mode 100644 pronunciation_practice_tool/tests/test_app.py
 create mode 100644 pronunciation_practice_tool/uploads/.gitkeep

diff --git a/pronunciation_practice_tool/__init__.py b/pronunciation_practice_tool/__init__.py
new file mode 100644
index 0000000..e17c3f6
--- /dev/null
+++ b/pronunciation_practice_tool/__init__.py
@@ -0,0 +1 @@
+# This file makes pronunciation_practice_tool a Python package.
diff --git a/pronunciation_practice_tool/app.py b/pronunciation_practice_tool/app.py
new file mode 100644
index 0000000..6cd1d8d
--- /dev/null
+++ b/pronunciation_practice_tool/app.py
@@ -0,0 +1,147 @@
+import os
+from flask import Flask, request, jsonify, render_template, session # Added session
+from werkzeug.utils import secure_filename
+from google.cloud import speech
+import difflib
+
+app = Flask(__name__, template_folder='templates', static_folder='static')
+# Signs session cookies; set FLASK_SECRET_KEY or sessions are lost on every restart.
+app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)
+
+UPLOAD_FOLDER = 'uploads'  # relative path: resolved against the working directory
+ALLOWED_EXTENSIONS_TEXT = {'txt'}
+ALLOWED_EXTENSIONS_AUDIO = {'mp3'}
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+def transcribe_audio_gcp(audio_file_path):
+    """Return the transcript of the audio file, or a 'GCP STT Error: ...' string on failure.
+
+    Exceptions are caught and reported as that string; callers test for the prefix.
+    """
+    try:
+        # Building the client can already fail when no credentials are found, so it
+        # has to sit inside the try for the credentials hint below to be reachable.
+        client = speech.SpeechClient()
+        with open(audio_file_path, 'rb') as audio_file:
+            audio = speech.RecognitionAudio(content=audio_file.read())
+        config = speech.RecognitionConfig(
+            language_code='en-US',
+            enable_automatic_punctuation=True
+        )
+        response = client.recognize(config=config, audio=audio)
+        return "".join(result.alternatives[0].transcript for result in response.results)
+    except Exception as e:  # boundary: every failure is reported as a string
+        print(f"Google Cloud Speech-to-Text Error: {e}")
+        if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
+            return "GCP STT Error: GOOGLE_APPLICATION_CREDENTIALS not set."
+        return f"GCP STT Error: {e}"
+
+
+def allowed_file(filename, allowed_extensions):
+    """Return True when filename's last extension (case-insensitive) is in allowed_extensions."""
+    return '.' in filename and filename.rpartition('.')[2].lower() in allowed_extensions
+
+@app.route('/')
+def index():  # Serves the upload form page (index.html).
+    return render_template('index.html')
+
+@app.route('/upload', methods=['POST'])
+def upload_files():
+    """Transcribe the uploaded audio, diff it against the uploaded text, store the issues.
+
+    Expects multipart fields 'text-file' (.txt) and 'audio-file' (.mp3).
+
+    Returns:
+        400 with {'error': ...} when a part is missing, unnamed, of the wrong
+        type, or the text is not readable as UTF-8; otherwise 200 with the file
+        names, both texts and the difflib.Differ lines. A failed transcription
+        still returns 200, with the 'GCP STT Error: ...' string as the
+        transcript and no differences.
+
+    Side effects: session['pronunciation_issues'] is reset on every call and
+    filled with the differing lines only when a comparison was made.
+    """
+    import uuid  # local import: only this handler needs it
+
+    # Reset once up front so no error path can leave stale issues for /exercise.
+    session['pronunciation_issues'] = []
+
+    if 'text-file' not in request.files or 'audio-file' not in request.files:
+        return jsonify({'error': 'Missing file part(s)'}), 400
+
+    text_file = request.files['text-file']
+    audio_file = request.files['audio-file']
+    if text_file.filename == '' or audio_file.filename == '':
+        return jsonify({'error': 'No selected file(s)'}), 400
+
+    # Check both types before writing anything, so a rejected request leaves no files.
+    if not allowed_file(text_file.filename, ALLOWED_EXTENSIONS_TEXT):
+        return jsonify({'error': 'Invalid text file type. Please upload a .txt file.'}), 400
+    if not allowed_file(audio_file.filename, ALLOWED_EXTENSIONS_AUDIO):
+        return jsonify({'error': 'Invalid audio file type. Please upload an .mp3 file.'}), 400
+
+    text_filename = secure_filename(text_file.filename)
+    audio_filename = secure_filename(audio_file.filename)
+    # Store under a per-request prefix: two uploads that share a name (or run
+    # concurrently) must not overwrite each other. The response still reports
+    # the plain sanitized names.
+    prefix = uuid.uuid4().hex + '_'
+    text_filepath = os.path.join(app.config['UPLOAD_FOLDER'], prefix + text_filename)
+    audio_filepath = os.path.join(app.config['UPLOAD_FOLDER'], prefix + audio_filename)
+
+    try:
+        text_file.save(text_filepath)
+        audio_file.save(audio_filepath)
+        try:
+            with open(text_filepath, 'r', encoding='utf-8') as f:
+                uploaded_text_content = f.read()
+        except (OSError, UnicodeDecodeError) as e:
+            # Report the problem instead of answering "processed successfully"
+            # with an empty text (and paying for a pointless transcription).
+            print(f"Error reading text file: {e}")
+            return jsonify({'error': 'Text file could not be read as UTF-8 text.'}), 400
+        # API failures come back as a 'GCP STT Error: ...' string.
+        transcript = transcribe_audio_gcp(audio_filepath)
+    finally:
+        # The uploads are only needed for this request; do not let them pile up.
+        for path in (text_filepath, audio_filepath):
+            if os.path.exists(path):
+                os.remove(path)
+
+    diff_output_list = []
+    if uploaded_text_content and transcript and not transcript.startswith("GCP STT Error:"):
+        # Line-by-line comparison; the full output (including '  ' and '? '
+        # lines) goes to the frontend for display.
+        diff_output_list = list(difflib.Differ().compare(
+            uploaded_text_content.splitlines(), transcript.splitlines()))
+        # Lines present on only one side ('- ' original, '+ ' transcript) become
+        # the practice items; blank ones are skipped.
+        # NOTE: the default Flask session is a cookie, so the list must stay small.
+        session['pronunciation_issues'] = [
+            line[2:].strip() for line in diff_output_list
+            if line.startswith(('- ', '+ ')) and line[2:].strip()
+        ]
+
+    return jsonify({
+        'message': 'Files processed successfully',
+        'text_file': text_filename,
+        'audio_file': audio_filename,
+        'text_content': uploaded_text_content,
+        'transcribed_text': transcript,
+        'differences': diff_output_list
+    }), 200
+
+@app.route('/exercise')
+def exercise_page():
+    """Render exercise.html with the issues stored in the session.
+
+    The list is written by upload_files(); it defaults to [] when the session
+    has no 'pronunciation_issues' entry.
+    """
+    issues = session.get('pronunciation_issues', [])
+    # No redirect for an empty list: the template shows its own "no issues" text.
+    return render_template('exercise.html', issues=issues)
+
+if __name__ == '__main__':
+    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')  # debug mode exposes the Werkzeug debugger; opt in explicitly
diff --git a/pronunciation_practice_tool/requirements.txt b/pronunciation_practice_tool/requirements.txt
new file mode 100644
index 0000000..78058f1
--- /dev/null
+++ b/pronunciation_practice_tool/requirements.txt
@@ -0,0 +1,3 @@
+Flask>=2.0
+google-cloud-speech>=2.0
+werkzeug>=2.0
diff --git a/pronunciation_practice_tool/static/app.js b/pronunciation_practice_tool/static/app.js
new file mode 100644
index 0000000..7b96b87
--- /dev/null
+++ b/pronunciation_practice_tool/static/app.js
@@ -0,0 +1,112 @@
+// Upload form logic: posts the two files to /upload and renders the JSON reply.
+// Every value taken from the reply is escaped before it is written via innerHTML.
+
+/**
+ * Escape a value for safe insertion into HTML text or attribute values.
+ * null and undefined become the empty string.
+ */
+function escapeHtml(value) {
+    return String(value == null ? '' : value)
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;')
+        .replace(/"/g, '&quot;')
+        .replace(/'/g, '&#39;');
+}
+
+// Inline styles per difflib.Differ prefix; any other line gets DIFF_BASE_STYLE only.
+const DIFF_BASE_STYLE = 'display: block; white-space: pre-wrap;';
+const DIFF_STYLES = {
+    '+ ': 'color: green; background-color: #e6ffe6; ' + DIFF_BASE_STYLE,
+    '- ': 'color: red; background-color: #ffe6e6; ' + DIFF_BASE_STYLE,
+    '? ': 'color: blue; background-color: #e6e6ff; ' + DIFF_BASE_STYLE
+};
+
+/**
+ * Render one Differ line as a styled <span>. The two-character prefix selects
+ * the style and is not displayed; lines shorter than a prefix are shown whole.
+ */
+function renderDiffLine(line) {
+    const hasPrefix = line.length >= 2;
+    const prefix = hasPrefix ? line.substring(0, 2) : '';
+    const displayLine = hasPrefix ? line.substring(2) : line;
+    const style = DIFF_STYLES[prefix] || DIFF_BASE_STYLE;
+    return `<span style="${style}">${escapeHtml(displayLine)}</span>`;
+}
+
+/**
+ * Build the HTML of the comparison section from the /upload reply.
+ * Falls back to an explanatory message when the reply has no diff lines.
+ */
+function renderDiffSection(result) {
+    if (result.differences && result.differences.length > 0) {
+        return result.differences.map(renderDiffLine).join('<br>');
+    }
+    if (result.transcribed_text && !result.transcribed_text.startsWith("GCP STT Error:")) {
+        // The reply carries no diff lines although the transcription succeeded.
+        if (result.text_content === result.transcribed_text) {
+            return "<p>Texts are identical!</p>";
+        }
+        return "<p>No significant differences found by difflib. Texts may be very similar or one might be a subset of the other with no conflicting lines.</p>";
+    }
+    return "<p>Could not generate differences. Transcription might have failed or texts were empty.</p>";
+}
+
+document.getElementById('upload-form').addEventListener('submit', async function(event) {
+    event.preventDefault();
+    const textFile = document.getElementById('text-file').files[0];
+    const audioFile = document.getElementById('audio-file').files[0];
+    const resultsDiv = document.getElementById('results');
+    resultsDiv.innerHTML = ''; // Clear previous results
+
+    if (!textFile || !audioFile) {
+        alert('Please select both a TXT text file and an MP3 audio file.');
+        return;
+    }
+
+    const formData = new FormData();
+    formData.append('text-file', textFile);
+    formData.append('audio-file', audioFile);
+
+    try {
+        const response = await fetch('/upload', {
+            method: 'POST',
+            body: formData
+        });
+
+        const result = await response.json();
+
+        if (response.ok) {
+            // Everything taken from the reply is escaped: it originates from the
+            // uploaded files and must not be interpreted as markup.
+            resultsDiv.innerHTML = `<p><strong>${escapeHtml(result.message)}</strong></p>
+                                    <p>Text file: ${escapeHtml(result.text_file)}</p>
+                                    <p>Audio file: ${escapeHtml(result.audio_file)}</p>
+                                    <hr>
+                                    <h3>Original Uploaded Text:</h3>
+                                    <pre id="uploaded-text-content" style="white-space: pre-wrap;">${escapeHtml(result.text_content)}</pre>
+                                    <hr>
+                                    <h3>Transcribed Text from Audio:</h3>
+                                    <pre id="transcribed-text-content" style="white-space: pre-wrap;">${escapeHtml(result.transcribed_text || "Transcription not available.")}</pre>
+                                    <hr>
+                                    <h3>Text Comparison (Differences):</h3>
+                                    <div id="diff-output">${renderDiffSection(result)}</div>`;
+
+            // Add link to exercise page
+            const exerciseLink = document.createElement('a');
+            exerciseLink.href = '/exercise';
+            exerciseLink.textContent = 'Go to Exercises';
+            exerciseLink.className = 'exercise-link-button'; // For styling
+            resultsDiv.appendChild(document.createElement('hr'));
+            resultsDiv.appendChild(exerciseLink);
+
+            console.log('Processing successful:', result);
+        } else {
+            resultsDiv.innerHTML = `<p style="color:red;">Error: ${escapeHtml(result.error)}</p>`;
+            console.error('Upload failed:', result);
+        }
+    } catch (error) {
+        resultsDiv.innerHTML = `<p style="color:red;">An unexpected error occurred: ${escapeHtml(error.toString())}</p>`;
+        console.error('Network or unexpected error:', error);
+    }
+});
diff --git a/pronunciation_practice_tool/static/style.css b/pronunciation_practice_tool/static/style.css
new file mode 100644
index 0000000..8859d57
--- /dev/null
+++ b/pronunciation_practice_tool/static/style.css
@@ -0,0 +1,12 @@
+/* style.css */
+body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f4f4f4; color: #333; }
+#upload-form div { margin-bottom: 15px; }
+#upload-form label { display: block; margin-bottom: 5px; font-weight: bold; }
+#upload-form input[type="file"], #upload-form button { padding: 10px; border-radius: 5px; border: 1px solid #ddd; }
+#upload-form button { background-color: #007bff; color: white; cursor: pointer; }
+#upload-form button:hover { background-color: #0056b3; }
+#results { margin-top: 20px; padding: 15px; background-color: #fff; border: 1px solid #ddd; border-radius: 5px; }
+#results pre { white-space: pre-wrap; word-wrap: break-word; background-color: #f9f9f9; padding: 10px; border: 1px solid #eee; border-radius: 4px;}
+#diff-output span { display: block; padding: 2px 0; } /* Make each diff line take full width */
+.exercise-link-button { display: inline-block; margin-top: 15px; padding: 10px 15px; background-color: #28a745; color: white; text-decoration: none; border-radius: 5px; }
+.exercise-link-button:hover { background-color: #218838; }
diff --git a/pronunciation_practice_tool/templates/exercise.html b/pronunciation_practice_tool/templates/exercise.html
new file mode 100644
index 0000000..ef28867
--- /dev/null
+++ b/pronunciation_practice_tool/templates/exercise.html
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Pronunciation Exercises</title>
+    <!-- Shared stylesheet; the inline rules below are page-specific and override it where they overlap. -->
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+    <style>
+        body { font-family: sans-serif; margin: 20px; }
+        h1 { color: #333; }
+        ul { list-style-type: disc; margin-left: 20px; }
+        li { margin-bottom: 10px; padding: 5px; background-color: #f9f9f9; border-left: 3px solid #007bff; }
+        .no-issues { color: #555; font-style: italic; }
+        a { color: #007bff; text-decoration: none; }
+        a:hover { text-decoration: underline; }
+        .container { max-width: 800px; margin: auto; padding: 20px; background-color: #fff; border-radius: 8px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
+        .home-link { display: inline-block; margin-top: 20px; padding: 10px 15px; background-color: #007bff; color: white; border-radius: 5px; }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Targeted Pronunciation Practice</h1>
+        
+        {% if issues %}
+            <p>Here are some words or phrases identified for your practice based on your last reading:</p>
+            <ul>
+                {% for issue in issues %}
+                    <li>{{ issue }}</li>
+                {% endfor %}
+            </ul>
+        {% else %}
+            <p class="no-issues">No specific pronunciation issues were identified from your last session, or you haven't uploaded a file yet. Try uploading a text and audio file!</p>
+        {% endif %}
+        
+        <a href="{{ url_for('index') }}" class="home-link">Upload New Files</a>
+    </div>
+</body>
+</html>
diff --git a/pronunciation_practice_tool/templates/index.html b/pronunciation_practice_tool/templates/index.html
new file mode 100644
index 0000000..4c93020
--- /dev/null
+++ b/pronunciation_practice_tool/templates/index.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Pronunciation Practice</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+</head>
+<body>
+    <h1>Upload Your Reading</h1>
+    <form id="upload-form">
+        <div>
+            <label for="text-file">Upload English Text (TXT):</label>
+            <input type="file" id="text-file" name="text-file" accept=".txt" required>
+        </div>
+        <div>
+            <label for="audio-file">Upload Your Reading Audio (MP3):</label>
+            <input type="file" id="audio-file" name="audio-file" accept=".mp3" required>
+        </div>
+        <button type="submit">Upload and Analyze</button>
+    </form>
+    <div id="results">
+        <!-- Results will be displayed here -->
+    </div>
+    <script src="{{ url_for('static', filename='app.js') }}"></script>
+</body>
+</html>
diff --git a/pronunciation_practice_tool/tests/__init__.py b/pronunciation_practice_tool/tests/__init__.py
new file mode 100644
index 0000000..48373ba
--- /dev/null
+++ b/pronunciation_practice_tool/tests/__init__.py
@@ -0,0 +1 @@
+# This file makes tests a Python package.
diff --git a/pronunciation_practice_tool/tests/test_app.py b/pronunciation_practice_tool/tests/test_app.py
new file mode 100644
index 0000000..4c16cd8
--- /dev/null
+++ b/pronunciation_practice_tool/tests/test_app.py
@@ -0,0 +1,123 @@
+import unittest
+from unittest.mock import patch, MagicMock
+import os
+import io
+from pronunciation_practice_tool.app import app # Assuming app.py is in pronunciation_practice_tool
+
+class FlaskAppTests(unittest.TestCase):
+
+    def setUp(self):
+        app.testing = True
+        app.secret_key = 'test_secret_key' # Consistent secret key for testing sessions
+        self.client = app.test_client()
+        # Ensure the UPLOAD_FOLDER exists for tests, typically handled by app itself
+        os.makedirs(os.path.join(app.config['UPLOAD_FOLDER']), exist_ok=True) 
+
+    def tearDown(self):
+        # Clean up any files created in UPLOAD_FOLDER during tests
+        upload_folder = app.config['UPLOAD_FOLDER']
+        for f in os.listdir(upload_folder):
+            if os.path.isfile(os.path.join(upload_folder, f)): # Make sure it's a file
+                os.remove(os.path.join(upload_folder, f))
+        # os.rmdir(upload_folder) # Optionally remove if it was created solely for tests and is empty
+
+    def test_index_page(self):
+        response = self.client.get('/')
+        self.assertEqual(response.status_code, 200)
+        self.assertIn(b"Upload Your Reading", response.data)
+
+    @patch('pronunciation_practice_tool.app.transcribe_audio_gcp')
+    def test_upload_files_success(self, mock_transcribe_audio_gcp):
+        # Mock the STT function
+        mock_transcribe_audio_gcp.return_value = "This is the transcribed text."
+
+        data = {
+            'text-file': (io.BytesIO(b"This is the original text."), 'test.txt'),
+            'audio-file': (io.BytesIO(b"dummy mp3 data"), 'test.mp3')
+        }
+        response = self.client.post('/upload', content_type='multipart/form-data', data=data)
+        
+        self.assertEqual(response.status_code, 200)
+        json_data = response.get_json()
+        self.assertEqual(json_data['message'], 'Files processed successfully')
+        self.assertEqual(json_data['text_content'], "This is the original text.")
+        self.assertEqual(json_data['transcribed_text'], "This is the transcribed text.")
+        
+        # Check session for pronunciation issues
+        with self.client.session_transaction() as sess:
+            self.assertIn('pronunciation_issues', sess)
+            # Comparing ["This is the original text."] with ["This is the transcribed text."],
+            # difflib.Differ reports the first as a '- ' line and the second as a '+ ' line
+            # (any '? ' hint lines are ignored by the app when it collects issues).
+            # So issues should be: ["This is the original text.", "This is the transcribed text."] (after stripping prefixes)
+            
+            # A more robust check for the content of pronunciation_issues:
+            expected_issues = sorted(["This is the original text.", "This is the transcribed text."])
+            self.assertEqual(sorted(sess['pronunciation_issues']), expected_issues)
+
+
+    def test_upload_files_missing_file(self):
+        data = {
+            'text-file': (io.BytesIO(b"some text"), 'test.txt')
+            # Missing audio-file
+        }
+        response = self.client.post('/upload', content_type='multipart/form-data', data=data)
+        self.assertEqual(response.status_code, 400)
+        json_data = response.get_json()
+        self.assertEqual(json_data['error'], 'Missing file part(s)')
+
+    def test_upload_files_invalid_text_type(self):
+        data = {
+            'text-file': (io.BytesIO(b"some text"), 'test.docx'), # Invalid type
+            'audio-file': (io.BytesIO(b"dummy mp3 data"), 'test.mp3')
+        }
+        response = self.client.post('/upload', content_type='multipart/form-data', data=data)
+        self.assertEqual(response.status_code, 400)
+        json_data = response.get_json()
+        self.assertEqual(json_data['error'], 'Invalid text file type. Please upload a .txt file.')
+
+    def test_upload_files_invalid_audio_type(self):
+        data = {
+            'text-file': (io.BytesIO(b"some text"), 'test.txt'),
+            'audio-file': (io.BytesIO(b"dummy wav data"), 'test.wav') # Invalid type
+        }
+        response = self.client.post('/upload', content_type='multipart/form-data', data=data)
+        self.assertEqual(response.status_code, 400)
+        json_data = response.get_json()
+        self.assertEqual(json_data['error'], 'Invalid audio file type. Please upload an .mp3 file.')
+
+    @patch('pronunciation_practice_tool.app.transcribe_audio_gcp')
+    def test_pronunciation_issues_storage_and_exercise_page(self, mock_transcribe_audio_gcp):
+        mock_transcribe_audio_gcp.return_value = "hello world" # Different from original
+        original_text = "hallo worlde"
+
+        data = {
+            'text-file': (io.BytesIO(original_text.encode('utf-8')), 'original.txt'),
+            'audio-file': (io.BytesIO(b"dummy audio"), 'audio.mp3')
+        }
+        self.client.post('/upload', content_type='multipart/form-data', data=data) # Populate session
+
+        # Check exercise page
+        response = self.client.get('/exercise')
+        self.assertEqual(response.status_code, 200)
+        self.assertIn(b"Targeted Pronunciation Practice", response.data)
+        # difflib will produce something like:
+        # - hallo worlde
+        # ? ^     ^
+        # + hello world
+        # ? ^     ^
+        # Issues stored: "hallo worlde", "hello world"
+        self.assertIn(b"hallo worlde", response.data)
+        self.assertIn(b"hello world", response.data)
+
+    def test_exercise_page_no_issues(self):
+        # Access exercise page with an empty session
+        with self.client.session_transaction() as sess:
+            sess['pronunciation_issues'] = []
+        
+        response = self.client.get('/exercise')
+        self.assertEqual(response.status_code, 200)
+        self.assertIn(b"No specific pronunciation issues were identified", response.data)
+
+if __name__ == '__main__':
+    unittest.main()  # default exit=True: a failing run must end with a non-zero exit status
diff --git a/pronunciation_practice_tool/uploads/.gitkeep b/pronunciation_practice_tool/uploads/.gitkeep
new file mode 100644
index 0000000..f003fa2
--- /dev/null
+++ b/pronunciation_practice_tool/uploads/.gitkeep
@@ -0,0 +1 @@
+# This file is to ensure the directory is tracked by git.