dotgreg · jokemon · May 21, 2025
diff --git a/pronunciation_practice_tool/__init__.py b/pronunciation_practice_tool/__init__.py
@@ -0,0 +1 @@
+# This file makes pronunciation_practice_tool a Python package.
diff --git a/pronunciation_practice_tool/app.py b/pronunciation_practice_tool/app.py
@@ -0,0 +1,147 @@
+import os
+from flask import Flask, request, jsonify, render_template, session # Added session
+from werkzeug.utils import secure_filename
+from google.cloud import speech
+import difflib
+
+app = Flask(__name__, template_folder='templates', static_folder='static')
+# The secret key signs the session that carries 'pronunciation_issues' from
+# /upload to /exercise. A key generated at import time changes on every restart
+# (including debug reloads) and differs between worker processes, which silently
+# invalidates existing sessions. Prefer a stable key from the environment and
+# only fall back to a random per-process key for local development.
+app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)
+
+# Directory (relative to the working directory) where uploads are stored.
+UPLOAD_FOLDER = 'uploads'
+# Extensions accepted for the reference text and for the recorded reading.
+ALLOWED_EXTENSIONS_TEXT = {'txt'}
+ALLOWED_EXTENSIONS_AUDIO = {'mp3'}
+
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+# Created at import time so request handlers can save files unconditionally.
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+def transcribe_audio_gcp(audio_file_path):
+    """Transcribe an audio file with Google Cloud Speech-to-Text.
+
+    Args:
+        audio_file_path: Path of the audio file whose bytes are sent for
+            recognition.
+
+    Returns:
+        The top alternative of every recognition result joined into one
+        string. On any failure a string starting with "GCP STT Error:" is
+        returned instead of raising; callers detect failure by that prefix,
+        so it must stay unchanged.
+    """
+    # Client construction and the file read are inside the ``try`` on purpose:
+    # credential problems typically surface while the client is being built,
+    # and previously escaped as an unhandled exception, so the
+    # GOOGLE_APPLICATION_CREDENTIALS hint below could never be returned.
+    try:
+        client = speech.SpeechClient()
+
+        with open(audio_file_path, 'rb') as audio_file:
+            content = audio_file.read()
+
+        audio = speech.RecognitionAudio(content=content)
+        # NOTE(review): no ``encoding`` / ``sample_rate_hertz`` is configured.
+        # The uploads accepted by this app are .mp3 files; confirm that the
+        # installed google-cloud-speech version can recognize MP3 without an
+        # explicit encoding, otherwise every request ends in the error branch.
+        config = speech.RecognitionConfig(
+            language_code='en-US',
+            enable_automatic_punctuation=True
+        )
+
+        response = client.recognize(config=config, audio=audio)
+        # Keep only the first alternative of each result.
+        return "".join(result.alternatives[0].transcript for result in response.results)
+    except Exception as e:
+        # Broad catch is deliberate: this is the boundary that converts every
+        # failure into the error-string contract described above.
+        print(f"Google Cloud Speech-to-Text Error: {e}")
+        if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
+            return "GCP STT Error: GOOGLE_APPLICATION_CREDENTIALS not set."
+        return f"GCP STT Error: {e}"
+
+
+def allowed_file(filename, allowed_extensions):
+    """Tell whether *filename* ends in one of *allowed_extensions*.
+
+    The extension is the text after the last dot, compared case-insensitively.
+    A name without any dot is never allowed.
+    """
+    if '.' not in filename:
+        return False
+    extension = filename.rsplit('.', 1)[1]
+    return extension.lower() in allowed_extensions
+
+@app.route('/')
+def index():
+    """Serve the landing page, which hosts the upload form."""
+    landing_page = render_template('index.html')
+    return landing_page
+
+def _json_error(message, status=400):
+    """Build the ``{'error': ...}`` JSON response the frontend displays."""
+    return jsonify({'error': message}), status
+
+
+def _extract_pronunciation_issues(diff_lines):
+    """Return the non-blank text of every removed/added line in a Differ diff."""
+    changed = (
+        line[2:].strip()
+        for line in diff_lines
+        if line.startswith(('- ', '+ '))
+    )
+    return [text for text in changed if text]
+
+
+@app.route('/upload', methods=['POST'])
+def upload_files():
+    """Store an uploaded text/audio pair, transcribe the audio and diff the two.
+
+    Expects multipart form fields ``text-file`` (.txt) and ``audio-file``
+    (.mp3). Both files are saved in the upload folder under their sanitized
+    names.
+
+    Returns:
+        A ``(json, status)`` pair. On success (200) the JSON carries
+        ``message``, ``text_file``, ``audio_file``, ``text_content``,
+        ``transcribed_text`` and ``differences`` (the raw ``difflib.Differ``
+        lines). Validation problems yield 400 with an ``error`` message.
+        A failed transcription is still a 200; ``transcribed_text`` then
+        starts with "GCP STT Error:" and ``differences`` is empty.
+
+    Side effects:
+        ``session['pronunciation_issues']`` is reset on every request and is
+        only filled with the changed lines when a comparison was performed.
+    """
+    # One reset up front covers every early return below.
+    session['pronunciation_issues'] = []
+
+    if 'text-file' not in request.files or 'audio-file' not in request.files:
+        return _json_error('Missing file part(s)')
+
+    text_file = request.files['text-file']
+    audio_file = request.files['audio-file']
+
+    if text_file.filename == '' or audio_file.filename == '':
+        return _json_error('No selected file(s)')
+
+    # Validate both uploads before touching the disk, so a rejected request
+    # never leaves a half-stored pair behind.
+    if not allowed_file(text_file.filename, ALLOWED_EXTENSIONS_TEXT):
+        return _json_error('Invalid text file type. Please upload a .txt file.')
+    if not allowed_file(audio_file.filename, ALLOWED_EXTENSIONS_AUDIO):
+        return _json_error('Invalid audio file type. Please upload an .mp3 file.')
+
+    text_filename = secure_filename(text_file.filename)
+    text_filepath = os.path.join(app.config['UPLOAD_FOLDER'], text_filename)
+    text_file.save(text_filepath)
+    try:
+        # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM,
+        # which would otherwise make the first line differ from the transcript.
+        with open(text_filepath, 'r', encoding='utf-8-sig') as f:
+            uploaded_text_content = f.read()
+    except (OSError, UnicodeDecodeError) as e:
+        # Fail loudly instead of transcribing audio for a text we cannot read
+        # and then reporting success with empty content.
+        print(f"Error reading text file: {e}")
+        if os.path.exists(text_filepath):
+            os.remove(text_filepath)
+        return _json_error('Could not read the text file. Please upload a UTF-8 encoded .txt file.')
+
+    audio_filename = secure_filename(audio_file.filename)
+    audio_filepath = os.path.join(app.config['UPLOAD_FOLDER'], audio_filename)
+    audio_file.save(audio_filepath)
+
+    transcript = transcribe_audio_gcp(audio_filepath)
+
+    diff_output_list = []
+    transcription_ok = bool(transcript) and not transcript.startswith("GCP STT Error:")
+    if uploaded_text_content and transcription_ok:
+        # NOTE(review): the comparison is line-based, so an "issue" is a whole
+        # line of either text rather than a single word; confirm that this is
+        # the intended granularity for the exercise page.
+        diff_output_list = list(
+            difflib.Differ().compare(
+                uploaded_text_content.splitlines(),
+                transcript.splitlines(),
+            )
+        )
+        session['pronunciation_issues'] = _extract_pronunciation_issues(diff_output_list)
+
+    return jsonify({
+        'message': 'Files processed successfully',
+        'text_file': text_filename,
+        'audio_file': audio_filename,
+        'text_content': uploaded_text_content,
+        'transcribed_text': transcript,
+        'differences': diff_output_list
+    }), 200
+
+@app.route('/exercise')
+def exercise_page():
+    """Render the practice page from the issues stored by the last upload.
+
+    When the session holds no 'pronunciation_issues' entry an empty list is
+    passed, and the template decides what to show in that case.
+    """
+    return render_template(
+        'exercise.html',
+        issues=session.get('pronunciation_issues', []),
+    )
+
+if __name__ == '__main__':
+    # Development entry point only. Debug mode turns on Werkzeug's interactive
+    # debugger, which lets anyone who can reach the server run arbitrary code,
+    # so it must be switchable. It stays on by default to keep the existing
+    # local workflow; set FLASK_DEBUG=0 (or false/no/off) to disable it.
+    debug_enabled = os.environ.get('FLASK_DEBUG', '1').strip().lower() not in ('0', 'false', 'no', 'off')
+    app.run(debug=debug_enabled)
diff --git a/pronunciation_practice_tool/requirements.txt b/pronunciation_practice_tool/requirements.txt
@@ -0,0 +1,3 @@
+Flask>=2.0
+google-cloud-speech>=2.0
+werkzeug>=2.0
diff --git a/pronunciation_practice_tool/static/app.js b/pronunciation_practice_tool/static/app.js
@@ -0,0 +1,112 @@
+// Escape a value for safe interpolation into an HTML string (element content
+// or a quoted attribute). Everything the server echoes back - uploaded text,
+// transcript, file names, error messages - is untrusted and goes through this
+// before it reaches innerHTML.
+function escapeHtml(value) {
+    return String(value)
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;')
+        .replace(/"/g, '&quot;')
+        .replace(/'/g, '&#39;');
+}
+
+// Inline styles keyed by the two-character prefix that Python's difflib.Differ
+// puts in front of every diff line ('+ ' added, '- ' removed, '? ' hint).
+const DIFF_LINE_STYLES = new Map([
+    ['+ ', 'color: green; background-color: #e6ffe6; display: block; white-space: pre-wrap;'],
+    ['- ', 'color: red; background-color: #ffe6e6; display: block; white-space: pre-wrap;'],
+    ['? ', 'color: blue; background-color: #e6e6ff; display: block; white-space: pre-wrap;']
+]);
+// Used for unchanged lines and for anything without a recognised prefix.
+const DEFAULT_DIFF_LINE_STYLE = 'display: block; white-space: pre-wrap;';
+
+// Submit handler: sends both files to /upload and renders the JSON response
+// (original text, transcript and line diff) into #results.
+document.getElementById('upload-form').addEventListener('submit', async function(event) {
+    event.preventDefault();
+    const textFile = document.getElementById('text-file').files[0];
+    const audioFile = document.getElementById('audio-file').files[0];
+    const resultsDiv = document.getElementById('results');
+    resultsDiv.innerHTML = ''; // Clear previous results
+
+    if (!textFile || !audioFile) {
+        alert('Please select both a TXT text file and an MP3 audio file.');
+        return;
+    }
+
+    const formData = new FormData();
+    formData.append('text-file', textFile);
+    formData.append('audio-file', audioFile);
+
+    try {
+        const response = await fetch('/upload', {
+            method: 'POST',
+            body: formData
+        });
+
+        // A non-JSON body (e.g. an HTML error page) makes this throw and is
+        // reported by the catch block below.
+        const result = await response.json();
+
+        if (response.ok) {
+            let diffHtml = '';
+            if (result.differences && result.differences.length > 0) {
+                diffHtml = result.differences.map(line => {
+                    // Lines shorter than the prefix are shown unchanged.
+                    const hasPrefix = line.length >= 2;
+                    const prefix = hasPrefix ? line.substring(0, 2) : '';
+                    const displayLine = hasPrefix ? line.substring(2) : line;
+                    const style = DIFF_LINE_STYLES.get(prefix) || DEFAULT_DIFF_LINE_STYLE;
+                    // The prefix is dropped from the output; the colour conveys it.
+                    return `<span style="${style}">${escapeHtml(displayLine)}</span>`;
+                }).join('<br>');
+            } else if (result.transcribed_text && !result.transcribed_text.startsWith("GCP STT Error:")) {
+                // Transcription worked but the server produced no diff lines.
+                diffHtml = result.text_content === result.transcribed_text
+                    ? "<p>Texts are identical!</p>"
+                    : "<p>No significant differences found by difflib. Texts may be very similar or one might be a subset of the other with no conflicting lines.</p>";
+            } else {
+                diffHtml = "<p>Could not generate differences. Transcription might have failed or texts were empty.</p>";
+            }
+
+            resultsDiv.innerHTML = `<p><strong>${escapeHtml(result.message)}</strong></p>
+                                    <p>Text file: ${escapeHtml(result.text_file)}</p>
+                                    <p>Audio file: ${escapeHtml(result.audio_file)}</p>
+                                    <hr>
+                                    <h3>Original Uploaded Text:</h3>
+                                    <pre id="uploaded-text-content" style="white-space: pre-wrap;">${escapeHtml(result.text_content)}</pre>
+                                    <hr>
+                                    <h3>Transcribed Text from Audio:</h3>
+                                    <pre id="transcribed-text-content" style="white-space: pre-wrap;">${escapeHtml(result.transcribed_text || "Transcription not available.")}</pre>
+                                    <hr>
+                                    <h3>Text Comparison (Differences):</h3>
+                                    <div id="diff-output">${diffHtml}</div>`;
+
+            // Add link to exercise page
+            const exerciseLink = document.createElement('a');
+            exerciseLink.href = '/exercise';
+            exerciseLink.textContent = 'Go to Exercises';
+            exerciseLink.className = 'exercise-link-button'; // For styling
+            resultsDiv.appendChild(document.createElement('hr'));
+            resultsDiv.appendChild(exerciseLink);
+
+            console.log('Processing successful:', result);
+        } else {
+            resultsDiv.innerHTML = `<p style="color:red;">Error: ${escapeHtml(result.error)}</p>`;
+            console.error('Upload failed:', result);
+        }
+    } catch (error) {
+        resultsDiv.innerHTML = `<p style="color:red;">An unexpected error occurred: ${escapeHtml(error)}</p>`;
+        console.error('Network or unexpected error:', error);
+    }
+});
diff --git a/pronunciation_practice_tool/static/style.css b/pronunciation_practice_tool/static/style.css
@@ -0,0 +1,12 @@
+/* style.css - shared stylesheet linked by both index.html and exercise.html */
+/* Page-wide base styles. */
+body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f4f4f4; color: #333; }
+/* Upload form: one stacked label + control per row. */
+#upload-form div { margin-bottom: 15px; }
+#upload-form label { display: block; margin-bottom: 5px; font-weight: bold; }
+#upload-form input[type="file"], #upload-form button { padding: 10px; border-radius: 5px; border: 1px solid #ddd; }
+#upload-form button { background-color: #007bff; color: white; cursor: pointer; }
+#upload-form button:hover { background-color: #0056b3; }
+/* Results panel that app.js fills after an upload. */
+#results { margin-top: 20px; padding: 15px; background-color: #fff; border: 1px solid #ddd; border-radius: 5px; }
+#results pre { white-space: pre-wrap; word-wrap: break-word; background-color: #f9f9f9; padding: 10px; border: 1px solid #eee; border-radius: 4px;}
+#diff-output span { display: block; padding: 2px 0; } /* Make each diff line take full width */
+/* "Go to Exercises" link that app.js appends to the results panel. */
+.exercise-link-button { display: inline-block; margin-top: 15px; padding: 10px 15px; background-color: #28a745; color: white; text-decoration: none; border-radius: 5px; }
+.exercise-link-button:hover { background-color: #218838; }
diff --git a/pronunciation_practice_tool/templates/exercise.html b/pronunciation_practice_tool/templates/exercise.html
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Pronunciation Exercises</title>
+    <!-- Shared stylesheet, also linked by index.html -->
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+    <!-- Page-specific rules. They are declared after style.css, so where both set the same property on the same selector (e.g. the body font and margin) these take precedence. -->
+    <style>
+        body { font-family: sans-serif; margin: 20px; }
+        h1 { color: #333; }
+        ul { list-style-type: disc; margin-left: 20px; }
+        li { margin-bottom: 10px; padding: 5px; background-color: #f9f9f9; border-left: 3px solid #007bff; }
+        .no-issues { color: #555; font-style: italic; }
+        a { color: #007bff; text-decoration: none; }
+        a:hover { text-decoration: underline; }
+        .container { max-width: 800px; margin: auto; padding: 20px; background-color: #fff; border-radius: 8px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
+        .home-link { display: inline-block; margin-top: 20px; padding: 10px 15px; background-color: #007bff; color: white; border-radius: 5px; }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Targeted Pronunciation Practice</h1>
+
+        <!-- "issues" is the list passed in by the /exercise view; an empty list falls through to the message in the else branch. -->
+        {% if issues %}
+            <p>Here are some words or phrases identified for your practice based on your last reading:</p>
+            <ul>
+                {% for issue in issues %}
+                    <li>{{ issue }}</li>
+                {% endfor %}
+            </ul>
+        {% else %}
+            <p class="no-issues">No specific pronunciation issues were identified from your last session, or you haven't uploaded a file yet. Try uploading a text and audio file!</p>
+        {% endif %}
+
+        <!-- Back to the upload form served by the index view. -->
+        <a href="{{ url_for('index') }}" class="home-link">Upload New Files</a>
+    </div>
+</body>
+</html>
diff --git a/pronunciation_practice_tool/templates/index.html b/pronunciation_practice_tool/templates/index.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Pronunciation Practice</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+</head>
+<body>
+    <h1>Upload Your Reading</h1>
+    <!-- No action/method on purpose: static/app.js intercepts the submit event, looks the two inputs up by id and POSTs the files to /upload with fetch(). -->
+    <form id="upload-form">
+        <div>
+            <label for="text-file">Upload English Text (TXT):</label>
+            <input type="file" id="text-file" name="text-file" accept=".txt" required>
+        </div>
+        <div>
+            <label for="audio-file">Upload Your Reading Audio (MP3):</label>
+            <input type="file" id="audio-file" name="audio-file" accept=".mp3" required>
+        </div>
+        <button type="submit">Upload and Analyze</button>
+    </form>
+    <div id="results">
+        <!-- Filled by static/app.js with the texts, the diff and the link to the exercise page -->
+    </div>
+    <!-- Loaded at the end of the body so the form exists when the script attaches its submit listener. -->
+    <script src="{{ url_for('static', filename='app.js') }}"></script>
+</body>
+</html>
diff --git a/pronunciation_practice_tool/tests/__init__.py b/pronunciation_practice_tool/tests/__init__.py
@@ -0,0 +1 @@
+# This file makes tests a Python package.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# This file makes pronunciation_practice_tool a Python package.