From 258d837dadbc78e0fc3f2d2ff4b52545e7b55f64 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 07:14:01 +0000 Subject: [PATCH] feat: Add initial pronunciation practice tool This commit introduces a new web application designed to help you improve your English pronunciation. Key features include: - **File Upload:** You can upload a TXT file containing English text and an MP3 audio file of your reading. - **Speech-to-Text (STT):** The backend uses Google Cloud Speech-to-Text (GCP STT) to transcribe your audio. (Requires GCP setup). - **Text Comparison:** The original text is compared with the transcribed text using difflib to identify discrepancies. - **Results Display:** The frontend shows the original text, the transcribed text, and a highlighted diff of the comparison. - **Issue Storage:** Identified pronunciation differences (problematic words/phrases) are stored in your session. - **Exercise Page:** A dedicated page (`/exercise`) displays these stored issues, providing targeted practice material. - **Backend & Frontend:** Implemented using Flask for the backend and vanilla JavaScript for the frontend. - **Unit Tests:** Basic unit tests for the Flask backend logic have been added, covering file handling, STT mocking, session management, and page rendering. The tool is structured within the `pronunciation_practice_tool` directory, containing templates, static assets, upload handling, and tests. 
--- pronunciation_practice_tool/__init__.py | 1 + pronunciation_practice_tool/app.py | 147 ++++++++++++++++++ pronunciation_practice_tool/requirements.txt | 3 + pronunciation_practice_tool/static/app.js | 112 +++++++++++++ pronunciation_practice_tool/static/style.css | 12 ++ .../templates/exercise.html | 39 +++++ .../templates/index.html | 27 ++++ pronunciation_practice_tool/tests/__init__.py | 1 + pronunciation_practice_tool/tests/test_app.py | 123 +++++++++++++++ pronunciation_practice_tool/uploads/.gitkeep | 1 + 10 files changed, 466 insertions(+) create mode 100644 pronunciation_practice_tool/__init__.py create mode 100644 pronunciation_practice_tool/app.py create mode 100644 pronunciation_practice_tool/requirements.txt create mode 100644 pronunciation_practice_tool/static/app.js create mode 100644 pronunciation_practice_tool/static/style.css create mode 100644 pronunciation_practice_tool/templates/exercise.html create mode 100644 pronunciation_practice_tool/templates/index.html create mode 100644 pronunciation_practice_tool/tests/__init__.py create mode 100644 pronunciation_practice_tool/tests/test_app.py create mode 100644 pronunciation_practice_tool/uploads/.gitkeep diff --git a/pronunciation_practice_tool/__init__.py b/pronunciation_practice_tool/__init__.py new file mode 100644 index 0000000..e17c3f6 --- /dev/null +++ b/pronunciation_practice_tool/__init__.py @@ -0,0 +1 @@ +# This file makes pronunciation_practice_tool a Python package. 
diff --git a/pronunciation_practice_tool/app.py b/pronunciation_practice_tool/app.py new file mode 100644 index 0000000..6cd1d8d --- /dev/null +++ b/pronunciation_practice_tool/app.py @@ -0,0 +1,147 @@ +import os +from flask import Flask, request, jsonify, render_template, session # Added session +from werkzeug.utils import secure_filename +from google.cloud import speech +import difflib + +app = Flask(__name__, template_folder='templates', static_folder='static') +app.secret_key = os.urandom(24) # Necessary for session management + +UPLOAD_FOLDER = 'uploads' +ALLOWED_EXTENSIONS_TEXT = {'txt'} +ALLOWED_EXTENSIONS_AUDIO = {'mp3'} + +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +os.makedirs(UPLOAD_FOLDER, exist_ok=True) + +# Function to transcribe audio using Google Cloud Speech-to-Text +def transcribe_audio_gcp(audio_file_path): + client = speech.SpeechClient() + + with open(audio_file_path, 'rb') as audio_file: + content = audio_file.read() + + audio = speech.RecognitionAudio(content=content) + config = speech.RecognitionConfig( + language_code='en-US', + enable_automatic_punctuation=True + ) + + try: + response = client.recognize(config=config, audio=audio) + transcript = "".join(result.alternatives[0].transcript for result in response.results) + return transcript + except Exception as e: + print(f"Google Cloud Speech-to-Text Error: {e}") + if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"): + return "GCP STT Error: GOOGLE_APPLICATION_CREDENTIALS not set." + return f"GCP STT Error: {e}" + + +def allowed_file(filename, allowed_extensions): + return '.' 
in filename and \ + filename.rsplit('.', 1)[1].lower() in allowed_extensions + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/upload', methods=['POST']) +def upload_files(): + if 'text-file' not in request.files or 'audio-file' not in request.files: + session['pronunciation_issues'] = [] # Clear/initialize session on error + return jsonify({'error': 'Missing file part(s)'}), 400 + + text_file = request.files['text-file'] + audio_file = request.files['audio-file'] + + if text_file.filename == '' or audio_file.filename == '': + session['pronunciation_issues'] = [] # Clear/initialize session on error + return jsonify({'error': 'No selected file(s)'}), 400 + + text_filename = '' + audio_filename = '' + uploaded_text_content = "" + transcript = "" + diff_output_list = [] + pronunciation_issues = [] + + # Always initialize session variable at the start of processing + session['pronunciation_issues'] = [] + + if text_file and allowed_file(text_file.filename, ALLOWED_EXTENSIONS_TEXT): + text_filename = secure_filename(text_file.filename) + text_filepath = os.path.join(app.config['UPLOAD_FOLDER'], text_filename) + text_file.save(text_filepath) + try: + with open(text_filepath, 'r', encoding='utf-8') as f: + uploaded_text_content = f.read() + except Exception as e: + print(f"Error reading text file: {e}") + # uploaded_text_content will remain empty + else: + session['pronunciation_issues'] = [] # Clear/initialize session on error + return jsonify({'error': 'Invalid text file type. 
Please upload a .txt file.'}), 400 + + if audio_file and allowed_file(audio_file.filename, ALLOWED_EXTENSIONS_AUDIO): + audio_filename = secure_filename(audio_file.filename) + audio_filepath = os.path.join(app.config['UPLOAD_FOLDER'], audio_filename) + audio_file.save(audio_filepath) + + transcript = transcribe_audio_gcp(audio_filepath) + + if uploaded_text_content and transcript and not transcript.startswith("GCP STT Error:"): + original_lines = uploaded_text_content.splitlines() + transcript_lines = transcript.splitlines() + + d = difflib.Differ() + diff = list(d.compare(original_lines, transcript_lines)) + diff_output_list = list(diff) # Keep full diff for frontend display + + # Process diff to extract pronunciation issues for session + for line in diff: + if line.startswith('- ') or line.startswith('+ '): + issue_text = line[2:].strip() + if issue_text: + pronunciation_issues.append(issue_text) + + session['pronunciation_issues'] = pronunciation_issues + + # If transcription failed or original text was empty, pronunciation_issues remains empty (as initialized) + # No specific else needed here as session['pronunciation_issues'] is already [] + + else: + # This block is for invalid audio file type + if text_filename and os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], text_filename)): + os.remove(os.path.join(app.config['UPLOAD_FOLDER'], text_filename)) + # session['pronunciation_issues'] is already [] due to initialization at the start or after text file error + return jsonify({'error': 'Invalid audio file type. Please upload an .mp3 file.'}), 400 + + # Final check: if, for any reason, effective processing didn't happen + # (e.g. transcript error after successful file uploads but before diff) + # ensure issues are empty. 
+ if not (uploaded_text_content and transcript and not transcript.startswith("GCP STT Error:")): + session['pronunciation_issues'] = [] + + return jsonify({ + 'message': 'Files processed successfully', + 'text_file': text_filename, + 'audio_file': audio_filename, + 'text_content': uploaded_text_content, + 'transcribed_text': transcript, + 'differences': diff_output_list + }), 200 + +@app.route('/exercise') +def exercise_page(): + issues = session.get('pronunciation_issues', []) + # The prompt mentions: + # if not issues: # If issues list is empty or not found + # # Maybe redirect to home or show a message that no issues were found for practice + # # For now, we'll pass the empty list and let the template handle it. + # pass + # This logic is handled by passing issues (which can be empty) to the template. + return render_template('exercise.html', issues=issues) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/pronunciation_practice_tool/requirements.txt b/pronunciation_practice_tool/requirements.txt new file mode 100644 index 0000000..78058f1 --- /dev/null +++ b/pronunciation_practice_tool/requirements.txt @@ -0,0 +1,3 @@ +Flask>=2.0 +google-cloud-speech>=2.0 +werkzeug>=2.0 diff --git a/pronunciation_practice_tool/static/app.js b/pronunciation_practice_tool/static/app.js new file mode 100644 index 0000000..7b96b87 --- /dev/null +++ b/pronunciation_practice_tool/static/app.js @@ -0,0 +1,112 @@ +document.getElementById('upload-form').addEventListener('submit', async function(event) { + event.preventDefault(); + const textFile = document.getElementById('text-file').files[0]; + const audioFile = document.getElementById('audio-file').files[0]; + const resultsDiv = document.getElementById('results'); + resultsDiv.innerHTML = ''; // Clear previous results + + if (!textFile || !audioFile) { + alert('Please select both a TXT text file and an MP3 audio file.'); + return; + } + + const formData = new FormData(); + formData.append('text-file', textFile); + 
formData.append('audio-file', audioFile); + + try { + const response = await fetch('/upload', { + method: 'POST', + body: formData + }); + + const result = await response.json(); + + if (response.ok) { + let diffHtml = ''; + if (result.differences && result.differences.length > 0) { + diffHtml = result.differences.map(line => { + let style = ''; + let prefix = line.substring(0, 2); + let displayLine = line.substring(2); // Remove prefix for display + + // Handle lines that might be shorter than 2 chars (e.g. empty lines in diff) + // This check was slightly different from my previous version. + if (line.length < 2) { + prefix = ''; + displayLine = line; + } + + if (prefix === '+ ') { + style = 'color: green; background-color: #e6ffe6; display: block; white-space: pre-wrap;'; + } else if (prefix === '- ') { + style = 'color: red; background-color: #ffe6e6; display: block; white-space: pre-wrap;'; + } else if (prefix === '? ') { + style = 'color: blue; background-color: #e6e6ff; display: block; white-space: pre-wrap;'; + } else { + // Common lines (no prefix from difflib.Differ like ' ') + style = 'display: block; white-space: pre-wrap;'; + } + // Escape HTML to prevent XSS + displayLine = displayLine.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;"); + // My previous version returned prefix + displayLine, this one returns displayLine only for styled lines, but the prompt's image shows prefix. + // The prompt's textual JS example for map returns `${line}` which is line *before* `substring(2)`. + // Reconciling: The prompt's JS example for the map function is: return `${line}`; + // This implies 'line' *still contains* the prefix. + // However, the text description says "line = line.substring(2); // Remove prefix for display" + // Let's stick to the JS code block provided in the prompt, which uses the original `line` for the span content. + // This means the prefix will be part of the span, and the color will apply to it too. 
+ // The prompt text description was: return `${line}`; (where line was substringed) + // The prompt's code block was: return `${line}`; (where line was NOT substringed for the span content) + // Let's use the version from the prompt's code block, which seems more complete. + // The provided code in the prompt is: + // line = line.substring(2); // Remove prefix for display + // ... + // return `${line}`; + // This means `displayLine` should be used. + return `<span style="${style}">${displayLine}</span>`; // Using displayLine (line without prefix) + }).join('<br>'); // Using <br> as per prompt's JS + } else if (result.transcribed_text && !result.transcribed_text.startsWith("GCP STT Error:")) { + // If there are no differences, it means the texts are identical or one is empty. + if (result.text_content === result.transcribed_text) { + diffHtml = "

Texts are identical!

"; + } else if (result.transcribed_text) { // Check if transcription was successful + diffHtml = "

No significant differences found by difflib. Texts may be very similar or one might be a subset of the other with no conflicting lines.

"; + } else { + diffHtml = "

Comparison not performed (e.g., transcription failed or texts were empty).

"; + } + } else { + diffHtml = "

Could not generate differences. Transcription might have failed or texts were empty.

" + } + + resultsDiv.innerHTML = `

${result.message}

+

Text file: ${result.text_file}

+

Audio file: ${result.audio_file}

+
+

Original Uploaded Text:

+
${result.text_content}
+
+

Transcribed Text from Audio:

+
${result.transcribed_text || "Transcription not available."}
+
+

Text Comparison (Differences):

+
${diffHtml}
`; + + // Add link to exercise page + const exerciseLink = document.createElement('a'); + exerciseLink.href = '/exercise'; + exerciseLink.textContent = 'Go to Exercises'; + exerciseLink.className = 'exercise-link-button'; // For styling + resultsDiv.appendChild(document.createElement('hr')); + resultsDiv.appendChild(exerciseLink); + + console.log('Processing successful:', result); + } else { + resultsDiv.innerHTML = `

Error: ${result.error}

`; + console.error('Upload failed:', result); + } + } catch (error) { + resultsDiv.innerHTML = `

An unexpected error occurred: ${error.toString()}

`; + console.error('Network or unexpected error:', error); + } +}); diff --git a/pronunciation_practice_tool/static/style.css b/pronunciation_practice_tool/static/style.css new file mode 100644 index 0000000..8859d57 --- /dev/null +++ b/pronunciation_practice_tool/static/style.css @@ -0,0 +1,12 @@ +/* style.css */ +body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f4f4f4; color: #333; } +#upload-form div { margin-bottom: 15px; } +#upload-form label { display: block; margin-bottom: 5px; font-weight: bold; } +#upload-form input[type="file"], #upload-form button { padding: 10px; border-radius: 5px; border: 1px solid #ddd; } +#upload-form button { background-color: #007bff; color: white; cursor: pointer; } +#upload-form button:hover { background-color: #0056b3; } +#results { margin-top: 20px; padding: 15px; background-color: #fff; border: 1px solid #ddd; border-radius: 5px; } +#results pre { white-space: pre-wrap; word-wrap: break-word; background-color: #f9f9f9; padding: 10px; border: 1px solid #eee; border-radius: 4px;} +#diff-output span { display: block; padding: 2px 0; } /* Make each diff line take full width */ +.exercise-link-button { display: inline-block; margin-top: 15px; padding: 10px 15px; background-color: #28a745; color: white; text-decoration: none; border-radius: 5px; } +.exercise-link-button:hover { background-color: #218838; } diff --git a/pronunciation_practice_tool/templates/exercise.html b/pronunciation_practice_tool/templates/exercise.html new file mode 100644 index 0000000..ef28867 --- /dev/null +++ b/pronunciation_practice_tool/templates/exercise.html @@ -0,0 +1,39 @@ + + + + + + Pronunciation Exercises + + + + + +
+

Targeted Pronunciation Practice

+ + {% if issues %} +

Here are some words or phrases identified for your practice based on your last reading:

+ + {% else %} +

No specific pronunciation issues were identified from your last session, or you haven't uploaded a file yet. Try uploading a text and audio file!

+ {% endif %} + + Upload New Files +
+ + diff --git a/pronunciation_practice_tool/templates/index.html b/pronunciation_practice_tool/templates/index.html new file mode 100644 index 0000000..4c93020 --- /dev/null +++ b/pronunciation_practice_tool/templates/index.html @@ -0,0 +1,27 @@ + + + + + + Pronunciation Practice + + + +

Upload Your Reading

+
+
+ + +
+
+ + +
+ +
+
+ +
+ + + diff --git a/pronunciation_practice_tool/tests/__init__.py b/pronunciation_practice_tool/tests/__init__.py new file mode 100644 index 0000000..48373ba --- /dev/null +++ b/pronunciation_practice_tool/tests/__init__.py @@ -0,0 +1 @@ +# This file makes tests a Python package. diff --git a/pronunciation_practice_tool/tests/test_app.py b/pronunciation_practice_tool/tests/test_app.py new file mode 100644 index 0000000..4c16cd8 --- /dev/null +++ b/pronunciation_practice_tool/tests/test_app.py @@ -0,0 +1,123 @@ +import unittest +from unittest.mock import patch, MagicMock +import os +import io +from pronunciation_practice_tool.app import app # Assuming app.py is in pronunciation_practice_tool + +class FlaskAppTests(unittest.TestCase): + + def setUp(self): + app.testing = True + app.secret_key = 'test_secret_key' # Consistent secret key for testing sessions + self.client = app.test_client() + # Ensure the UPLOAD_FOLDER exists for tests, typically handled by app itself + os.makedirs(os.path.join(app.config['UPLOAD_FOLDER']), exist_ok=True) + + def tearDown(self): + # Clean up any files created in UPLOAD_FOLDER during tests + upload_folder = app.config['UPLOAD_FOLDER'] + for f in os.listdir(upload_folder): + if os.path.isfile(os.path.join(upload_folder, f)): # Make sure it's a file + os.remove(os.path.join(upload_folder, f)) + # os.rmdir(upload_folder) # Optionally remove if it was created solely for tests and is empty + + def test_index_page(self): + response = self.client.get('/') + self.assertEqual(response.status_code, 200) + self.assertIn(b"Upload Your Reading", response.data) + + @patch('pronunciation_practice_tool.app.transcribe_audio_gcp') + def test_upload_files_success(self, mock_transcribe_audio_gcp): + # Mock the STT function + mock_transcribe_audio_gcp.return_value = "This is the transcribed text." 
+ + data = { + 'text-file': (io.BytesIO(b"This is the original text."), 'test.txt'), + 'audio-file': (io.BytesIO(b"dummy mp3 data"), 'test.mp3') + } + response = self.client.post('/upload', content_type='multipart/form-data', data=data) + + self.assertEqual(response.status_code, 200) + json_data = response.get_json() + self.assertEqual(json_data['message'], 'Files processed successfully') + self.assertEqual(json_data['text_content'], "This is the original text.") + self.assertEqual(json_data['transcribed_text'], "This is the transcribed text.") + + # Check session for pronunciation issues + with self.client.session_transaction() as sess: + self.assertIn('pronunciation_issues', sess) + # Based on "This is the original text." vs "This is the transcribed text." + # difflib.Differ().compare(["This is the original text."], ["This is the transcribed text."]) yields: + # ['- This is the original text.', '? ^ ^ ^ ^ ^ ^', '+ This is the transcribed text.'] + # So issues should be: ["This is the original text.", "This is the transcribed text."] (after stripping prefixes) + + # A more robust check for the content of pronunciation_issues: + expected_issues = sorted(["This is the original text.", "This is the transcribed text."]) + self.assertEqual(sorted(sess['pronunciation_issues']), expected_issues) + + + def test_upload_files_missing_file(self): + data = { + 'text-file': (io.BytesIO(b"some text"), 'test.txt') + # Missing audio-file + } + response = self.client.post('/upload', content_type='multipart/form-data', data=data) + self.assertEqual(response.status_code, 400) + json_data = response.get_json() + self.assertEqual(json_data['error'], 'Missing file part(s)') + + def test_upload_files_invalid_text_type(self): + data = { + 'text-file': (io.BytesIO(b"some text"), 'test.docx'), # Invalid type + 'audio-file': (io.BytesIO(b"dummy mp3 data"), 'test.mp3') + } + response = self.client.post('/upload', content_type='multipart/form-data', data=data) + 
self.assertEqual(response.status_code, 400) + json_data = response.get_json() + self.assertEqual(json_data['error'], 'Invalid text file type. Please upload a .txt file.') + + def test_upload_files_invalid_audio_type(self): + data = { + 'text-file': (io.BytesIO(b"some text"), 'test.txt'), + 'audio-file': (io.BytesIO(b"dummy wav data"), 'test.wav') # Invalid type + } + response = self.client.post('/upload', content_type='multipart/form-data', data=data) + self.assertEqual(response.status_code, 400) + json_data = response.get_json() + self.assertEqual(json_data['error'], 'Invalid audio file type. Please upload an .mp3 file.') + + @patch('pronunciation_practice_tool.app.transcribe_audio_gcp') + def test_pronunciation_issues_storage_and_exercise_page(self, mock_transcribe_audio_gcp): + mock_transcribe_audio_gcp.return_value = "hello world" # Different from original + original_text = "hallo worlde" + + data = { + 'text-file': (io.BytesIO(original_text.encode('utf-8')), 'original.txt'), + 'audio-file': (io.BytesIO(b"dummy audio"), 'audio.mp3') + } + self.client.post('/upload', content_type='multipart/form-data', data=data) # Populate session + + # Check exercise page + response = self.client.get('/exercise') + self.assertEqual(response.status_code, 200) + self.assertIn(b"Targeted Pronunciation Practice", response.data) + # difflib will produce something like: + # - hallo worlde + # ? ^ ^ + # + hello world + # ? 
^ ^ + # Issues stored: "hallo worlde", "hello world" + self.assertIn(b"hallo worlde", response.data) + self.assertIn(b"hello world", response.data) + + def test_exercise_page_no_issues(self): + # Access exercise page with an empty session + with self.client.session_transaction() as sess: + sess['pronunciation_issues'] = [] + + response = self.client.get('/exercise') + self.assertEqual(response.status_code, 200) + self.assertIn(b"No specific pronunciation issues were identified", response.data) + +if __name__ == '__main__': + unittest.main(argv=['first-arg-is-ignored'], exit=False) # Add these for running in some environments diff --git a/pronunciation_practice_tool/uploads/.gitkeep b/pronunciation_practice_tool/uploads/.gitkeep new file mode 100644 index 0000000..f003fa2 --- /dev/null +++ b/pronunciation_practice_tool/uploads/.gitkeep @@ -0,0 +1 @@ +# This file is to ensure the directory is tracked by git.