Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pronunciation_practice_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# This file makes pronunciation_practice_tool a Python package.
147 changes: 147 additions & 0 deletions pronunciation_practice_tool/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import os
from flask import Flask, request, jsonify, render_template, session # Added session
from werkzeug.utils import secure_filename
from google.cloud import speech
import difflib

app = Flask(__name__, template_folder='templates', static_folder='static')
# The session cookie is signed with this key. Prefer a stable key from the
# environment: a key regenerated on every start invalidates all existing
# sessions on restart and breaks sessions when several worker processes each
# generate their own. The random fallback keeps local development working
# without any configuration.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)

# Directory (relative to the working directory) that receives uploaded files.
UPLOAD_FOLDER = 'uploads'
# Lower-case extensions accepted for the reference text and the recording.
ALLOWED_EXTENSIONS_TEXT = {'txt'}
ALLOWED_EXTENSIONS_AUDIO = {'mp3'}

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Function to transcribe audio using Google Cloud Speech-to-Text
def transcribe_audio_gcp(audio_file_path):
    """Transcribe an audio file with Google Cloud Speech-to-Text.

    Args:
        audio_file_path: Path of the audio file to send for recognition.

    Returns:
        The top transcript of every recognition result joined into one
        string, or a string starting with ``"GCP STT Error:"`` when anything
        fails. Failures are reported in-band (not raised) because callers
        test for that prefix.
    """
    try:
        # Client construction and the file read live inside the ``try`` so
        # that missing credentials or an unreadable file produce the in-band
        # error string instead of an unhandled exception.
        client = speech.SpeechClient()

        with open(audio_file_path, 'rb') as audio_file:
            content = audio_file.read()

        audio = speech.RecognitionAudio(content=content)
        # NOTE(review): no ``encoding`` / ``sample_rate_hertz`` is set here,
        # while the app only accepts .mp3 uploads -- confirm the API version
        # in use can recognize MP3 without an explicit encoding.
        config = speech.RecognitionConfig(
            language_code='en-US',
            enable_automatic_punctuation=True,
        )

        response = client.recognize(config=config, audio=audio)
        # Skip results that carry no alternatives rather than failing the
        # whole transcription on an IndexError.
        return "".join(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        )
    except Exception as e:
        print(f"Google Cloud Speech-to-Text Error: {e}")
        if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
            return "GCP STT Error: GOOGLE_APPLICATION_CREDENTIALS not set."
        return f"GCP STT Error: {e}"


def allowed_file(filename, allowed_extensions):
    """Tell whether *filename* ends in one of *allowed_extensions*.

    The extension is the text after the last dot, compared in lower case.
    A name without any dot is never allowed.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in allowed_extensions

@app.route('/')
def index():
    """Serve the upload form rendered from ``index.html``."""
    landing_page = render_template('index.html')
    return landing_page

def _compare_texts(original_text, transcript):
    """Diff the reference text against the transcript, line by line.

    Returns:
        A ``(diff_lines, issues)`` tuple. ``diff_lines`` is the full
        ``difflib.Differ`` output (every line keeps its two-character
        prefix). ``issues`` holds the stripped, non-empty content of the
        lines that were removed (``'- '``) or added (``'+ '``).
    """
    diff_lines = list(
        difflib.Differ().compare(original_text.splitlines(), transcript.splitlines())
    )
    issues = []
    for line in diff_lines:
        if line.startswith(('- ', '+ ')):
            issue_text = line[2:].strip()
            if issue_text:
                issues.append(issue_text)
    return diff_lines, issues


@app.route('/upload', methods=['POST'])
def upload_files():
    """Accept a reference text and a recording, transcribe, and diff them.

    Expects a multipart POST with a ``text-file`` (.txt) part and an
    ``audio-file`` (.mp3) part.

    Returns:
        A JSON body and status code. 400 with an ``error`` message when a
        part is missing, unnamed, of the wrong type, or the text cannot be
        read as UTF-8. Otherwise 200 with the file names, the text, the
        transcript and the line diff. A failed transcription is still a 200:
        ``transcribed_text`` then starts with ``"GCP STT Error:"`` and
        ``differences`` is empty.

    Side effects:
        ``session['pronunciation_issues']`` is reset on every call and filled
        with the differing lines only when a comparison actually ran.
    """
    import tempfile  # function-scope import: only this handler needs it

    # Reset up front so every early return below leaves no stale issues
    # behind from a previous upload.
    session['pronunciation_issues'] = []

    text_file = request.files.get('text-file')
    audio_file = request.files.get('audio-file')
    if text_file is None or audio_file is None:
        return jsonify({'error': 'Missing file part(s)'}), 400

    if text_file.filename == '' or audio_file.filename == '':
        return jsonify({'error': 'No selected file(s)'}), 400

    # Validate both types before anything touches the disk, so a rejected
    # request never leaves a half-saved upload to clean up.
    if not allowed_file(text_file.filename, ALLOWED_EXTENSIONS_TEXT):
        return jsonify({'error': 'Invalid text file type. Please upload a .txt file.'}), 400
    if not allowed_file(audio_file.filename, ALLOWED_EXTENSIONS_AUDIO):
        return jsonify({'error': 'Invalid audio file type. Please upload an .mp3 file.'}), 400

    text_filename = secure_filename(text_file.filename)
    audio_filename = secure_filename(audio_file.filename)

    # Each request works in its own scratch directory: two users uploading
    # the same file name can no longer overwrite each other, and the uploads
    # are removed once the request has been processed.
    with tempfile.TemporaryDirectory(dir=app.config['UPLOAD_FOLDER']) as workdir:
        text_filepath = os.path.join(workdir, text_filename)
        text_file.save(text_filepath)
        try:
            with open(text_filepath, 'r', encoding='utf-8') as f:
                uploaded_text_content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Report the problem instead of silently continuing with empty
            # text and still paying for a transcription that cannot be used.
            print(f"Error reading text file: {e}")
            return jsonify({'error': 'Could not read the text file. Please upload a UTF-8 encoded .txt file.'}), 400

        audio_filepath = os.path.join(workdir, audio_filename)
        audio_file.save(audio_filepath)
        transcript = transcribe_audio_gcp(audio_filepath)

    diff_output_list = []
    if uploaded_text_content and transcript and not transcript.startswith("GCP STT Error:"):
        diff_output_list, pronunciation_issues = _compare_texts(uploaded_text_content, transcript)
        # NOTE(review): if the app uses Flask's default cookie-backed session,
        # a long list of issues may exceed browser cookie size limits --
        # confirm, and consider capping the list or a server-side store.
        session['pronunciation_issues'] = pronunciation_issues

    return jsonify({
        'message': 'Files processed successfully',
        'text_file': text_filename,
        'audio_file': audio_filename,
        'text_content': uploaded_text_content,
        'transcribed_text': transcript,
        'differences': diff_output_list
    }), 200

@app.route('/exercise')
def exercise_page():
    """Render the practice page from the issues kept in the session.

    A missing session entry is treated as an empty list; the list is passed
    to ``exercise.html`` either way.
    """
    stored_issues = session.get('pronunciation_issues', [])
    return render_template('exercise.html', issues=stored_issues)

if __name__ == '__main__':
    # Debug mode turns on the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless it is
    # explicitly requested, e.g. ``FLASK_DEBUG=1 python app.py``.
    debug_requested = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_requested)
3 changes: 3 additions & 0 deletions pronunciation_practice_tool/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Flask>=2.0
google-cloud-speech>=2.0
werkzeug>=2.0
112 changes: 112 additions & 0 deletions pronunciation_practice_tool/static/app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Escape a value so it can be interpolated into an HTML string safely.
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// Inline styles for difflib.Differ lines, keyed by the two-character prefix.
const DIFF_LINE_STYLES = new Map([
    ['+ ', 'color: green; background-color: #e6ffe6; display: block; white-space: pre-wrap;'],
    ['- ', 'color: red; background-color: #ffe6e6; display: block; white-space: pre-wrap;'],
    ['? ', 'color: blue; background-color: #e6e6ff; display: block; white-space: pre-wrap;']
]);
// Style for common lines (Differ prefixes them with two spaces).
const DIFF_COMMON_STYLE = 'display: block; white-space: pre-wrap;';

// Render one Differ line as a styled <span>; the prefix selects the style
// and is removed from the displayed text.
function renderDiffLine(line) {
    // Lines shorter than the prefix are shown whole, with the common style.
    const hasPrefix = line.length >= 2;
    const prefix = hasPrefix ? line.substring(0, 2) : '';
    const displayLine = hasPrefix ? line.substring(2) : line;
    const style = DIFF_LINE_STYLES.get(prefix) || DIFF_COMMON_STYLE;
    return `<span style="${style}">${escapeHtml(displayLine)}</span>`;
}

// Build the HTML for the comparison section of a successful response.
function buildDiffHtml(result) {
    const differences = Array.isArray(result.differences) ? result.differences : [];
    if (differences.length === 0) {
        // The server only produces a diff when both texts are non-empty and
        // the transcription succeeded.
        return '<p>Could not generate differences. Transcription might have failed or texts were empty.</p>';
    }
    // Differ also emits unchanged lines, so a non-empty list does not by
    // itself mean the texts differ; look for an actual change marker.
    const hasChanges = differences.some(line => /^[+\-?] /.test(line));
    if (!hasChanges) {
        return '<p>Texts are identical!</p>';
    }
    return differences.map(renderDiffLine).join('<br>');
}

// Upload both files to /upload and render the transcript and the diff.
document.getElementById('upload-form').addEventListener('submit', async function(event) {
    event.preventDefault();
    const textFile = document.getElementById('text-file').files[0];
    const audioFile = document.getElementById('audio-file').files[0];
    const resultsDiv = document.getElementById('results');
    resultsDiv.innerHTML = ''; // Clear previous results

    if (!textFile || !audioFile) {
        alert('Please select both a TXT text file and an MP3 audio file.');
        return;
    }

    const formData = new FormData();
    formData.append('text-file', textFile);
    formData.append('audio-file', audioFile);

    try {
        const response = await fetch('/upload', {
            method: 'POST',
            body: formData
        });

        let result;
        try {
            result = await response.json();
        } catch (parseError) {
            // The body was not JSON (for example an HTML error page); report
            // the HTTP status instead of an opaque parse error.
            resultsDiv.innerHTML = `<p style="color:red;">Error: the server returned an unexpected response (HTTP ${response.status}).</p>`;
            console.error('Could not parse server response:', parseError);
            return;
        }

        if (!response.ok) {
            resultsDiv.innerHTML = `<p style="color:red;">Error: ${escapeHtml(result.error)}</p>`;
            console.error('Upload failed:', result);
            return;
        }

        // Everything that comes from the server (and so from the uploaded
        // files) is escaped before it goes into innerHTML.
        resultsDiv.innerHTML = `<p><strong>${escapeHtml(result.message)}</strong></p>
<p>Text file: ${escapeHtml(result.text_file)}</p>
<p>Audio file: ${escapeHtml(result.audio_file)}</p>
<hr>
<h3>Original Uploaded Text:</h3>
<pre id="uploaded-text-content" style="white-space: pre-wrap;">${escapeHtml(result.text_content)}</pre>
<hr>
<h3>Transcribed Text from Audio:</h3>
<pre id="transcribed-text-content" style="white-space: pre-wrap;">${escapeHtml(result.transcribed_text || "Transcription not available.")}</pre>
<hr>
<h3>Text Comparison (Differences):</h3>
<div id="diff-output">${buildDiffHtml(result)}</div>`;

        // Add link to exercise page
        const exerciseLink = document.createElement('a');
        exerciseLink.href = '/exercise';
        exerciseLink.textContent = 'Go to Exercises';
        exerciseLink.className = 'exercise-link-button'; // For styling
        resultsDiv.appendChild(document.createElement('hr'));
        resultsDiv.appendChild(exerciseLink);

        console.log('Processing successful:', result);
    } catch (error) {
        resultsDiv.innerHTML = `<p style="color:red;">An unexpected error occurred: ${escapeHtml(error.toString())}</p>`;
        console.error('Network or unexpected error:', error);
    }
});
12 changes: 12 additions & 0 deletions pronunciation_practice_tool/static/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* style.css */
/* Page defaults. */
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f4f4f4; color: #333; }
/* Upload form: one stacked row per field, label above its input. */
#upload-form div { margin-bottom: 15px; }
#upload-form label { display: block; margin-bottom: 5px; font-weight: bold; }
#upload-form input[type="file"], #upload-form button { padding: 10px; border-radius: 5px; border: 1px solid #ddd; }
#upload-form button { background-color: #007bff; color: white; cursor: pointer; }
#upload-form button:hover { background-color: #0056b3; }
/* Results panel and the preformatted text blocks inside it. */
#results { margin-top: 20px; padding: 15px; background-color: #fff; border: 1px solid #ddd; border-radius: 5px; }
#results pre { white-space: pre-wrap; word-wrap: break-word; background-color: #f9f9f9; padding: 10px; border: 1px solid #eee; border-radius: 4px;}
#diff-output span { display: block; padding: 2px 0; } /* Make each diff line take full width */
/* Link to the exercise page, styled as a button. */
.exercise-link-button { display: inline-block; margin-top: 15px; padding: 10px 15px; background-color: #28a745; color: white; text-decoration: none; border-radius: 5px; }
.exercise-link-button:hover { background-color: #218838; }
39 changes: 39 additions & 0 deletions pronunciation_practice_tool/templates/exercise.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<!DOCTYPE html>
{# Practice page: lists the entries of the `issues` template variable, or a placeholder message when it is empty. #}
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Pronunciation Exercises</title>
<!-- Shared stylesheet; the inline rules below are specific to this page. -->
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
<style>
/* Page-specific rules; this block comes after the stylesheet link above. */
body { font-family: sans-serif; margin: 20px; }
h1 { color: #333; }
ul { list-style-type: disc; margin-left: 20px; }
li { margin-bottom: 10px; padding: 5px; background-color: #f9f9f9; border-left: 3px solid #007bff; }
.no-issues { color: #555; font-style: italic; }
a { color: #007bff; text-decoration: none; }
a:hover { text-decoration: underline; }
.container { max-width: 800px; margin: auto; padding: 20px; background-color: #fff; border-radius: 8px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
.home-link { display: inline-block; margin-top: 20px; padding: 10px 15px; background-color: #007bff; color: white; border-radius: 5px; }
</style>
</head>
<body>
<div class="container">
<h1>Targeted Pronunciation Practice</h1>

{# A non-empty `issues` list renders one bullet per entry; otherwise the placeholder paragraph is shown. #}
{% if issues %}
<p>Here are some words or phrases identified for your practice based on your last reading:</p>
<ul>
{% for issue in issues %}
<li>{{ issue }}</li>
{% endfor %}
</ul>
{% else %}
<p class="no-issues">No specific pronunciation issues were identified from your last session, or you haven't uploaded a file yet. Try uploading a text and audio file!</p>
{% endif %}

{# Link back to the upload form served by the `index` view. #}
<a href="{{ url_for('index') }}" class="home-link">Upload New Files</a>
</div>
</body>
</html>
27 changes: 27 additions & 0 deletions pronunciation_practice_tool/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html>
{# Upload page: a two-file form plus an empty container that the script loaded at the bottom fills with results. #}
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Pronunciation Practice</title>
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
<h1>Upload Your Reading</h1>
<!-- No action/method is set on the form; the element ids below are the ones static/app.js looks up. -->
<form id="upload-form">
<div>
<label for="text-file">Upload English Text (TXT):</label>
<input type="file" id="text-file" name="text-file" accept=".txt" required>
</div>
<div>
<label for="audio-file">Upload Your Reading Audio (MP3):</label>
<input type="file" id="audio-file" name="audio-file" accept=".mp3" required>
</div>
<button type="submit">Upload and Analyze</button>
</form>
<div id="results">
<!-- Results will be displayed here -->
</div>
<!-- Loaded after the form and results container so both exist when the script runs. -->
<script src="{{ url_for('static', filename='app.js') }}"></script>
</body>
</html>
1 change: 1 addition & 0 deletions pronunciation_practice_tool/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# This file makes tests a Python package.
Loading