diff --git a/.envrc.sample b/.envrc.sample index 623037a..6ae45df 100644 --- a/.envrc.sample +++ b/.envrc.sample @@ -7,6 +7,9 @@ export AWS_REGION="us-east-1" export S3_ENDPOINT=http://docker.for.mac.localhost:9000 # Do not set if connecting to the real AWS S3 bucket export S3_BUCKET_NAME=pdf_accessibility_api # Default value for MinIO in the docker compose environment +# Alt Text Gem Info +export LLM_MODEL=default + #--------------------------------- # The below configurations are not # needed if using docker compose diff --git a/Dockerfile b/Dockerfile index 8fe3c1d..9fa8219 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,6 +71,7 @@ RUN RAILS_ENV=production \ AWS_ACCESS_KEY_ID=key \ AWS_SECRET_ACCESS_KEY=secret \ AWS_REGION=us-east-1 \ + LLM_MODEL=default \ bundle exec rails assets:precompile && \ rm -rf /app/.cache/ && \ rm -rf /app/node_modules/.cache/ && \ diff --git a/README.md b/README.md index 0eb24ed..0aae746 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,9 @@ At its core, the PDF Accessibility API is an interface to an S3 bucket with: - an input directory, where the API places files to be processed by the PDF_Accessibility application - an output directory, where the PDF_Accessibility application places the processed files to be retrieved -The PDF Accessibility API acts as an intermediary to send and retrieve those files for clients. It has two major components: the API and the GUI. +The PDF Accessibility API acts as an intermediary to send and retrieve those files for clients. It has two major components: the API and the GUI. Additionally, there is the option to only generate alt-text for a given image. This option is currently only available through the GUI. -## API +## PDF Remediation - API Refer to the Swagger documentation for endpoint and webhook details at `/api-docs`. 
@@ -29,15 +29,21 @@ We use an `APIUser` model to store metadata for our API users and their associat - The client's `webhook_key` for authenticating with the client system when the final webhook request is sent. - An `email` and `name` to help identify the user. -## GUI +## PDF Remediation - GUI -The GUI is still a work in progress, but its main components are: +The PDF Remediation GUI's main components are: -- `/jobs` — a list of your jobs. -- `/jobs/new` — the page for uploading a file to remediate. -- `/jobs/{id}` — detailed information about a job (linked from `/jobs`). +- `/pdf_jobs` — a list of your jobs. +- `/pdf_jobs/new` — the page for uploading a file to remediate. +- `/pdf_jobs/{id}` — detailed information about a job (linked from `/pdf_jobs`). - `/sidekiq` — Sidekiq interface. +## Image Alt Text - GUI +There is also a standalone GUI just for images. This is for users who only want to generate alt-text for an image without going through the full — and pricey — PDF remediation process. +- `/image_jobs` — a list of image jobs, their links, and their status. +- `/image_jobs/new` — the upload page for a new image. +- `/image_jobs/{id}` — detailed information about an image, including any generated alt-text. + ### Authentication and Authorization - The application uses a remote user header (default: `HTTP_X_AUTH_REQUEST_EMAIL`) to determine the current user, typically set by Azure. 
diff --git a/app/controllers/image_jobs_controller.rb b/app/controllers/image_jobs_controller.rb index f1e1bcd..cebc9d5 100644 --- a/app/controllers/image_jobs_controller.rb +++ b/app/controllers/image_jobs_controller.rb @@ -15,15 +15,18 @@ def new end def create - uploaded_io = params[:image] - object_key = "#{SecureRandom.hex(8)}_#{uploaded_io.original_filename}" + uploads_tmp_dir = Rails.root.join('tmp/uploads') + uploaded_file = params[:image] + object_key = "#{SecureRandom.uuid}_#{uploaded_file.original_filename}" + tmp_path = uploads_tmp_dir.join(object_key).to_s + File.binwrite(tmp_path, uploaded_file.read) job = current_user.image_jobs.build - job.output_object_key = object_key + job.output_object_key = uploaded_file.original_filename job.status = 'processing' job.uuid = SecureRandom.uuid job.save! - ImageAltTextJob.perform_later(job.uuid, uploaded_io.to_json) + ImageAltTextJob.perform_later(job.uuid, tmp_path) render json: { 'jobId' => job.id } end end diff --git a/app/javascript/controllers/job_controller.js b/app/javascript/controllers/job_controller.js index 3df719b..6d8438e 100644 --- a/app/javascript/controllers/job_controller.js +++ b/app/javascript/controllers/job_controller.js @@ -6,6 +6,7 @@ export default class extends Controller { static targets = ['outputObjectKey', 'status', 'finishedAt', + 'altText', 'downloadLink', 'processingErrorMessage']; @@ -32,6 +33,7 @@ export default class extends Controller { this.data.set('outputUrl', data.output_url || '') this.data.set('outputUrlExpired', data.output_url_expired || 'false') this.data.set('processingErrorMessage', data.processing_error_message || '') + this.data.set('altText', data.alt_text || '') this.renderResult() } diff --git a/app/jobs/api_remediation_job.rb b/app/jobs/api_remediation_job.rb index 4addb7a..cfc059e 100644 --- a/app/jobs/api_remediation_job.rb +++ b/app/jobs/api_remediation_job.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class APIRemediationJob < ApplicationJob - include 
RemediationModule + include AppJobModule def perform(job_uuid, output_polling_timeout: OUTPUT_POLLING_TIMEOUT) job = PdfJob.find_by!(uuid: job_uuid) @@ -13,10 +13,10 @@ def perform(job_uuid, output_polling_timeout: OUTPUT_POLLING_TIMEOUT) s3.upload_to_input(file_path) poll_and_update(job_uuid, object_key, output_polling_timeout) rescue S3Handler::Error => e - record_failure_and_notify(job, "Failed to upload file to remediation input location: #{e.message}") + update_with_failure(job, "Failed to upload file to remediation input location: #{e.message}") rescue Down::Error => e # We may want to retry the download depending on the more specific nature of the failure. - record_failure_and_notify(job, "Failed to download file from source URL: #{e.message}") + update_with_failure(job, "Failed to download file from source URL: #{e.message}") ensure RemediationStatusNotificationJob.perform_later(job_uuid) tempfile&.close! diff --git a/app/jobs/concerns/remediation_module.rb b/app/jobs/concerns/app_job_module.rb similarity index 83% rename from app/jobs/concerns/remediation_module.rb rename to app/jobs/concerns/app_job_module.rb index f30ec47..a183109 100644 --- a/app/jobs/concerns/remediation_module.rb +++ b/app/jobs/concerns/app_job_module.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -module RemediationModule +module AppJobModule OUTPUT_POLLING_INTERVAL = 10 # This value was picked somewhat arbitrarily. We may want to adjust. OUTPUT_POLLING_TIMEOUT = 3600 # The default 1-hour timeout is also arbitrary and should probably be adjusted. 
PRESIGNED_URL_EXPIRES_IN = 84_000 @@ -15,14 +15,14 @@ def poll_and_update(job_uuid, object_key, output_polling_timeout) timer += OUTPUT_POLLING_INTERVAL if timer > output_polling_timeout - record_failure_and_notify(job, 'Timed out waiting for output file') + update_with_failure(job, 'Timed out waiting for output file') return true end end update_job(job, output_url, object_key) rescue S3Handler::Error => e # We may want to retry the upload depending on the more specific nature of the failure. - record_failure_and_notify(job, "Failed to upload file to remediation input location: #{e.message}") + update_with_failure(job, "Failed to upload file to remediation input location: #{e.message}") end private @@ -37,7 +37,7 @@ def update_job(job, output_url, object_key) ) end - def record_failure_and_notify(job, message) + def update_with_failure(job, message) job.update( status: 'failed', finished_at: Time.zone.now, diff --git a/app/jobs/gui_remediation_job.rb b/app/jobs/gui_remediation_job.rb index d3bf2d3..b021b11 100644 --- a/app/jobs/gui_remediation_job.rb +++ b/app/jobs/gui_remediation_job.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class GUIRemediationJob < ApplicationJob - include RemediationModule + include AppJobModule def perform(job_uuid, object_key, output_polling_timeout: OUTPUT_POLLING_TIMEOUT) poll_and_update(job_uuid, object_key, output_polling_timeout) diff --git a/app/jobs/image_alt_text_job.rb b/app/jobs/image_alt_text_job.rb index fc22bcd..ff61957 100644 --- a/app/jobs/image_alt_text_job.rb +++ b/app/jobs/image_alt_text_job.rb @@ -1,11 +1,33 @@ # frozen_string_literal: true class ImageAltTextJob < ApplicationJob - def perform(job_uuid, uploaded_io, output_polling_timeout: OUTPUT_POLLING_TIMEOUT) - # To be implemented in #159 - # Open the file file in a temp/uploads path - # Call AltTextGem with path, prompt, llm_model - # Poll and reroute - # File.delete(tmp_path) if File.exist?(tmp_path) + include AppJobModule + + # Generates alt text for the image at tmp_path and stores it on the job identified by job_uuid. + # A failure is recorded on the job via update_with_failure; tmp_path is removed in every case. + def perform(job_uuid, tmp_path) + 
job = Job.find_by!(uuid: job_uuid) + client = AltText::Client.new( + access_key: ENV.fetch('AWS_ACCESS_KEY_ID', nil), + secret_key: ENV.fetch('AWS_SECRET_ACCESS_KEY', nil), + region: ENV.fetch('AWS_REGION', 'us-east-1') + ) + alt_text = client.process_image( + tmp_path, + prompt: Rails.root.join('prompt.txt').read, + model_id: ENV.fetch('LLM_MODEL', nil) + ) + job.update( + status: 'completed', + finished_at: Time.zone.now, + alt_text: alt_text + ) + rescue StandardError => e + # job is nil when the lookup itself raised, so there is no record to mark as failed. + raise unless job + + update_with_failure(job, e.message) + ensure + FileUtils.rm_f(tmp_path) end end diff --git a/app/models/upload_form.rb b/app/models/upload_form.rb deleted file mode 100644 index 4ac2e9f..0000000 --- a/app/models/upload_form.rb +++ /dev/null @@ -1,21 +0,0 @@ -# frozen_string_literal: true - -class UploadForm - UPLOADS_TMP_DIR = Rails.root.join('tmp/uploads') - - include ActiveModel::Validations - include ActiveModel::Model - - attr_accessor :file - - validates :file, presence: true - - def persist_to_tmp! - return unless valid? - - tmp_path = UPLOADS_TMP_DIR.join("#{SecureRandom.uuid}_#{file.original_filename}") - File.binwrite(tmp_path, file.read) - file.rewind if file.respond_to?(:rewind) - tmp_path.to_s - end -end diff --git a/app/views/image_jobs/show.html.erb b/app/views/image_jobs/show.html.erb index 5c16f8c..1d90244 100644 --- a/app/views/image_jobs/show.html.erb +++ b/app/views/image_jobs/show.html.erb @@ -10,12 +10,12 @@ data-job-alt-text="<%= @image_job.alt_text %>" data-job-processing-error-message="<%= @image_job.processing_error_message %>" id="<%= dom_id @image_job %>"> -
  • File Name: <%= @image_job.output_object_key %>
  • +
  • File Name: <%= @image_job.output_object_key %>
  • Job ID: <%= @image_job.uuid %>
  • Started At: <%= @image_job.created_at.getlocal.strftime("%b %e, %Y %l:%M %p") if @image_job.created_at %>
  • Finished At:
  • Status:
  • -
  • Alt Text:
  • +
  • Alt Text: <%= @image_job.alt_text %>
  • Errors:
  • diff --git a/docker-compose.yml b/docker-compose.yml index 1abcb86..7b35d0a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,7 @@ x-web_env: &web_env AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}" S3_BUCKET_NAME: "${S3_BUCKET_NAME}" S3_ENDPOINT: "${S3_ENDPOINT}" + LLM_MODEL: "${LLM_MODEL}" services: web: user: 3000:3000 diff --git a/prompt.txt b/prompt.txt new file mode 100644 index 0000000..b5b0af1 --- /dev/null +++ b/prompt.txt @@ -0,0 +1,22 @@ +You are a professional technical writer trained in crafting image alternative text for accessibility purposes. Your task is to describe the visual content of uploaded images in clear, concise, and objective language that supports screen reader users. Describe the visual content of the image in one concise, objective sentence (≤100 characters). +When given an image, follow these guidelines to create appropriate and effective alternative text descriptions. +1. Technical guidelines: + - Descriptions should not exceed 100 characters. + - Use precise, simple language and clear terminology. + - Use bias-free language: avoid assumptions about gender, ability, race, or age. +2. Language guidelines: + - Do not use ambiguous adjectives (e.g., “tranquil,” “vintage,” “rural”). + - Do not use subjective adjectives (e.g., "traditional," "rustic") + - Avoid assumptions or guessing unclear elements. + - Do not include “image of,” “photo of,” or similar phrases. + - Focus only on the visible, essential elements in the image. +3. Output guidelines: + - Assume one image per input and respond with one alt text string. +Please see the examples provided to help guide your description structure. + +"A lighthouse on a rocky coast under a cloudy sky." +"Three people walking along a path surrounded by green trees." +“A person wearing a beret and glasses eating from a bowl.” + +Generate the alt text description following these rules. 
+ diff --git a/spec/jobs/api_remediation_job_spec.rb b/spec/jobs/api_remediation_job_spec.rb index ca9167f..6258c4f 100644 --- a/spec/jobs/api_remediation_job_spec.rb +++ b/spec/jobs/api_remediation_job_spec.rb @@ -48,7 +48,7 @@ class Tempfile expect(reloaded_job.finished_at).to be_within(1.minute).of(Time.zone.now) expect(reloaded_job.output_object_key).to match /[a-f0-9]{16}_file\.pdf/ expect(reloaded_job.output_url_expires_at).to be_within(1.minute) - .of(RemediationModule::PRESIGNED_URL_EXPIRES_IN.seconds.from_now) + .of(AppJobModule::PRESIGNED_URL_EXPIRES_IN.seconds.from_now) end it 'queues up a notification about the status of the job' do diff --git a/spec/jobs/gui_remediation_job_spec.rb b/spec/jobs/gui_remediation_job_spec.rb index 04fd347..c706543 100644 --- a/spec/jobs/gui_remediation_job_spec.rb +++ b/spec/jobs/gui_remediation_job_spec.rb @@ -37,7 +37,7 @@ expect(reloaded_job.finished_at).to be_within(1.minute).of(Time.zone.now) expect(reloaded_job.output_object_key).to match /[a-f0-9]{16}_testing\.pdf/ expect(reloaded_job.output_url_expires_at).to be_within(1.minute) - .of(RemediationModule::PRESIGNED_URL_EXPIRES_IN.seconds.from_now) + .of(AppJobModule::PRESIGNED_URL_EXPIRES_IN.seconds.from_now) end it 'does not queue up a notification about the status of the job' do diff --git a/spec/jobs/image_alt_text_job_spec.rb b/spec/jobs/image_alt_text_job_spec.rb new file mode 100644 index 0000000..ef91753 --- /dev/null +++ b/spec/jobs/image_alt_text_job_spec.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe ImageAltTextJob do + let!(:job) { create(:image_job) } + let!(:alt_text_response) { 'Generated Alt-text' } + let(:alt_text_gem) { + instance_spy( + AltText::Client, + process_image: alt_text_response + ) + } + let!(:file_path) { Rack::Test::UploadedFile.new(File.new("#{Rails.root}/spec/fixtures/files/lion.jpg"), + 'image/jpg', + original_filename: 'lion.jpg').path } + + before do + allow(AltText::Client).to 
receive(:new).and_return alt_text_gem + end + + describe '#perform' do + context 'when the job is called with job uuid and file' do + before do + described_class.perform_now(job.uuid, file_path) + end + + it 'calls the Alt Text gem' do + expect(alt_text_gem).to have_received(:process_image).with( + /.+\.jpg/, prompt: File.read('prompt.txt'), model_id: ENV.fetch('LLM_MODEL', nil) + ) + end + + it 'updates the alt_text of the given image job record' do + reloaded_job = job.reload + expect(reloaded_job.status).to eq 'completed' + expect(reloaded_job.finished_at).to be_within(1.minute).of(Time.zone.now) + expect(job.reload.alt_text).to eq(alt_text_response) + end + end + + context 'when an error occurs while uploading the image file' do + before do + allow(alt_text_gem).to receive(:process_image).and_raise(StandardError) + end + + it 'updates the status and metadata of the given image job record' do + described_class.perform_now(job.uuid, file_path) + reloaded_job = job.reload + expect(reloaded_job.status).to eq 'failed' + expect(reloaded_job.finished_at).to be_within(1.minute).of(Time.zone.now) + expect(reloaded_job.processing_error_message).to eq 'StandardError' + expect(job.reload.alt_text).to be_nil + end + end + end +end diff --git a/spec/models/upload_form_spec.rb b/spec/models/upload_form_spec.rb deleted file mode 100644 index 9f70fb1..0000000 --- a/spec/models/upload_form_spec.rb +++ /dev/null @@ -1,33 +0,0 @@ -# frozen_string_literal: true - -require 'rails_helper' - -RSpec.describe UploadForm do - it { is_expected.to validate_presence_of :file } - - describe '#persist_to_tmp!' do - let(:form) { described_class.new(file: file) } - let(:persisted_path) { form.persist_to_tmp! 
} - - context 'when a file is provided' do - let(:file) { fixture_file_upload('testing.pdf', 'application/pdf') } - - it 'returns a path to the uploaded file' do - expect(persisted_path).to match(%r{/app/tmp/uploads/[\w-]+_testing\.pdf}) - end - - it 'writes the file to the tmp directory' do - expect(File.exist?(persisted_path)).to be true - expect(File.binread(persisted_path)).to eq(file.read) - end - end - - context 'when no file is provided' do - let(:file) { nil } - - it 'returns nil' do - expect(persisted_path).to be_nil - end - end - end -end diff --git a/spec/requests/image_jobs/image_jobs_spec.rb b/spec/requests/image_jobs/image_jobs_spec.rb index 0364151..feb1d46 100644 --- a/spec/requests/image_jobs/image_jobs_spec.rb +++ b/spec/requests/image_jobs/image_jobs_spec.rb @@ -43,7 +43,7 @@ post( '/image_jobs', headers: valid_headers, params: { image: file_upload } ) - expect(ImageAltTextJob).to have_received(:perform_later) + expect(ImageAltTextJob).to have_received(:perform_later).with(gui_user.jobs.last.uuid, /.+lion\.jpg/) end it 'returns valid JSON' do diff --git a/spec/support/minio_helper.rb b/spec/support/minio_helper.rb index 913719f..1fa6f19 100644 --- a/spec/support/minio_helper.rb +++ b/spec/support/minio_helper.rb @@ -7,7 +7,7 @@ def with_minio_env(&) S3_BUCKET_NAME: 'pdf_accessibility_api', AWS_ACCESS_KEY_ID: 'pdf_accessibility_api', AWS_SECRET_ACCESS_KEY: 'pdf_accessibility_api', - AWS_REGION: 'foober' + AWS_REGION: 'us-east-1' }, &) end end