Skip to content

Load files into transformers via Celery #1131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions servicex_app/boot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,10 @@ else
FLASK_APP=servicex_app/app.py flask db upgrade;
fi
[ -d "/default_users" ] && python3 servicex/cli/create_default_users.py

celery --broker="$RABBIT_MQ_URL" -A servicex_app.celery.server_tasks worker \
--loglevel=info \
--concurrency=5 &

exec gunicorn -b [::]:5000 $RELOAD --workers=5 --threads=1 --timeout 120 --log-level=warning --access-logfile /tmp/gunicorn.log --error-logfile - "servicex_app:create_app()"
# to log requests to stdout --access-logfile -
82 changes: 82 additions & 0 deletions servicex_app/servicex_app/celery/server_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) 2025, IRIS-HEP
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from celery import Celery
from celery.utils.log import get_task_logger
from ..celery_task_router import route_task
import os

app = Celery("servicex.celery", broker=os.environ["RABBIT_MQ_URL"])
app.conf.task_routes = (route_task,)

ADVERTISED_ENDPOINT = f"http://{os.environ['INSTANCE_NAME']}-servicex-app:8000/"

logger = get_task_logger(__name__)


def celery_task_name(request_id):
return f"transformer-{request_id}.transform_file"


@app.task
def add_files_to_processing_queue(request, files):
from ..models import TransformStatus

if TransformStatus.status_from_string(request["status"]).is_complete:
logger.debug(
"Rejecting file addition request, request is canceled",
extra={"task_id": celery_task_name(request["request_id"])},
)
return
for file_record in files:
app.send_task(
"transformer_sidecar.transform_file",
kwargs={
"request_id": request["request_id"],
"file_id": file_record["id"],
"paths": file_record["paths"].split(","),
"service_endpoint": ADVERTISED_ENDPOINT
+ "servicex/internal/transformation/"
+ request["request_id"],
"result_destination": request["result-destination"],
"result_format": request["result-format"],
},
)

logger.info(
"Added file to processing queue",
extra={
"paths": file_record["paths"].split(","),
"task_id": celery_task_name(request["request_id"]),
},
)

logger.info(
"Added files to processing queue",
extra={"num_files": len(files), "requestId": request["request_id"]},
)
9 changes: 5 additions & 4 deletions servicex_app/servicex_app/dataset_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from servicex_app.did_parser import DIDParser
from servicex_app.lookup_result_processor import LookupResultProcessor
from servicex_app.models import Dataset, DatasetFile, TransformRequest, DatasetStatus
from servicex_app.celery.server_tasks import add_files_to_processing_queue


class DatasetManager:
Expand Down Expand Up @@ -177,8 +178,8 @@ def publish_files(
self, request: TransformRequest, lookup_result_processor: LookupResultProcessor
) -> None:
request.files = len(self.dataset.files)
lookup_result_processor.add_files_to_processing_queue(
request, files=[file for file in self.dataset.files]
add_files_to_processing_queue.delay(
request.to_json(), files=[file.to_json() for file in self.dataset.files]
)

def add_files(
Expand All @@ -192,7 +193,7 @@ def add_files(

for request in requests:
request.files += len(files)
lookup_result_processor.add_files_to_processing_queue(
request, files=[file for file in files]
add_files_to_processing_queue.delay(
request.to_json(), files=[file.to_json() for file in files]
)
request.save_to_db()
7 changes: 7 additions & 0 deletions servicex_app/servicex_app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ def __init__(self, string_name, is_complete):
self.string_name = string_name
self.is_complete = is_complete

@classmethod
def status_from_string(cls, string_name: str):
for typ in cls:
if typ.value[0] == string_name:
return typ
raise RuntimeError(f"No TransformStatus corresponding to {string_name}")


class TransformRequest(db.Model):
__tablename__ = "requests"
Expand Down
3 changes: 1 addition & 2 deletions servicex_app/servicex_app_test/web/test_auth_callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ def test_auth_callback_incoming_new(
response: Response = client.get(
url_for("auth_callback"), query_string={"code": "oauth-code"}
)
mock_oauth.authorize_redirect.assert_not_called()
mock_user_model.find_by_email.assert_called_once()
oauth_client.authorize_access_token.assert_called_once()
id_token = self._id_token()
assert mock_session.get("is_authenticated")
assert mock_session.get("name") == id_token["name"]
Expand Down
Loading