diff --git a/.github/workflows/spring-batch-s3.yml b/.github/workflows/spring-batch-s3.yml new file mode 100644 index 00000000..f972321b --- /dev/null +++ b/.github/workflows/spring-batch-s3.yml @@ -0,0 +1,27 @@ +name: Spring Batch S3 + +on: + pull_request: + paths: + - 'spring-batch-s3/**' + push: + paths: + - 'spring-batch-s3/**' + +env: + MAVEN_ARGS: -B -V -ntp -e -Djansi.passthrough=true -Dstyle.color=always + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '21' + cache: 'maven' + - name: Build with Maven + run: ./mvnw $MAVEN_ARGS verify javadoc:javadoc + working-directory: spring-batch-s3 diff --git a/README.md b/README.md index 0bab8ea8..f3d786db 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ This project is part of the [Spring organization][] on GitHub. | [`spring-batch-geode`](spring-batch-geode) | Support for [Apache Geode] | TBA | [![Maven Central](https://img.shields.io/maven-central/v/org.springframework.batch.extensions/spring-batch-geode?label)](https://central.sonatype.com/artifact/org.springframework.batch.extensions/spring-batch-geode) | [![Spring Batch Geode](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-geode.yml/badge.svg)](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-geode.yml?query=branch%3Amain) | | [`spring-batch-neo4j`](spring-batch-neo4j) | Support for [Neo4j] | [@michael-simons](https://github.com/michael-simons) | [![Maven Central](https://img.shields.io/maven-central/v/org.springframework.batch.extensions/spring-batch-neo4j?label)](https://central.sonatype.com/artifact/org.springframework.batch.extensions/spring-batch-neo4j) | [![Spring Batch 
Neo4j](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-neo4j.yml/badge.svg)](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-neo4j.yml?query=branch%3Amain) | | [`spring-batch-notion`](spring-batch-notion) | Support for [Notion] | [@scordio](https://github.com/scordio) | [![Maven Central](https://img.shields.io/maven-central/v/org.springframework.batch.extensions/spring-batch-notion?label)](https://central.sonatype.com/artifact/org.springframework.batch.extensions/spring-batch-notion) | [![Spring Batch Notion](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-notion.yml/badge.svg?branch=main)](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-notion.yml?query=branch%3Amain) | +| [`spring-batch-s3`](spring-batch-s3) | Support for [Amazon S3] | [@andreacioni](https://github.com/andreacioni) | [![Maven Central](https://img.shields.io/maven-central/v/org.springframework.batch.extensions/spring-batch-s3?label)](https://central.sonatype.com/artifact/org.springframework.batch.extensions/spring-batch-s3) | [![Spring Batch S3](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-s3.yml/badge.svg?branch=main)](https://github.com/spring-projects/spring-batch-extensions/actions/workflows/spring-batch-s3.yml?query=branch%3Amain) | ## Getting support @@ -101,6 +102,7 @@ noted differently for individual extension Modules, but this should be the rare **We look forward to your contributions!!** +[Amazon S3]: https://aws.amazon.com/s3/ [Apache Geode]: https://geode.apache.org [Apache License]: https://www.apache.org/licenses/LICENSE-2.0 [Contributor Guidelines]: CONTRIBUTING.md diff --git a/spring-batch-s3/.editorconfig b/spring-batch-s3/.editorconfig new file mode 100644 index 00000000..afe87677 --- /dev/null +++ b/spring-batch-s3/.editorconfig @@ -0,0 +1,13 @@ +root = true + +# 
Unix-style newlines with a newline ending every file +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true + +# Maven POM code convention +[pom.xml] +indent_size = 2 +indent_style = space +max_line_length = 205 diff --git a/spring-batch-s3/.gitignore b/spring-batch-s3/.gitignore new file mode 100644 index 00000000..5a71b37d --- /dev/null +++ b/spring-batch-s3/.gitignore @@ -0,0 +1,32 @@ +### Maven ### +target/ +.mvn/wrapper/maven-wrapper.jar +.flattened-pom.xml + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ diff --git a/spring-batch-s3/.mvn/wrapper/maven-wrapper.properties b/spring-batch-s3/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 00000000..f95f1ee8 --- /dev/null +++ b/spring-batch-s3/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+wrapperVersion=3.3.2 +distributionType=only-script +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.8/apache-maven-3.9.8-bin.zip diff --git a/spring-batch-s3/README.adoc b/spring-batch-s3/README.adoc new file mode 100644 index 00000000..1ee1fee3 --- /dev/null +++ b/spring-batch-s3/README.adoc @@ -0,0 +1,157 @@ += spring-batch-s3 +:toc: +:icons: font +:source-highlighter: highlightjs + +https://spring.io/projects/spring-batch[Spring Batch] extension for https://aws.amazon.com/s3/[Amazon S3] which contains `S3ItemReader` and `S3ItemWriter` implementations for reading from and writing to S3 buckets, including support for multipart uploads. Other S3-compatible technologies (such as Google Cloud Storage, MinIO, etc.) may be supported, but they are not guaranteed to work. + +*Note*: these components are based on the *AWS SDK V2*. + +== Installation + +To use the `spring-batch-s3` extension, you need to add the following dependency to your Maven or Gradle project: + +=== Maven + +[source,xml] +---- +<dependency> + <groupId>org.springframework.batch.extensions</groupId> + <artifactId>spring-batch-s3</artifactId> + <version>${spring-batch-extensions.version}</version> +</dependency> +---- + +=== Gradle + +[source,groovy] +---- +implementation 'org.springframework.batch.extensions:spring-batch-s3:${springBatchExtensionsVersion}' +---- + +== Known limitations + +* The `S3ItemReader` and `S3ItemWriter` are designed to work with the synchronous AWS S3 client (`S3Client`). They do not support the asynchronous client (`S3AsyncClient`) at this time. + +== Pre-requisites + +In order to set up these components you need to provide some additional beans in your Spring Batch configuration: + +* An `S3Client` bean to interact with AWS S3. +* In case you want to use the `S3ItemReader`: an instance of `S3Deserializer` for the data you want to read. +* In case you want to use the `S3ItemWriter`: an instance of `S3Serializer` for the data you want to write. 
+ +There are two examples of implementation for both `S3Serializer` and `S3Deserializer` provided in this project: + +* `S3StringSerializer`: takes a `String` as input and writes it to S3 as a UTF-8 encoded byte array. The write functions add a line termination character at the end of each string. +* `S3StringDeserializer`: takes a UTF-8 encoded byte array from S3 and converts it to a `String`. The implementation of this deserializer is *stateful* because lines may arrive in different chunks. + +More details in the JavaDocs of the classes. + +=== Configuration of the `S3Client` + +To use the `S3ItemReader` and `S3ItemWriter`, you need to configure the AWS S3 client by providing an `S3Client`. Check out the https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/http-configuration-apache.html[AWS SDK for Java] documentation for more details on how to configure this. + +=== Configure `S3Serializer` + +`S3StringSerializer` is a simple implementation of `S3Serializer` that takes a `String` as input and writes it to S3 as a UTF-8 encoded byte array. You are encouraged to implement your own serializer if you need to handle different data types or formats. + +==== Java Config + +[source,java] +---- +@Bean +S3Serializer s3Serializer() { + return new S3StringSerializer(); +} +---- + +=== Configure `S3Deserializer` + +Similarly, `S3StringDeserializer` is a simple implementation of `S3Deserializer` that takes a UTF-8 encoded byte array from S3 and converts it to a `String`. You can implement your own deserializer if you need to handle different data types or formats. + +In case you don't want to implement your own deserializer, check out the "Alternative reader" section below. + +==== Java Config + +[source,java] +---- +@Bean +S3Deserializer s3Deserializer() { + return new S3StringDeserializer(); +} +---- + +== Configuration of `S3ItemReader` + +Given the `S3Client` and `S3Deserializer` beans, you can now configure the `S3ItemReader`. 
+ +=== Java Config + +To configure the `S3ItemReader`, you need to set up the AWS S3 client and specify the bucket and object key from which you want to read data. +[source,java] +---- +@Bean +ItemReader downloadItemReader() throws Exception { + return new S3ItemReader.Builder() + .s3Client(s3Client()) + .bucketName("bucket_name") + .objectKey("object_key") + .deserializer(s3Deserializer()) + .bufferSize((int) DataSize.ofMegabytes(1).toBytes()) // Default 128 Bytes + .build(); +} +---- + +There is also an additional option to set the `bufferSize` which is the size of the buffer used to read data from S3. The default value is 128 bytes, but you can increase it to improve read performance. The best value for this parameter is the average length of the lines in your file. + +=== Alternative reader + +Instead of `S3ItemReader` you can also use `FlatFileItemReader` with an `InputStreamResource` to read files from S3 as well. +To do so, this package exposes an `S3InputStream` that can be used for that purpose. Below is an example: + +[source,java] +---- +@Bean +ItemReader itemReader() throws Exception { + final var inputStreamResource = new InputStreamResource( + new S3InputStream(s3Client(), + "bucket_name", + "object_key")); + + return new FlatFileItemReaderBuilder() + .name("itemReader") + .resource(inputStreamResource) + .lineMapper(new PassThroughLineMapper()) + .build(); +} +---- + +== Configuration of `S3ItemWriter` + +Given the `S3Client` and `S3Serializer` beans, you can now configure the `S3ItemWriter`. + +=== Java Config + +To configure the `S3ItemWriter`, you need to set up the AWS S3 client and specify the bucket and object key to which you want to write data. 
+[source,java] +---- +@Bean +ItemWriter uploadItemWriter() throws IOException { + return new S3ItemWriter.Builder() + .s3Client(s3Client()) + .bucketName("bucket_name") + .objectKey("object_key") + .multipartUpload(true) // Default is false + .partSize((int) DataSize.ofMegabytes(10).toBytes()) // Default is 5 MB + .contentType("text/csv") // Default is application/octet-stream + .serializer(s3Serializer()) + .build(); +} +---- + +There are several additional options you can set for the `S3ItemWriter`: + +* `multipartUpload`: If set to `true`, the writer will use multipart upload for large files. The default is `false`. +* `partSize`: The size of each part in a multipart upload. The default is 5 MB. +* `contentType`: The content type of the uploaded file. The default is `application/octet-stream`. \ No newline at end of file diff --git a/spring-batch-s3/mvnw b/spring-batch-s3/mvnw new file mode 100755 index 00000000..19529ddf --- /dev/null +++ b/spring-batch-s3/mvnw @@ -0,0 +1,259 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.3.2 +# +# Optional ENV vars +# ----------------- +# JAVA_HOME - location of a JDK home dir, required when download maven via java source +# MVNW_REPOURL - repo url base for downloading maven distribution +# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output +# ---------------------------------------------------------------------------- + +set -euf +[ "${MVNW_VERBOSE-}" != debug ] || set -x + +# OS specific support. +native_path() { printf %s\\n "$1"; } +case "$(uname)" in +CYGWIN* | MINGW*) + [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" + native_path() { cygpath --path --windows "$1"; } + ;; +esac + +# set JAVACMD and JAVACCMD +set_java_home() { + # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched + if [ -n "${JAVA_HOME-}" ]; then + if [ -x "$JAVA_HOME/jre/sh/java" ]; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACCMD="$JAVA_HOME/jre/sh/javac" + else + JAVACMD="$JAVA_HOME/bin/java" + JAVACCMD="$JAVA_HOME/bin/javac" + + if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then + echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 + echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2 + return 1 + fi + fi + else + JAVACMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v java + )" || : + JAVACCMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v javac + )" || : + + if [ ! -x "${JAVACMD-}" ] || [ ! 
-x "${JAVACCMD-}" ]; then + echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 + return 1 + fi + fi +} + +# hash string like Java String::hashCode +hash_string() { + str="${1:-}" h=0 + while [ -n "$str" ]; do + char="${str%"${str#?}"}" + h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) + str="${str#?}" + done + printf %x\\n $h +} + +verbose() { :; } +[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } + +die() { + printf %s\\n "$1" >&2 + exit 1 +} + +trim() { + # MWRAPPER-139: + # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. + # Needed for removing poorly interpreted newline sequences when running in more + # exotic environments such as mingw bash on Windows. + printf "%s" "${1}" | tr -d '[:space:]' +} + +# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties +while IFS="=" read -r key value; do + case "${key-}" in + distributionUrl) distributionUrl=$(trim "${value-}") ;; + distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; + esac +done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties" +[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties" + +case "${distributionUrl##*/}" in +maven-mvnd-*bin.*) + MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ + case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in + *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; + :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; + :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; + :Linux*x86_64*) distributionPlatform=linux-amd64 ;; + *) + echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 + distributionPlatform=linux-amd64 + ;; + esac + distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" + ;; +maven-mvnd-*) MVN_CMD=mvnd.sh 
_MVNW_REPO_PATTERN=/maven/mvnd/ ;; +*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; +esac + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" +distributionUrlName="${distributionUrl##*/}" +distributionUrlNameMain="${distributionUrlName%.*}" +distributionUrlNameMain="${distributionUrlNameMain%-bin}" +MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" +MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" + +exec_maven() { + unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : + exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" +} + +if [ -d "$MAVEN_HOME" ]; then + verbose "found existing MAVEN_HOME at $MAVEN_HOME" + exec_maven "$@" +fi + +case "${distributionUrl-}" in +*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; +*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; +esac + +# prepare tmp dir +if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then + clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } + trap clean HUP INT TERM EXIT +else + die "cannot create temp dir" +fi + +mkdir -p -- "${MAVEN_HOME%/*}" + +# Download and Install Apache Maven +verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +verbose "Downloading from: $distributionUrl" +verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +# select .zip or .tar.gz +if ! 
command -v unzip >/dev/null; then + distributionUrl="${distributionUrl%.zip}.tar.gz" + distributionUrlName="${distributionUrl##*/}" +fi + +# verbose opt +__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' +[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v + +# normalize http auth +case "${MVNW_PASSWORD:+has-password}" in +'') MVNW_USERNAME='' MVNW_PASSWORD='' ;; +has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; +esac + +if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then + verbose "Found wget ... using wget" + wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" +elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then + verbose "Found curl ... using curl" + curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" +elif set_java_home; then + verbose "Falling back to use Java to download" + javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" + targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" + cat >"$javaSource" <<-END + public class Downloader extends java.net.Authenticator + { + protected java.net.PasswordAuthentication getPasswordAuthentication() + { + return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); + } + public static void main( String[] args ) throws Exception + { + setDefault( new Downloader() ); + java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); + } + } + END + # For Cygwin/MinGW, switch paths to Windows format before running javac and java + verbose " - Compiling Downloader.java ..." 
+ "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" + verbose " - Running Downloader.java ..." + "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" +fi + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +if [ -n "${distributionSha256Sum-}" ]; then + distributionSha256Result=false + if [ "$MVN_CMD" = mvnd.sh ]; then + echo "Checksum validation is not supported for maven-mvnd." >&2 + echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + elif command -v sha256sum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + elif command -v shasum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then + distributionSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 + echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 + exit 1 + fi + if [ $distributionSha256Result = false ]; then + echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 + echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." 
>&2 + exit 1 + fi +fi + +# unzip and move +if command -v unzip >/dev/null; then + unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" +else + tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" +fi +printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url" +mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" + +clean || : +exec_maven "$@" diff --git a/spring-batch-s3/mvnw.cmd b/spring-batch-s3/mvnw.cmd new file mode 100644 index 00000000..249bdf38 --- /dev/null +++ b/spring-batch-s3/mvnw.cmd @@ -0,0 +1,149 @@ +<# : batch portion +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. 
+@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Apache Maven Wrapper startup batch script, version 3.3.2 +@REM +@REM Optional ENV vars +@REM MVNW_REPOURL - repo url base for downloading maven distribution +@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output +@REM ---------------------------------------------------------------------------- + +@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) +@SET __MVNW_CMD__= +@SET __MVNW_ERROR__= +@SET __MVNW_PSMODULEP_SAVE=%PSModulePath% +@SET PSModulePath= +@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( + IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) +) +@SET PSModulePath=%__MVNW_PSMODULEP_SAVE% +@SET __MVNW_PSMODULEP_SAVE= +@SET __MVNW_ARG0_NAME__= +@SET MVNW_USERNAME= +@SET MVNW_PASSWORD= +@IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) +@echo Cannot start maven from wrapper >&2 && exit /b 1 +@GOTO :EOF +: end batch / begin powershell #> + +$ErrorActionPreference = "Stop" +if ($env:MVNW_VERBOSE -eq "true") { + $VerbosePreference = "Continue" +} + +# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties +$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl +if (!$distributionUrl) { + Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" +} + +switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { + "maven-mvnd-*" { + $USE_MVND = $true + $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" + 
$MVN_CMD = "mvnd.cmd" + break + } + default { + $USE_MVND = $false + $MVN_CMD = $script -replace '^mvnw','mvn' + break + } +} + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +if ($env:MVNW_REPOURL) { + $MVNW_REPO_PATTERN = if ($USE_MVND) { "/org/apache/maven/" } else { "/maven/mvnd/" } + $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace '^.*'+$MVNW_REPO_PATTERN,'')" +} +$distributionUrlName = $distributionUrl -replace '^.*/','' +$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' +$MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" +if ($env:MAVEN_USER_HOME) { + $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" +} +$MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' +$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" + +if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { + Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" + Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" + exit $? +} + +if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { + Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" +} + +# prepare tmp dir +$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile +$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" +$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null +trap { + if ($TMP_DOWNLOAD_DIR.Exists) { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } + } +} + +New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null + +# Download and Install Apache Maven +Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 
+Write-Verbose "Downloading from: $distributionUrl" +Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +$webclient = New-Object System.Net.WebClient +if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { + $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) +} +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum +if ($distributionSha256Sum) { + if ($USE_MVND) { + Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." + } + Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash + if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { + Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." + } +} + +# unzip and move +Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null +Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null +try { + Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null +} catch { + if (! 
(Test-Path -Path "$MAVEN_HOME" -PathType Container)) { + Write-Error "fail to move MAVEN_HOME" + } +} finally { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } +} + +Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" diff --git a/spring-batch-s3/pom.xml b/spring-batch-s3/pom.xml new file mode 100644 index 00000000..2573f694 --- /dev/null +++ b/spring-batch-s3/pom.xml @@ -0,0 +1,255 @@ + + + + 4.0.0 + + + org.springframework.boot + spring-boot-starter-parent + 3.5.3 + + + + org.springframework.batch.extensions + spring-batch-s3 + 0.1.0-SNAPSHOT + + Spring Batch S3 + Spring Batch extension for Amazon S3 + https://github.com/spring-projects/spring-batch-extensions/tree/main/spring-batch-s3 + 2025 + + + repo + Apache-2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + + + + + + andreacioni + Andrea Cioni + https://github.com/andreacioni + + + + + git://github.com/spring-projects/spring-batch-extensions.git + + git@github.com:spring-projects/spring-batch-extensions.git + + https://github.com/spring-projects/spring-batch-extensions + + + + 2.31.77 + 0.0.47 + + + + + + software.amazon.awssdk + bom + import + pom + ${aws.java.sdk.version} + + + + + + + + software.amazon.awssdk + s3 + + + software.amazon.awssdk + netty-nio-client + + + + + + + org.springframework.batch + spring-batch-core + + + + + org.assertj + assertj-core + test + + + org.junit.jupiter + junit-jupiter + test + + + org.mockito + mockito-core + test + + + + + + + io.spring.javaformat + spring-javaformat-maven-plugin + 0.0.46 + + + org.apache.maven.plugins + maven-compiler-plugin + + + -Xlint:all,deprecation + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + + jar + + attach-javadocs + + + + + org.apache.maven.plugins + maven-source-plugin + + + + jar + + attach-sources + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.6.0 + + + checkstyle + com.puppycrawl.tools + 9.3 + + + 
spring-javaformat-checkstyle + io.spring.javaformat + ${spring-javaformat.version} + + + + + + io/spring/javaformat/checkstyle/checkstyle.xml + + true + + + check + + checkstyle-validation + true + validate + + + + + org.codehaus.mojo + flatten-maven-plugin + 1.7.1 + + + + ossrh + + remove + + + + flatten + + flatten + process-resources + + + + clean + + flatten-clean + clean + + + + + com.mycila + license-maven-plugin + 4.6 + + + + + + src/**/*.java + + + + + 2025 + + + + + + check + + + + + + + diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/S3ItemReader.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/S3ItemReader.java new file mode 100644 index 00000000..cd0a0719 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/S3ItemReader.java @@ -0,0 +1,89 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3; + +import java.io.IOException; +import java.util.Arrays; + +import org.springframework.batch.extensions.s3.serializer.S3Deserializer; +import org.springframework.batch.extensions.s3.stream.S3InputStream; +import org.springframework.batch.item.ItemReader; +import org.springframework.batch.item.ItemStream; +import org.springframework.batch.item.ItemStreamException; + +/** + * An {@link ItemReader} that reads items from an S3 object using a specified + * deserializer. It uses an {@link S3InputStream} to read the data and a + * {@link S3Deserializer} to convert the byte array into the desired item type. + * + * @param the type of items to read + * @author Andrea Cioni + */ +public class S3ItemReader implements ItemReader, ItemStream { + + private static final int DEFAULT_BUFFER_SIZE_BYTES = 128; + + private final S3InputStream in; + + private final S3Deserializer deserializer; + + private int bufferSize = DEFAULT_BUFFER_SIZE_BYTES; + + public S3ItemReader(S3InputStream in, S3Deserializer deserializer) { + this.in = in; + this.deserializer = deserializer; + } + + @Override + public T read() throws Exception { + T item; + + //before reading more bytes from the input stream get all of the items + //that may be buffered inside the deserializer (deserializer is stateful!) 
+ while ((item = this.deserializer.deserialize(new byte[]{})) != null) { + return item; + } + + int bytesRead; + byte[] buffer = new byte[this.bufferSize]; + while ((bytesRead = this.in.read(buffer)) != -1) { + item = this.deserializer.deserialize(Arrays.copyOf(buffer, bytesRead)); + if (item != null) { + return item; + } + } + return null; + } + + @Override + public void close() throws ItemStreamException { + try { + this.in.close(); + } + catch (IOException ex) { + throw new ItemStreamException(ex); + } + } + + public void setBufferSize(int bufferSize) { + this.bufferSize = bufferSize; + } + + public int getBufferSize() { + return this.bufferSize; + } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/S3ItemWriter.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/S3ItemWriter.java new file mode 100644 index 00000000..7d9922cd --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/S3ItemWriter.java @@ -0,0 +1,71 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3; + +import java.io.IOException; +import java.io.OutputStream; + +import org.springframework.batch.extensions.s3.serializer.S3Serializer; +import org.springframework.batch.extensions.s3.stream.S3MultipartOutputStream; +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemStream; +import org.springframework.batch.item.ItemStreamException; +import org.springframework.batch.item.ItemWriter; +import org.springframework.util.ObjectUtils; + +/** + * An {@link ItemWriter} that writes items to an S3 object using a specified serializer. + * It uses an {@link S3MultipartOutputStream} to write the data and a {@link S3Serializer} + * to convert the item into a byte array. + * + * @param the type of items to write + * @author Andrea Cioni + */ +public class S3ItemWriter implements ItemWriter, ItemStream { + + private final OutputStream out; + + private final S3Serializer serializer; + + public S3ItemWriter(OutputStream out, S3Serializer serializer) { + this.out = out; + this.serializer = serializer; + } + + @Override + public void write(Chunk chunk) throws Exception { + for (T item : chunk.getItems()) { + byte[] serializedData = this.serializer.serialize(item); + if (!ObjectUtils.isEmpty(serializedData)) { + this.out.write(serializedData); + } + else { + throw new IllegalArgumentException("Serialized data is null or empty for item: " + item); + } + } + } + + @Override + public void close() throws ItemStreamException { + try { + this.out.close(); + } + catch (IOException ex) { + throw new ItemStreamException(ex); + } + } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/S3ItemReaderBuilder.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/S3ItemReaderBuilder.java new file mode 100644 index 00000000..0c08c017 --- /dev/null +++ 
b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/S3ItemReaderBuilder.java @@ -0,0 +1,72 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.batch.extensions.s3.builder; + +import software.amazon.awssdk.services.s3.S3Client; + +import org.springframework.batch.extensions.s3.S3ItemReader; +import org.springframework.batch.extensions.s3.serializer.S3Deserializer; +import org.springframework.batch.extensions.s3.stream.S3InputStream; + +public class S3ItemReaderBuilder { + private S3Client s3Client; + + private String bucketName; + + private String objectKey; + + private S3Deserializer deserializer; + + private Integer bufferSize; + + public S3ItemReaderBuilder s3Client(S3Client s3Client) { + this.s3Client = s3Client; + return this; + } + + public S3ItemReaderBuilder bucketName(String bucketName) { + this.bucketName = bucketName; + return this; + } + + public S3ItemReaderBuilder objectKey(String objectKey) { + this.objectKey = objectKey; + return this; + } + + public S3ItemReaderBuilder deserializer(S3Deserializer deserializer) { + this.deserializer = deserializer; + return this; + } + + public S3ItemReaderBuilder bufferSize(int bufferSize) { + this.bufferSize = bufferSize; + return this; + } + + public S3ItemReader build() throws Exception { + if (this.s3Client == null || this.bucketName == null || this.objectKey == null || this.deserializer 
== null) { + throw new IllegalArgumentException("S3Client, bucketName, objectKey, and deserializer must be provided"); + } + S3InputStream inputStream = new S3InputStream(this.s3Client, this.bucketName, this.objectKey); + S3ItemReader reader = new S3ItemReader<>(inputStream, this.deserializer); + if (this.bufferSize != null) { + reader.setBufferSize(this.bufferSize); + } + return reader; + } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/S3ItemWriterBuilder.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/S3ItemWriterBuilder.java new file mode 100644 index 00000000..e376ef16 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/S3ItemWriterBuilder.java @@ -0,0 +1,105 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3.builder; + +import java.io.IOException; +import java.io.OutputStream; + +import software.amazon.awssdk.services.s3.S3Client; + +import org.springframework.batch.extensions.s3.S3ItemWriter; +import org.springframework.batch.extensions.s3.serializer.S3Serializer; +import org.springframework.batch.extensions.s3.stream.S3MultipartOutputStream; +import org.springframework.batch.extensions.s3.stream.S3MultipartUploader; +import org.springframework.batch.extensions.s3.stream.S3OutputStream; + +public class S3ItemWriterBuilder { + private S3Client s3Client; + + private String bucket; + + private String key; + + private S3Serializer serializer; + + private boolean multipartUpload; + + private String contentType; + + private Integer partSize; + + public S3ItemWriterBuilder s3Client(S3Client s3Client) { + this.s3Client = s3Client; + return this; + } + + public S3ItemWriterBuilder bucketName(String bucketName) { + this.bucket = bucketName; + return this; + } + + public S3ItemWriterBuilder objectKey(String key) { + this.key = key; + return this; + } + + public S3ItemWriterBuilder serializer(S3Serializer serializer) { + this.serializer = serializer; + return this; + } + + public S3ItemWriterBuilder multipartUpload(boolean multipartUpload) { + this.multipartUpload = multipartUpload; + return this; + } + + public S3ItemWriterBuilder partSize(int partSize) { + this.partSize = partSize; + return this; + } + + public S3ItemWriterBuilder contentType(String contentType) { + this.contentType = contentType; + return this; + } + + public S3ItemWriter build() throws IOException { + if (this.s3Client == null || this.bucket == null || this.key == null || this.serializer == null) { + throw new IllegalArgumentException("S3Client, bucket, key, and serializer must be provided"); + } + OutputStream outputStream; + if (this.multipartUpload) { + S3MultipartUploader s3MultipartUploader = new S3MultipartUploader(this.s3Client, this.bucket, 
this.key); + if (this.contentType != null) { + s3MultipartUploader.setContentType(this.contentType); + } + if (this.partSize != null) { + s3MultipartUploader.setPartSize(this.partSize); + } + + outputStream = new S3MultipartOutputStream(s3MultipartUploader); + } + else { + outputStream = new S3OutputStream(this.s3Client, this.bucket, this.key); + if (this.contentType != null) { + ((S3OutputStream) outputStream).setContentType(this.contentType); + } + } + + return new S3ItemWriter<>(outputStream, this.serializer); + } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/package-info.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/package-info.java new file mode 100644 index 00000000..10cab50f --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/builder/package-info.java @@ -0,0 +1,28 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Provides support for reading items from Amazon S3 using a stream-based approach. This + * package includes classes for reading items from S3 objects, deserializing them, and + * handling the input stream efficiently. + * + *

+ * Classes in this package are designed to work with the AWS SDK for Java and provide a + * convenient way to read large datasets stored in S3 without loading them entirely into + * memory. + */ + +package org.springframework.batch.extensions.s3.builder; diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/package-info.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/package-info.java new file mode 100644 index 00000000..9c7dbb3a --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/package-info.java @@ -0,0 +1,28 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Provides support for reading items from Amazon S3 using a stream-based approach. This + * package includes classes for reading items from S3 objects, deserializing them, and + * handling the input stream efficiently. + * + *

/**
 * A functional interface for deserializing byte arrays read from S3 into items.
 * Implementations convert raw bytes into objects of type T and may buffer partial
 * input between calls.
 *
 * @param <T> the type of items to be deserialized
 * @author Andrea Cioni
 */
@FunctionalInterface
public interface S3Deserializer<T> {

	/**
	 * Deserialize a byte array into an object of type T. It is not guaranteed that the
	 * call to this method will always return a non-null value. This can happen if the
	 * byte array is either empty or it doesn't represent a valid object of type T yet.
	 * For this reason the implementation of this method is expected to be stateful.
	 * @param buffer the byte array to deserialize (may be empty, to drain buffered items)
	 * @return the deserialized object, or {@code null} if no complete item is available yet
	 */
	T deserialize(byte[] buffer);

}
/**
 * A functional interface for serializing items to byte arrays for S3 storage.
 * Implementations should provide a way to convert an item of type T into a byte array.
 *
 * @param <T> the type of items to be serialized
 * @author Andrea Cioni
 */
@FunctionalInterface
public interface S3Serializer<T> {

	/**
	 * Serialize an item into the bytes to be written to S3.
	 * @param item the item to serialize
	 * @return the serialized form; must not be {@code null} or empty
	 */
	byte[] serialize(T item);

}
+ * + * @author Andrea Cioni + */ +public class S3StringDeserializer implements S3Deserializer { + + final Charset charset; + + private StringBuilder stringBuilder = new StringBuilder(); + + public S3StringDeserializer() { + this.charset = StandardCharsets.UTF_8; + } + + public S3StringDeserializer(Charset charset) { + this.charset = charset; + } + + @Override + public String deserialize(byte[] buffer) { + String incoming = new String(buffer, this.charset); + this.stringBuilder.append(incoming); + + int newlineIdx = this.stringBuilder.indexOf("\n"); + if (newlineIdx == -1) { + return null; + } + + // Handle both \n and \r\n line endings + int lineEnd = newlineIdx; + if (newlineIdx > 0 && this.stringBuilder.charAt(newlineIdx - 1) == '\r') { + lineEnd--; + } + + String line = this.stringBuilder.substring(0, lineEnd); + this.stringBuilder = new StringBuilder(this.stringBuilder.substring(newlineIdx + 1)); + + return line; + } + +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/serializer/S3StringSerializer.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/serializer/S3StringSerializer.java new file mode 100644 index 00000000..6669adf9 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/serializer/S3StringSerializer.java @@ -0,0 +1,36 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3.serializer; + +import java.nio.charset.StandardCharsets; + +/** + * Simple serializer for String items to be used with S3. This serializer takes a String + * item, appends a newline character, and converts it to a byte array using UTF-8 + * encoding. This is intended to be used with S3ItemWriter to write text data to S3 + * objects. + * + * @author Andrea Cioni + */ +public class S3StringSerializer implements S3Serializer { + + @Override + public byte[] serialize(String item) { + return (item + "\n").getBytes(StandardCharsets.UTF_8); + } + +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/serializer/package-info.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/serializer/package-info.java new file mode 100644 index 00000000..44839b25 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/serializer/package-info.java @@ -0,0 +1,27 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Provides support for reading items from Amazon S3 using a stream-based approach. This + * package includes classes for reading items from S3 objects, deserializing them, and + * handling the input stream efficiently. + * + *

+ * Classes in this package are designed to work with the AWS SDK for Java and provide a + * convenient way to read large datasets stored in S3 without loading them entirely into + * memory. + */ +package org.springframework.batch.extensions.s3.serializer; diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/Defaults.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/Defaults.java new file mode 100644 index 00000000..eea4061b --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/Defaults.java @@ -0,0 +1,27 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3.stream; + +import org.springframework.util.unit.DataSize; + +final class Defaults { + static final int DEFAULT_PART_SIZE = (int) DataSize.ofMegabytes(5L).toBytes(); + + static final String DEFAULT_CONTENT_TYPE = "application/octet-stream"; + + private Defaults() { } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3InputStream.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3InputStream.java new file mode 100644 index 00000000..35398ac5 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3InputStream.java @@ -0,0 +1,78 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.batch.extensions.s3.stream; + +import java.io.IOException; +import java.io.InputStream; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; + +/** + * An {@link InputStream} that reads data from an S3 object. It uses the AWS SDK for Java + * to retrieve the object from S3. Is safe to use this stream for reading large files as + * it doesn't load the entire file into memory. 
+ * + * @author Andrea Cioni + */ +public class S3InputStream extends InputStream { + + private static final Logger logger = LoggerFactory.getLogger(S3InputStream.class); + + private final S3Client s3; + + private final String bucketName; + + private final String objectKey; + + private InputStream inputStream; + + public S3InputStream(S3Client s3, String bucketName, String objectKey) { + this.s3 = s3; + this.bucketName = bucketName; + this.objectKey = objectKey; + } + + @Override + public int read() throws IOException { + if (this.inputStream == null) { + this.inputStream = openS3InputStream(); + } + return this.inputStream.read(); + } + + @Override + public void close() throws IOException { + logger.debug("Closing stream"); + if (this.inputStream != null) { + this.inputStream.close(); + } + logger.debug("Stream closed"); + super.close(); + } + + private InputStream openS3InputStream() { + GetObjectRequest getObjectRequest = GetObjectRequest.builder() + .bucket(this.bucketName) + .key(this.objectKey) + .build(); + return this.s3.getObject(getObjectRequest); + } + +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3MultipartOutputStream.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3MultipartOutputStream.java new file mode 100644 index 00000000..86ccac4f --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3MultipartOutputStream.java @@ -0,0 +1,123 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.batch.extensions.s3.stream; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; + +/** + * An {@link OutputStream} that writes data to an S3 object using multipart upload. It + * uses a {@link PipedInputStream} and a {@link PipedOutputStream} to allow writing data + * asynchronously while uploading it in parts. This stream is suitable for large file + * uploads. 
+ * + * @author Andrea Cioni + */ +public class S3MultipartOutputStream extends OutputStream { + + private static final Logger logger = LoggerFactory.getLogger(S3MultipartOutputStream.class); + + private final PipedInputStream pipedInputStream; + + private final PipedOutputStream pipedOutputStream; + + private ExecutorService singleThreadExecutor; + + private volatile boolean uploading; + + private final S3Uploader multipartUpload; + + public S3MultipartOutputStream(S3Client s3Client, String bucketName, String key) throws IOException { + this(new S3MultipartUploader(s3Client, bucketName, key)); + } + + public S3MultipartOutputStream(S3Uploader s3Uploader) throws IOException { + this.pipedInputStream = new PipedInputStream(); + this.pipedOutputStream = new PipedOutputStream(this.pipedInputStream); + this.uploading = false; + this.multipartUpload = s3Uploader; + } + + @Override + public void write(int b) throws IOException { + if (!this.uploading) { + this.uploading = true; + + startUpload(); + } + this.pipedOutputStream.write(b); + } + + private void startUpload() { + if (this.singleThreadExecutor == null) { + this.singleThreadExecutor = Executors.newSingleThreadExecutor(); + } + + this.singleThreadExecutor.execute(() -> { + try { + this.multipartUpload.upload(this.pipedInputStream); + } + catch (IOException ex) { + logger.error("Error during multipart upload", ex); + throw new RuntimeException(ex); + } + finally { + try { + this.pipedInputStream.close(); + } + catch (IOException ex) { + logger.error("Error closing piped input stream", ex); + } + } + }); + this.singleThreadExecutor.shutdown(); + } + + @Override + public void close() throws IOException { + logger.debug("Closing output stream"); + + this.pipedOutputStream.close(); + + if (this.uploading) { + try { + if (!this.singleThreadExecutor.awaitTermination(10L, TimeUnit.SECONDS)) { + logger.warn("Multipart upload thread did not finish in time"); + } + } + catch (InterruptedException ex) { + 
logger.error("Multipart upload thread interrupted", ex); + } + } + + logger.debug("Output stream closed"); + super.close(); + } + + public void setSingleThreadExecutor(ExecutorService singleThreadExecutor) { + this.singleThreadExecutor = singleThreadExecutor; + } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3MultipartUploader.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3MultipartUploader.java new file mode 100644 index 00000000..c0878829 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3MultipartUploader.java @@ -0,0 +1,201 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3.stream; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; + +/** + * A utility class for performing multipart uploads to Amazon S3. It reads data from an + * input stream and uploads it in parts to a specified S3 bucket and key.
+ * Reference: Uploading + * streams to Amazon S3 using the AWS SDK for Java 2.x + * + * @author Andrea Cioni + */ +public class S3MultipartUploader implements S3Uploader { + + private static final Logger logger = LoggerFactory.getLogger(S3MultipartUploader.class); + + private final S3Client s3Client; + + private final String bucket; + + private final String key; + + private int partSize = Defaults.DEFAULT_PART_SIZE; + + private String contentType = Defaults.DEFAULT_CONTENT_TYPE; + + public S3MultipartUploader(S3Client s3Client, String bucket, String key) { + this.s3Client = s3Client; + this.bucket = bucket; + this.key = key; + } + + /** + * Reads from the input stream into the buffer, attempting to fill the buffer + * completely or until the end of the stream is reached. + * @param inputStream the input stream to read from + * @param buffer the buffer to fill + * @return the number of bytes read, or -1 if the end of the stream is reached before + * any bytes are read + * @throws IOException if an I/O error occurs + */ + private static int readFullyOrToEnd(InputStream inputStream, byte[] buffer) throws IOException { + int totalBytesRead = 0; + int bytesRead; + while (totalBytesRead < buffer.length) { + bytesRead = inputStream.read(buffer, totalBytesRead, buffer.length - totalBytesRead); + if (bytesRead == -1) { + break; + } + totalBytesRead += bytesRead; + } + return (totalBytesRead > 0) ? 
totalBytesRead : -1; + } + + @Override + public long upload(InputStream inputStream) throws IOException { + String uploadId; + long totalBytesRead = 0; + + try { + CreateMultipartUploadRequest createMultipartUploadRequest = CreateMultipartUploadRequest.builder() + .bucket(this.bucket) + .key(this.key) + .contentType(this.contentType) + .build(); + + CreateMultipartUploadResponse createResponse = this.s3Client + .createMultipartUpload(createMultipartUploadRequest); + uploadId = createResponse.uploadId(); + logger.debug("Started multipart upload with ID: {}", uploadId); + + List completedParts = new ArrayList<>(); + int partNumber = 1; + byte[] buffer = new byte[this.partSize]; + int bytesRead; + + try { + while ((bytesRead = readFullyOrToEnd(inputStream, buffer)) > 0) { + totalBytesRead += bytesRead; + UploadPartRequest uploadPartRequest = UploadPartRequest.builder() + .bucket(this.bucket) + .key(this.key) + .uploadId(uploadId) + .partNumber(partNumber) + .build(); + + RequestBody requestBody; + if (bytesRead < this.partSize) { + byte[] lastPartBuffer = new byte[bytesRead]; + System.arraycopy(buffer, 0, lastPartBuffer, 0, bytesRead); + requestBody = RequestBody.fromBytes(lastPartBuffer); + } + else { + requestBody = RequestBody.fromBytes(buffer); + } + + UploadPartResponse uploadPartResponse = this.s3Client.uploadPart(uploadPartRequest, requestBody); + CompletedPart part = CompletedPart.builder() + .partNumber(partNumber) + .eTag(uploadPartResponse.eTag()) + .build(); + completedParts.add(part); + + logger.debug("Uploaded part {} with size {} bytes", partNumber, bytesRead); + partNumber++; + } + + CompletedMultipartUpload completedMultipartUpload = CompletedMultipartUpload.builder() + .parts(completedParts) + .build(); + + CompleteMultipartUploadRequest completeRequest = CompleteMultipartUploadRequest.builder() + .bucket(this.bucket) + .key(this.key) + .uploadId(uploadId) + .multipartUpload(completedMultipartUpload) + .build(); + + CompleteMultipartUploadResponse 
completeResponse = this.s3Client + .completeMultipartUpload(completeRequest); + logger.debug("Multipart upload completed. Object URL: {}", completeResponse.location()); + } + catch (Exception ex) { + logger.error("Error during multipart upload: {}", ex.getMessage(), ex); + if (uploadId != null) { + AbortMultipartUploadRequest abortRequest = AbortMultipartUploadRequest.builder() + .bucket(this.bucket) + .key(this.key) + .uploadId(uploadId) + .build(); + this.s3Client.abortMultipartUpload(abortRequest); + logger.warn("Multipart upload aborted"); + } + throw ex; + } + finally { + try { + inputStream.close(); + } + catch (IOException ex) { + logger.error("Error closing input stream: {}", ex.getMessage(), ex); + } + } + } + finally { + this.s3Client.close(); + } + + return totalBytesRead; + } + + public int getPartSize() { + return this.partSize; + } + + public void setPartSize(int partSize) { + this.partSize = partSize; + } + + public String getContentType() { + return this.contentType; + } + + public void setContentType(String contentType) { + this.contentType = contentType; + } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3OutputStream.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3OutputStream.java new file mode 100644 index 00000000..2117f24b --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3OutputStream.java @@ -0,0 +1,119 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.batch.extensions.s3.stream;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PipedInputStream;
+import java.io.PipedOutputStream;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.http.ContentStreamProvider;
+import software.amazon.awssdk.services.s3.S3Client;
+
+/**
+ * An {@link OutputStream} that writes data directly to an S3 object with a specified MIME
+ * type (default is application/octet-stream). This stream loads the data in memory and
+ * uploads it to S3 as it is written. It uses a {@link PipedInputStream} and a
+ * {@link PipedOutputStream} to allow writing data asynchronously while uploading it
+ * directly to S3. It is not safe to use this stream with large file uploads, as it does
+ * not handle multipart uploads or large data efficiently. For this use case, check out
+ * {@link S3MultipartOutputStream}.
+ * + * @author Andrea Cioni + */ +public class S3OutputStream extends OutputStream { + + private static final Logger logger = LoggerFactory.getLogger(S3OutputStream.class); + + private final S3Client s3; + + private final String bucketName; + + private final String key; + + private final PipedInputStream pipedInputStream; + + private final PipedOutputStream pipedOutputStream; + + private ExecutorService singleThreadExecutor; + + private volatile boolean uploading; + + private String contentType = Defaults.DEFAULT_CONTENT_TYPE; + + public S3OutputStream(S3Client s3, String bucketName, String key) throws IOException { + this.s3 = s3; + this.bucketName = bucketName; + this.key = key; + this.pipedInputStream = new PipedInputStream(); + this.pipedOutputStream = new PipedOutputStream(this.pipedInputStream); + this.uploading = false; + } + + @Override + public void write(int b) throws IOException { + if (!this.uploading) { + this.uploading = true; + runUploadThread(); + } + this.pipedOutputStream.write(b); + } + + private void runUploadThread() { + if (this.singleThreadExecutor == null) { + this.singleThreadExecutor = Executors.newSingleThreadExecutor(); + } + + this.singleThreadExecutor.execute(() -> { + try { + RequestBody body = RequestBody + .fromContentProvider(ContentStreamProvider.fromInputStream(this.pipedInputStream), this.contentType); + this.s3.putObject((builder) -> builder.bucket(this.bucketName).key(this.key), body); + } + finally { + try { + this.pipedInputStream.close(); + } + catch (IOException ex) { + logger.error("Error closing piped input stream", ex); + } + } + }); + this.singleThreadExecutor.shutdown(); + } + + @Override + public void close() throws IOException { + logger.debug("Closing output stream"); + this.pipedOutputStream.close(); + logger.debug("Output stream closed"); + super.close(); + } + + public void setContentType(String contentType) { + this.contentType = contentType; + } + + public String getContentType() { + return this.contentType; 
+ } +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3Uploader.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3Uploader.java new file mode 100644 index 00000000..559e6702 --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/S3Uploader.java @@ -0,0 +1,26 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.batch.extensions.s3.stream; + +import java.io.IOException; +import java.io.InputStream; + +public interface S3Uploader { + + long upload(InputStream inputStream) throws IOException; + +} diff --git a/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/package-info.java b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/package-info.java new file mode 100644 index 00000000..46a0d5bb --- /dev/null +++ b/spring-batch-s3/src/main/java/org/springframework/batch/extensions/s3/stream/package-info.java @@ -0,0 +1,27 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Provides support for reading from and writing to Amazon S3 using a stream-based
+ * approach. This package includes classes for streaming data from and to S3 objects
+ * and for handling the underlying streams efficiently.
+ *
+ *

+ * Classes in this package are designed to work with the AWS SDK for Java and provide a + * convenient way to read large datasets stored in S3 without loading them entirely into + * memory. + */ +package org.springframework.batch.extensions.s3.stream; diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/S3ItemReaderTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/S3ItemReaderTests.java new file mode 100644 index 00000000..3e8f1350 --- /dev/null +++ b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/S3ItemReaderTests.java @@ -0,0 +1,171 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import org.springframework.batch.extensions.s3.serializer.S3Deserializer; +import org.springframework.batch.extensions.s3.serializer.S3StringDeserializer; +import org.springframework.batch.extensions.s3.stream.S3InputStream; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.BDDMockito.given; +import static org.mockito.BDDMockito.then; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; + +class S3ItemReaderTests { + + private S3Deserializer mockDeserializer; + + private S3InputStream s3InputStream; + + private S3StringDeserializer stringDeserializer; + + @BeforeEach + void setUp() { + this.stringDeserializer = new S3StringDeserializer(); + this.mockDeserializer = mock(S3Deserializer.class); + this.s3InputStream = mock(S3InputStream.class); + } + + @Test + void testReadReturnsDeserializedItemWithStreamMock() throws Exception { + byte[] data = "test".getBytes(); + // given + given(this.s3InputStream.read(any(byte[].class))).willReturn(data.length, -1); + given(this.mockDeserializer.deserialize(any(byte[].class))).willReturn(null, "item"); + + S3ItemReader reader = new S3ItemReader<>(this.s3InputStream, this.mockDeserializer); + + // when + String result = reader.read(); + + // then + assertThat(result).isEqualTo("item"); + then(this.s3InputStream).should(times(1)).read(any(byte[].class)); + then(this.mockDeserializer).should(times(2)).deserialize(any(byte[].class)); + } + + @Test + void testReadReturnsDeserializedItem() throws Exception { + byte[] data = "item\n".getBytes(); + + // given + given(this.mockDeserializer.deserialize(any(byte[].class))) + .willReturn(null); + given(this.s3InputStream.read(any(byte[].class))).willAnswer((invocation) -> { + byte[] buffer = invocation.getArgument(0); + System.arraycopy(data, 0, 
buffer, 0, data.length); + return data.length; + }).willReturn(-1); + + S3ItemReader reader = new S3ItemReader<>(this.s3InputStream, this.stringDeserializer); + + // when + String result = reader.read(); + + // then + assertThat(result).isEqualTo("item"); + then(this.s3InputStream).should(times(1)).read(any(byte[].class)); + } + + @Test + void testReadReturnsNullWhenNoData() throws Exception { + // given + given(this.s3InputStream.read(any(byte[].class))).willReturn(-1); + + S3ItemReader reader = new S3ItemReader<>(this.s3InputStream, this.mockDeserializer); + + // when + String result = reader.read(); + + // then + assertThat(result).isNull(); + } + + @Test + void testReadReturnsMultipleItems() throws Exception { + byte[] data1 = "item1\n".getBytes(); + byte[] data2 = "item2\n".getBytes(); + + // given + given(this.s3InputStream.read(any(byte[].class))) + .willAnswer((invocation) -> { + byte[] buffer = invocation.getArgument(0); + System.arraycopy(data1, 0, buffer, 0, data1.length); + return data1.length; + }); + given(this.mockDeserializer.deserialize(any(byte[].class))) + .willReturn("item1") + .willReturn("item2") + .willReturn(null); // No more items + given(this.s3InputStream.read(any(byte[].class))) + .willAnswer((invocation) -> { + byte[] buffer = invocation.getArgument(0); + System.arraycopy(data2, 0, buffer, 0, data2.length); + return data2.length; + }) + .willReturn(-1); // End of stream + S3ItemReader reader = new S3ItemReader<>(this.s3InputStream, this.mockDeserializer); + String result1 = reader.read(); + String result2 = reader.read(); + String result3 = reader.read(); + // then + + assertThat(result1).isEqualTo("item1"); + assertThat(result2).isEqualTo("item2"); + assertThat(result3).isNull(); + then(this.s3InputStream).should(times(2)).read(any(byte[].class)); + then(this.mockDeserializer).should(times(4)).deserialize(any(byte[].class)); + } + + @Test + void testReadReturnsMultipleItemsInSingleDeserialization() throws Exception { + byte[] data = 
"item1\nitem2\n".getBytes(); + + // given + given(this.s3InputStream.read(any(byte[].class))) + .willAnswer((invocation) -> { + byte[] buffer = invocation.getArgument(0); + System.arraycopy(data, 0, buffer, 0, data.length); + return data.length; + }).willAnswer((invocation) -> -1); + + given(this.mockDeserializer.deserialize(any(byte[].class))) + .willReturn(null) // buffer is empty + .willReturn("item1") + .willReturn("item2") + .willReturn(null); // End of stream + + S3ItemReader reader = new S3ItemReader<>(this.s3InputStream, this.mockDeserializer); + String result1 = reader.read(); + String result2 = reader.read(); + String result3 = reader.read(); + + // then + + assertThat(result1).isEqualTo("item1"); + assertThat(result2).isEqualTo("item2"); + assertThat(result3).isNull(); + then(this.s3InputStream).should(times(2)).read(any(byte[].class)); + then(this.mockDeserializer).should(times(4)).deserialize(any(byte[].class)); + } + +} diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/S3ItemWriterTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/S3ItemWriterTests.java new file mode 100644 index 00000000..744ba934 --- /dev/null +++ b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/S3ItemWriterTests.java @@ -0,0 +1,101 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3; + +import java.io.IOException; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import org.springframework.batch.extensions.s3.serializer.S3Serializer; +import org.springframework.batch.extensions.s3.stream.S3MultipartOutputStream; +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemStreamException; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.BDDMockito.given; +import static org.mockito.BDDMockito.then; +import static org.mockito.BDDMockito.willThrow; +import static org.mockito.Mockito.mock; + +class S3ItemWriterTests { + + private S3Serializer serializer; + + private S3MultipartOutputStream outputStream; + + @BeforeEach + void setUp() { + this.serializer = mock(S3Serializer.class); + this.outputStream = mock(S3MultipartOutputStream.class); + } + + @Test + void testWrite_success() throws Exception { + String item = "test"; + byte[] data = item.getBytes(); + // given + given(this.serializer.serialize(item)).willReturn(data); + + S3ItemWriter writer = new S3ItemWriter<>(this.outputStream, this.serializer); + Chunk chunk = Chunk.of(item); + + // when + writer.write(chunk); + + // then + then(this.serializer).should().serialize(item); + then(this.outputStream).should().write(data); + } + + @Test + void testWrite_throwsOnNullOrEmpty() { + String item = "bad"; + // given + given(this.serializer.serialize(item)).willReturn(null); + + S3ItemWriter writer = new S3ItemWriter<>(this.outputStream, this.serializer); + Chunk chunk = Chunk.of(item); + + // when/then + assertThatThrownBy(() -> writer.write(chunk)) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void testClose_success() throws Exception { + S3ItemWriter writer = new S3ItemWriter<>(this.outputStream, this.serializer); + + // when + writer.close(); + + // then + then(this.outputStream).should().close(); + } + + @Test + void 
testClose_throwsItemStreamException() throws Exception { + // given + willThrow(new IOException("close error")).given(this.outputStream).close(); + S3ItemWriter writer = new S3ItemWriter<>(this.outputStream, this.serializer); + + // when/then + assertThatThrownBy(writer::close) + .isInstanceOf(ItemStreamException.class); + } + +} diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/serializer/S3StringDeserializerTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/serializer/S3StringDeserializerTests.java new file mode 100644 index 00000000..b6309f31 --- /dev/null +++ b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/serializer/S3StringDeserializerTests.java @@ -0,0 +1,70 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3.serializer; + + +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class S3StringDeserializerTests { + + @Test + void testDeserializeSingleLine() { + S3StringDeserializer deserializer = new S3StringDeserializer(); + String input = "testString\n"; + String result = deserializer.deserialize(input.getBytes(StandardCharsets.UTF_8)); + assertThat(result).isEqualTo("testString"); + } + + @Test + void testDeserializeMultipleLines() { + S3StringDeserializer deserializer = new S3StringDeserializer(); + String input = "line1\nline2\n"; + String result1 = deserializer.deserialize(input.getBytes(StandardCharsets.UTF_8)); + assertThat(result1).isEqualTo("line1"); + String result2 = deserializer.deserialize(new byte[0]); + assertThat(result2).isEqualTo("line2"); + } + + @Test + void testDeserializeWithCarriageReturn() { + S3StringDeserializer deserializer = new S3StringDeserializer(); + String input = "line1\r\n"; + String result = deserializer.deserialize(input.getBytes(StandardCharsets.UTF_8)); + assertThat(result).isEqualTo("line1"); + } + + @Test + void testDeserializePartialInput() { + S3StringDeserializer deserializer = new S3StringDeserializer(); + String part1 = "partial"; + String part2 = "Line\n"; + assertThat(deserializer.deserialize(part1.getBytes(StandardCharsets.UTF_8))).isNull(); + String result = deserializer.deserialize(part2.getBytes(StandardCharsets.UTF_8)); + assertThat(result).isEqualTo("partialLine"); + } + + @Test + void testDeserializeEmptyInput() { + S3StringDeserializer deserializer = new S3StringDeserializer(); + assertThat(deserializer.deserialize("".getBytes(StandardCharsets.UTF_8))).isNull(); + } + +} diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/serializer/S3StringSerializerTests.java 
b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/serializer/S3StringSerializerTests.java new file mode 100644 index 00000000..f3a643f4 --- /dev/null +++ b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/serializer/S3StringSerializerTests.java @@ -0,0 +1,62 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.batch.extensions.s3.serializer; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; + +class S3StringSerializerTests { + + @Test + void testSerialize() { + S3StringSerializer serializer = new S3StringSerializer(); + String input = "testString"; + byte[] result = serializer.serialize(input); + + String expected = "testString\n"; + assertThat(result).isEqualTo(expected.getBytes(java.nio.charset.StandardCharsets.UTF_8)); + } + + @Test + void testSerializeEmptyString() { + S3StringSerializer serializer = new S3StringSerializer(); + String input = ""; + byte[] result = serializer.serialize(input); + + String expected = "\n"; + assertThat(result).isEqualTo(expected.getBytes(java.nio.charset.StandardCharsets.UTF_8)); + } + + @Test + void testSerializeNull() { + S3StringSerializer serializer = new S3StringSerializer(); + assertThatCode(() -> serializer.serialize(null)).doesNotThrowAnyException(); + } + + @Test + void 
testSerializeWithSpecialCharacters() { + S3StringSerializer serializer = new S3StringSerializer(); + String input = "test\nstring\r\nwith special characters!@#$%^&*()"; + byte[] result = serializer.serialize(input); + + String expected = "test\nstring\r\nwith special characters!@#$%^&*()\n"; + assertThat(result).isEqualTo(expected.getBytes(java.nio.charset.StandardCharsets.UTF_8)); + } + +} diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3InputStreamTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3InputStreamTests.java new file mode 100644 index 00000000..cbf6541b --- /dev/null +++ b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3InputStreamTests.java @@ -0,0 +1,69 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.batch.extensions.s3.stream; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.BDDMockito.given; +import static org.mockito.BDDMockito.then; + +class S3InputStreamTests { + + private S3Client s3Client; + + private final byte[] data = { 1, 2, 3, 4 }; + + @BeforeEach + void setUp() { + this.s3Client = Mockito.mock(S3Client.class); + } + + @Test + void testRead() throws IOException { + InputStream mockStream = new ByteArrayInputStream(this.data); + ResponseInputStream responseInputStream = new ResponseInputStream<>( + GetObjectResponse.builder().build(), mockStream); + // given + given(this.s3Client.getObject(any(GetObjectRequest.class))).willReturn(responseInputStream); + + String key = "test-key"; + String bucket = "test-bucket"; + // when + try (S3InputStream s3InputStream = new S3InputStream(this.s3Client, bucket, key)) { + for (byte b : this.data) { + assertThat(s3InputStream.read()).isEqualTo(b); + } + assertThat(s3InputStream.read()).isEqualTo(-1); + } + + // then + then(this.s3Client).should().getObject(any(GetObjectRequest.class)); + } + +} diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3MultipartOutputStreamTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3MultipartOutputStreamTests.java new file mode 100644 index 00000000..82419d5b --- /dev/null +++ 
b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3MultipartOutputStreamTests.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.batch.extensions.s3.stream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.concurrent.TimeUnit;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import software.amazon.awssdk.services.s3.S3Client;
+
+import org.springframework.util.unit.DataSize;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.BDDMockito.given;
+import static org.mockito.BDDMockito.then;
+import static org.mockito.Mockito.mock;
+
+class S3MultipartOutputStreamTests {
+
+	private S3Client s3Client;
+
+	private S3Uploader multipartUploadMock;
+
+	@BeforeEach
+	void setUp() throws IOException {
+		this.s3Client = mock(S3Client.class);
+		this.multipartUploadMock = mock(S3Uploader.class);
+
+		given(this.multipartUploadMock.upload(any())).willAnswer((invocation) -> {
+			TimeUnit.MILLISECONDS.sleep(100); // Simulate some delay for upload
+			return 1L;
+		});
+	}
+
+	@Test
+	void testWriteSingleByteTriggersUpload() throws IOException {
+		int testByte = 42;
+
+		try (S3MultipartOutputStream out = new S3MultipartOutputStream(this.multipartUploadMock)) {
+			// when
+			out.write(testByte);
+
+			ArgumentCaptor<InputStream> captor = ArgumentCaptor.forClass(InputStream.class);
+
+			// then
+			then(this.multipartUploadMock).should().upload(captor.capture());
+			assertThat(captor.getValue().available()).as("InputStream should contain one byte").isEqualTo(1);
+		}
+	}
+
+	@Test
+	void testConstructorWithDefaultPartSize() throws IOException {
+		S3MultipartOutputStream out = new S3MultipartOutputStream(this.s3Client, "bucket", "key");
+		out.close();
+	}
+
+	@Test
+	void testConstructorWithCustomPartSize() throws IOException {
+		int customPartSize = (int) DataSize.ofMegabytes(10).toBytes();
+		var s3Uploader = new S3MultipartUploader(this.s3Client, "bucket", "key");
+		s3Uploader.setPartSize(customPartSize);
+		S3MultipartOutputStream out = new S3MultipartOutputStream(s3Uploader);
+		out.close();
+	}
+
+	@Test
+	void testConstructorWithS3UploadOutputStream() throws IOException {
+		S3MultipartOutputStream out = new S3MultipartOutputStream(this.multipartUploadMock);
+		out.close();
+	}
+
+}
diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3MultipartUploaderTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3MultipartUploaderTests.java
new file mode 100644
index 00000000..0d4cd7f5
--- /dev/null
+++ b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3MultipartUploaderTests.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.batch.extensions.s3.stream;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest;
+import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
+import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
+import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest;
+import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse;
+import software.amazon.awssdk.services.s3.model.UploadPartRequest;
+import software.amazon.awssdk.services.s3.model.UploadPartResponse;
+
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.BDDMockito.given;
+import static org.mockito.BDDMockito.then;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+
+class S3MultipartUploaderTests {
+
+	private S3Client s3Client;
+
+	private S3MultipartUploader s3MultipartUploader;
+
+	@BeforeEach
+	void setUp() {
+		this.s3Client = mock(S3Client.class);
+		var s3Uploader = new S3MultipartUploader(this.s3Client, "bucket", "key");
+		s3Uploader.setPartSize(5); // tiny 5-byte part size so small payloads exercise multi-part splitting
+		this.s3MultipartUploader = s3Uploader;
+	}
+
+	@Test
+	void testUpload_SuccessfulUpload() throws IOException {
+		byte[] data = "HelloWorld!".getBytes(); // 11 bytes, 3 parts, 2 of 5 bytes each and one of 1 byte
+		ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
+
+		// given
+		given(this.s3Client.createMultipartUpload(any(CreateMultipartUploadRequest.class)))
+			.willReturn(CreateMultipartUploadResponse.builder().uploadId("uploadId").build());
+
+		given(this.s3Client.uploadPart(any(UploadPartRequest.class), any(RequestBody.class)))
+			.willReturn(UploadPartResponse.builder().eTag("etag1").build(),
+					UploadPartResponse.builder().eTag("etag2").build(),
+					UploadPartResponse.builder().eTag("etag3").build());
+
+		given(this.s3Client.completeMultipartUpload(any(CompleteMultipartUploadRequest.class)))
+			.willReturn(CompleteMultipartUploadResponse.builder().location("url").build());
+
+		// when
+		this.s3MultipartUploader.upload(inputStream);
+
+		// then
+		then(this.s3Client).should().createMultipartUpload(any(CreateMultipartUploadRequest.class));
+		then(this.s3Client).should(times(3)).uploadPart(any(UploadPartRequest.class), any(RequestBody.class));
+		then(this.s3Client).should().completeMultipartUpload(any(CompleteMultipartUploadRequest.class));
+		then(this.s3Client).should().close(); // the uploader is expected to own and close the client when done
+	}
+
+	@Test
+	void testUpload_AbortOnException() {
+		byte[] data = "HelloWorld".getBytes();
+		ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
+
+		// given
+		given(this.s3Client.createMultipartUpload(any(CreateMultipartUploadRequest.class)))
+			.willReturn(CreateMultipartUploadResponse.builder().uploadId("uploadId").build());
+
+		given(this.s3Client.uploadPart(any(UploadPartRequest.class), any(RequestBody.class)))
+			.willThrow(new RuntimeException("Upload failed"));
+
+		// when/then
+		assertThatThrownBy(() -> this.s3MultipartUploader.upload(inputStream))
+			.isInstanceOf(RuntimeException.class);
+		then(this.s3Client).should().abortMultipartUpload(any(AbortMultipartUploadRequest.class)); // failed uploads must be aborted, not left dangling
+		then(this.s3Client).should().close(); // client must be closed even on the failure path
+	}
+
+}
diff --git a/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3OutputStreamTests.java b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3OutputStreamTests.java
new file mode 100644
index 00000000..e62f6f6d
--- /dev/null
+++ 
b/spring-batch-s3/src/test/java/org/springframework/batch/extensions/s3/stream/S3OutputStreamTests.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.batch.extensions.s3.stream;
+
+import java.io.IOException;
+import java.util.function.Consumer;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.services.s3.S3Client;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.timeout;
+import static org.mockito.Mockito.verify;
+
+class S3OutputStreamTests {
+
+	private S3Client s3Client;
+
+	@BeforeEach
+	void setUp() {
+		this.s3Client = mock(S3Client.class);
+	}
+
+	@Test
+	void testWriteAndUpload() throws IOException, InterruptedException {
+		byte[] data = { 10, 20, 30, 40 };
+		doReturn(null).when(this.s3Client).putObject(any(Consumer.class), any(RequestBody.class));
+
+		String bucket = "test-bucket";
+		String key = "test-key";
+		try (S3OutputStream out = new S3OutputStream(this.s3Client, bucket, key)) {
+			out.write(data);
+		}
+
+		// Single atomic verification: waits up to 200ms for the upload triggered by
+		// close() and asserts that putObject happens exactly once.
+		verify(this.s3Client, timeout(200).times(1)).putObject(any(Consumer.class), any(RequestBody.class));
+	}
+
+}