lex(String input) throws LexingException;
+
+ /**
+ * Unchecked exception: the input cannot be tokenised under the Web Link lexical rules.
+ */
+ class LexingException extends RuntimeException {
+
+ public LexingException(String message) {
+ super(message);
+ }
+
+ public LexingException(String message, Throwable cause) {
+ super(message, cause);
+ }
+ }
+
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParameter.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParameter.java
new file mode 100644
index 000000000..0d3c6fec9
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParameter.java
@@ -0,0 +1,71 @@
+package life.qbic.datamanager.signposting.http;
+
+/**
+ * A parameter for the HTTP Link header attribute.
+ *
+ * <p>Based on RFC 8288, a parameter with only a name is valid.
+ *
+ * <pre>
+ * {@code
+ * // ABNF notation for web links
+ * Link = #link-value
+ * link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ * link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
+ *
+ * // valid parameter examples
+ * "Link: <https://example.org>; rel; param1;"
+ * "Link: <https://example.org>; rel="self"; param1="";"
+ * }
+ * </pre>
+ *
+ * <p>It is important that different parameter serialisation cases are handled correctly.
+ *
+ * <p>The following example shows four distinct cases that must be preserved during de-serialisation:
+ *
+ * <pre>
+ * {@code
+ * x="" // empty double-quoted string
+ * x="y" // double-quoted with content
+ * x=y // token value
+ * x // parameter name only
+ * }
+ * </pre>
+ *
+ * <p>These are all valid parameter serialisations.
+ * @param name the name of the parameter
+ * @param value the value of the parameter; {@code null} when no value was provided
+ */
+public record WebLinkParameter(String name, String value) {
+
+ /**
+ * Creates a new web link parameter with the provided name and value.
+ * @param name the name of the web link parameter
+ * @param value the value of the web link parameter
+ * @return a new parameter holding the given name and value
+ */
+ public static WebLinkParameter create(String name, String value) {
+ return new WebLinkParameter(name, value);
+ }
+
+ /**
+ * Creates a new web link parameter without a value.
+ * @param name the name of the parameter
+ * @return a new parameter whose value is {@code null}, so {@link #hasValue()} is {@code false}
+ */
+ public static WebLinkParameter withoutValue(String name) {
+ return new WebLinkParameter(name, null);
+ }
+
+ /**
+ * Checks if the web link parameter has a value.
+ *
+ * <p>The method will return {@code true} only when a value (including an empty one) has been
+ * provided.
+ *
+ * @return {@code true}, if the parameter has a value (including an empty one). Returns
+ * {@code false}, if no value has been provided
+ */
+ public boolean hasValue() {
+ return value != null;
+ }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParser.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParser.java
new file mode 100644
index 000000000..007828529
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParser.java
@@ -0,0 +1,59 @@
+package life.qbic.datamanager.signposting.http;
+
+import java.util.List;
+import life.qbic.datamanager.signposting.http.lexing.WebLinkToken;
+import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader;
+
+/**
+ * A parser that checks the structural integrity of an HTTP Link header in compliance with RFC 8288.
+ *
+ * <p>A web link parser processes the tokens produced by web link lexing and converts them, after
+ * structural validation, into a raw link header, which can be seen as an AST (abstract syntax tree).
+ *
+ * <p>Note: Implementations must not perform semantic validation; this is the concern of
+ * {@link WebLinkValidator} implementations.
+ *
+ * <p>In case of structural violations, implementations of the {@link WebLinkParser} interface
+ * must throw a {@link StructureException}.
+ *
+ * <p>RFC 8288 section 3 describes the serialization of the Link HTTP header attribute:
+ *
+ * <pre>
+ * {@code
+ * Link = #link-value
+ * link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ * link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
+ * }
+ * </pre>
+ *
+ * <p>The {@link WebLinkParser} interface processes {@link WebLinkToken} items, which are the
+ * output of lexing raw character values into known token values. See {@link WebLinkLexer} for
+ * details on lexers.
+ */
+public interface WebLinkParser {
+
+  /**
+   * Parses a list of {@link WebLinkToken} and performs structural validation based on the RFC 8288
+   * serialisation requirement.
+   *
+   * <p>The returned value is an AST of a raw link header with a list of raw web link items that
+   * can be used for semantic validation.
+   *
+   * @param tokens a list of web link tokens to process
+   * @return a raw link header parsed from the web link tokens
+   * @throws NullPointerException if the token list is {@code null}
+   * @throws StructureException if any structural violation occurred
+   */
+  RawLinkHeader parse(List<WebLinkToken> tokens) throws NullPointerException, StructureException;
+
+  /**
+   * Indicates a structural violation of the RFC 8288 web link serialisation requirement.
+   */
+  class StructureException extends RuntimeException {
+
+    public StructureException(String message) {
+      super(message);
+    }
+  }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkProcessor.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkProcessor.java
new file mode 100644
index 000000000..9aa524ede
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkProcessor.java
@@ -0,0 +1,210 @@
+package life.qbic.datamanager.signposting.http;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import life.qbic.datamanager.signposting.http.WebLinkLexer.LexingException;
+import life.qbic.datamanager.signposting.http.WebLinkParser.StructureException;
+import life.qbic.datamanager.signposting.http.WebLinkValidator.Issue;
+import life.qbic.datamanager.signposting.http.WebLinkValidator.IssueReport;
+import life.qbic.datamanager.signposting.http.WebLinkValidator.ValidationResult;
+import life.qbic.datamanager.signposting.http.lexing.SimpleWebLinkLexer;
+import life.qbic.datamanager.signposting.http.parsing.SimpleWebLinkParser;
+import life.qbic.datamanager.signposting.http.validation.Rfc8288WebLinkValidator;
+
+/**
+ * Configurable processor for raw web link strings from the HTTP Link header field.
+ *
+ * The underlying standard is RFC 8288
+ *
+ */
+public class WebLinkProcessor {
+
+ private final WebLinkLexer lexer;
+ private final WebLinkParser parser;
+ private final List validators;
+
+ private WebLinkProcessor() {
+ this.lexer = null;
+ this.parser = null;
+ this.validators = null;
+ }
+
+ private WebLinkProcessor(
+ WebLinkLexer selectedLexer,
+ WebLinkParser selectedParser,
+ List selectedValidators) {
+ this.lexer = Objects.requireNonNull(selectedLexer);
+ this.parser = Objects.requireNonNull(selectedParser);
+ this.validators = List.copyOf(Objects.requireNonNull(selectedValidators));
+ }
+
+ /**
+ * Processes a raw link header string and returns a validation result with the final web links and
+ * an issue report.
+ *
+ * The processor performs different steps until the validation result returns:
+ *
+ *
+ * - Tokenization: the raw string gets translated into enumerated token values
+ * - Parsing: the token collection gets structurally parsed and checked, the result is an AST of raw link values
+ * - Validation: one or more validation steps to semantically check the raw web links
+ *
+ *
+ * The caller is advised to check the {@link ValidationResult#report()} in case issues have been recorded.
+ *
+ * By contract of the validation interface, validators MUST record issues as errors in case there are severe semantically
+ * deviations from the model the validator represents. Warnings can be investigated, but clients
+ * can expect to continue to use the returned web links.
+ *
+ * @param rawLinkHeader the serialized raw link header value
+ * @return a validation result with the web links and an issue report with recorded findings of
+ * warnings and errors.
+ * @throws LexingException in case the header contains invalid characters (during
+ * tokenizing)
+ * @throws StructureException in case the header does not have the expected structure (during
+ * parsing)
+ * @throws NullPointerException in case the raw link header is {@code null}
+ */
+ public ValidationResult process(String rawLinkHeader)
+ throws LexingException, StructureException, NullPointerException {
+ var header = Objects.requireNonNull(rawLinkHeader);
+ var tokenizedHeader = lexer.lex(header);
+ var parsedHeader = parser.parse(tokenizedHeader);
+
+ var aggregatedIssues = new ArrayList();
+ ValidationResult cachedValidationResult = null;
+ for (WebLinkValidator validator : validators) {
+ cachedValidationResult = validator.validate(parsedHeader);
+ aggregatedIssues.addAll(cachedValidationResult.report().issues());
+ }
+
+ if (cachedValidationResult == null) {
+ throw new IllegalStateException(
+ "No validation result was found after processing: " + rawLinkHeader);
+ }
+
+ return new ValidationResult(cachedValidationResult.weblinks(),
+ new IssueReport(aggregatedIssues));
+ }
+
+ /**
+ * Builder for a {@link WebLinkProcessor}.
+ *
+ * The builder allows for flexible configuration of the different processing steps:
+ *
+ *
+ * - Tokenization: the raw string gets translated into enumerated token values
+ * - Parsing: the token collection gets structurally parsed and checked, the result is an AST of raw link values
+ * - Validation: one or more validation steps to semantically check the raw web links
+ *
+ *
+ * It is possible to create a default processor by simply omitting any configuration:
+ *
+ *
+ * {@code
+ * // Creates a processor with default configuration
+ * WebLinkProcessor defaultProcessor = new Builder.build()
+ * }
+ *
+ *
+ * The default components are:
+ *
+ *
+ * - lexer: {@link SimpleWebLinkLexer}
+ * - parser: {@link SimpleWebLinkParser}
+ * - validator: {@link Rfc8288WebLinkValidator}
+ *
+ *
+ * The RFC 8282 validator will only be used if no validator has been provided. If you want
+ * to combine the RFC validator with additional ones, you can do so:
+ *
+ *
+ * {@code
+ *
+ * WebLinkProcessor customProcessor =
+ * new Builder.withValidator(Rfc8288WebLinkValidator.create())
+ * .withValidator(new MyCustomValidator())
+ * .build()
+ * }
+ *
+ */
+ public static class Builder {
+
+ private WebLinkLexer configuredLexer;
+
+ private WebLinkParser configuredParser;
+
+ private final List configuredValidators = new ArrayList<>();
+
+ /**
+ * Configures a different lexer from the default that shall be used in the processing.
+ *
+ * @param lexer the lexer to be used in the processing
+ * @return the builder instance
+ */
+ public Builder withLexer(WebLinkLexer lexer) {
+ configuredLexer = lexer;
+ return this;
+ }
+
+ /**
+ * Configures a different lexer from the default that shall be used in the processing.
+ *
+ * @param lexer the lexer to be used in the processing
+ * @return the builder instance
+ */
+ public Builder withParser(WebLinkParser parser) {
+ configuredParser = parser;
+ return this;
+ }
+
+ /**
+ * Configures a different lexer from the default that shall be used in the processing.
+ *
+ * Multiple validators can be configured by calling this method repeatedly. The validators are
+ * called in the order they have been configured on the builder.
+ *
+ *
+ * {@code
+ * var processor = Builder.withValidator(first) // first validator
+ * .withValidator(other) // appends next validator
+ * .build()
+ * }
+ *
+ *
+ * @param validator the validator to be used in the processing
+ * @return the builder instance
+ */
+ public Builder withValidator(WebLinkValidator validator) {
+ configuredValidators.add(validator);
+ return this;
+ }
+
+ /**
+ * Creates instance of a web link processor object based on the configuration.
+ *
+ * @return the configured web link processor
+ */
+ public WebLinkProcessor build() {
+ var selectedLexer = configuredLexer == null ? defaultLexer() : configuredLexer;
+ var selectedParser = configuredParser == null ? defaultParser() : configuredParser;
+ var selectedValidators =
+ configuredValidators.isEmpty() ? List.of(defaultValidator()) : configuredValidators;
+
+ return new WebLinkProcessor(selectedLexer, selectedParser, selectedValidators);
+ }
+
+ private WebLinkParser defaultParser() {
+ return SimpleWebLinkParser.create();
+ }
+
+ private static WebLinkLexer defaultLexer() {
+ return SimpleWebLinkLexer.create();
+ }
+
+ private static WebLinkValidator defaultValidator() {
+ return Rfc8288WebLinkValidator.create();
+ }
+ }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkTokenType.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkTokenType.java
new file mode 100644
index 000000000..4b1ac1472
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkTokenType.java
@@ -0,0 +1,53 @@
+package life.qbic.datamanager.signposting.http;
+
+/**
+ * Enumeration of the token types used to describe the lexed parts of a web link serialisation.
+ */
+public enum WebLinkTokenType {
+
+ /**
+ * {@code "<"}
+ */
+ LT,
+
+ /**
+ * {@code ">"}
+ */
+ GT,
+
+ /**
+ * {@code ";"}
+ */
+ SEMICOLON,
+
+ /**
+ * {@code "="}
+ */
+ EQUALS,
+
+ /**
+ * {@code ","}
+ */
+ COMMA,
+
+ /**
+ * A URI-Reference between {@code "<"} and {@code ">"}. The angle brackets themselves are
+ * represented by LT and GT tokens.
+ */
+ URI,
+
+ /**
+ * An unquoted token (e.g. parameter name, token value).
+ */
+ IDENT,
+
+ /**
+ * A quoted-string value without the surrounding quotes.
+ */
+ QUOTED,
+
+ /**
+ * End-of-input marker.
+ */
+ EOF
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkValidator.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkValidator.java
new file mode 100644
index 000000000..9b5f141d1
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkValidator.java
@@ -0,0 +1,117 @@
+package life.qbic.datamanager.signposting.http;
+
+import java.util.List;
+import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader;
+
+/**
+ * Performs validation of raw web link headers.
+ *
+ * Validator are expected to consume output of a {@link WebLinkParser} and convert the web link
+ * information into reusable web link objects.
+ *
+ * Implementations of the {@link WebLinkValidator} interface must perform semantic validation only.
+ *
+ * Implementations also must not interrupt the validation on violations but provide the information
+ * in the attached {@link IssueReport} of the {@link ValidationResult}.
+ */
+public interface WebLinkValidator {
+
+ /**
+ * Validates the given raw link header against the semantic integrity of the validator type.
+ *
+ * Violations on the semantic level must be recorded in the returned issue list with type
+ * {@link IssueType#ERROR}. In the presence of any error, at least one web link entry is faulty
+ * and appropriate error handling is advised.
+ *
+ * Warnings shall indicate less strict deviations of the specification and must result in usable
+ * web link objects. If no errors are provided, the client must be able to be safely continue to
+ * use the web link object in the semantic scope that the validator guarantees.
+ *
+ * The implementation MUST NOT interrupt the validation in case any error is recorded. Validation
+ * shall always complete successfully and the method return the validation result.
+ *
+ * @param rawLinkHeader the raw link header
+ * @return the validation result with a list of web link objects and an {@link IssueReport}.
+ * @throws NullPointerException if the raw link header is {@code null}
+ */
+ ValidationResult validate(RawLinkHeader rawLinkHeader) throws NullPointerException;
+
+ /**
+ * A summary of the validation with the final web links for further use and an issue report with
+ * validation warnings or violations.
+ *
+ * @param weblinks a collection of web links that have been converted from validation
+ * @param report a container for recorded issues during validation
+ */
+ record ValidationResult(List weblinks, IssueReport report) {
+
+ public ValidationResult {
+ weblinks = List.copyOf(weblinks);
+ }
+
+ public boolean containsIssues() {
+ return !report.isEmpty();
+ }
+ }
+
+ /**
+ * A container for recorded issues during validation.
+ *
+ * @param issues the issues found during validation
+ */
+ record IssueReport(List issues) {
+
+ public boolean hasErrors() {
+ return issues.stream().anyMatch(Issue::isError);
+ }
+
+ public boolean hasWarnings() {
+ return issues.stream().anyMatch(Issue::isWarning);
+ }
+
+ public boolean isEmpty() {
+ return issues.isEmpty();
+ }
+ }
+
+ /**
+ * Describes any deviations from a semantic model either as warning or error.
+ *
+ * @param message a descriptive message that helps clients to process the issue
+ * @param type the severity level of the issue. {@link IssueType#ERROR} shall be used to
+ * indicate serious violations from the semantic model that would lead to wrong
+ * interpretation by the client. For less severe deviations the
+ * {@link IssueType#WARNING} can be used.
+ */
+ record Issue(String message, IssueType type) {
+
+ public static Issue warning(String message) {
+ return new Issue(message, IssueType.WARNING);
+ }
+
+ public static Issue error(String message) {
+ return new Issue(message, IssueType.ERROR);
+ }
+
+ public boolean isWarning() {
+ return type.equals(IssueType.WARNING);
+ }
+
+ public boolean isError() {
+ return type.equals(IssueType.ERROR);
+ }
+ }
+
+ /**
+ * An enumeration of different issue types.
+ *
+ *
+ * - ERROR - Deviation from the semantic level that brakes interpretation, a specification or contract
+ * - WARNING - Deviation from the semantic level that does not brake interpretation, specification or a contract
+ *
+ */
+ enum IssueType {
+ WARNING,
+ ERROR
+ }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/SimpleWebLinkLexer.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/SimpleWebLinkLexer.java
new file mode 100644
index 000000000..4247fbaa5
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/SimpleWebLinkLexer.java
@@ -0,0 +1,206 @@
+package life.qbic.datamanager.signposting.http.lexing;
+
+import java.util.ArrayList;
+import java.util.List;
+import life.qbic.datamanager.signposting.http.WebLinkLexer;
+import life.qbic.datamanager.signposting.http.WebLinkTokenType;
+
+/**
+ * Simple scanning lexer for RFC 8288 Web Link serialisations.
+ *
+ * <p>This implementation:
+ * <ul>
+ *   <li>Skips ASCII whitespace (OWS/BWS) between tokens
+ *   <li>Treats URIs as everything between {@code "<"} and {@code ">"}
+ *   <li>Treats unquoted tokens as IDENT
+ *   <li>Produces QUOTED tokens for quoted-string values (without the quotes, escapes resolved)
+ *   <li>Emits an EOF token at the end of input
+ * </ul>
+ *
+ * <p>Parsing and semantic validation are handled by later stages.
+ */
+public final class SimpleWebLinkLexer implements WebLinkLexer {
+
+  private SimpleWebLinkLexer() {}
+
+  public static SimpleWebLinkLexer create() {
+    return new SimpleWebLinkLexer();
+  }
+
+
+  @Override
+  public List<WebLinkToken> lex(String input) throws LexingException {
+    return new Scanner(input).scan();
+  }
+
+  /**
+   * Internal scanner doing a single left-to-right pass over the input.
+   */
+  private static final class Scanner {
+
+    private final String input;
+    private final int length;
+    private int pos = 0;
+
+    private final List<WebLinkToken> tokens = new ArrayList<>();
+
+    Scanner(String input) {
+      // a null input is lexed like an empty one
+      this.input = input != null ? input : "";
+      this.length = this.input.length();
+    }
+
+    List<WebLinkToken> scan() {
+      while (!eof()) {
+        char c = peek();
+
+        if (isWhitespace(c)) {
+          consumeWhitespace();
+          continue;
+        }
+
+        int start = pos;
+
+        switch (c) {
+          case '<' -> readUri(start);
+          case '>' -> readPunctuation(WebLinkTokenType.GT, start);
+          case ';' -> readPunctuation(WebLinkTokenType.SEMICOLON, start);
+          case '=' -> readPunctuation(WebLinkTokenType.EQUALS, start);
+          case ',' -> readPunctuation(WebLinkTokenType.COMMA, start);
+          case '"' -> readQuoted(start);
+          default -> readIdent(start);
+        }
+      }
+
+      tokens.add(WebLinkToken.of(WebLinkTokenType.EOF, "", pos));
+      return tokens;
+    }
+
+    /**
+     * Emits the single-character delimiter at the current position as a token of the given type.
+     */
+    private void readPunctuation(WebLinkTokenType type, int start) {
+      tokens.add(WebLinkToken.of(type, String.valueOf(peek()), start));
+      advance();
+    }
+
+    /**
+     * Reads a URI-Reference between "&lt;" and "&gt;". Emits three tokens: LT, URI, GT.
+     */
+    private void readUri(int start) {
+      // consume "<"
+      advance();
+      tokens.add(WebLinkToken.of(WebLinkTokenType.LT, "<", start));
+
+      int uriStart = pos;
+
+      while (!eof()) {
+        char c = peek();
+        if (c == '>') {
+          break;
+        }
+        advance();
+      }
+
+      if (eof()) {
+        throw new LexingException(
+            "Unterminated URI reference: missing '>' for '<' at position " + start);
+      }
+
+      String uriText = input.substring(uriStart, pos);
+      tokens.add(WebLinkToken.of(WebLinkTokenType.URI, uriText, uriStart));
+
+      // consume ">"
+      int gtPos = pos;
+      advance();
+      tokens.add(WebLinkToken.of(WebLinkTokenType.GT, ">", gtPos));
+    }
+
+    /**
+     * Reads a quoted-string and emits its content without the surrounding quotes. A quoted-pair
+     * (backslash followed by any character) contributes only the escaped character, so an escaped
+     * double quote does not terminate the string.
+     */
+    private void readQuoted(int start) {
+      // consume opening quote
+      advance();
+
+      int contentStart = pos;
+      var content = new StringBuilder();
+      boolean terminated = false;
+
+      while (!eof() && !terminated) {
+        char c = peek();
+        advance();
+        if (c == '"') {
+          terminated = true;
+        } else if (c == '\\' && !eof()) {
+          // quoted-pair: the backslash is dropped, the escaped character is kept literally
+          content.append(peek());
+          advance();
+        } else {
+          content.append(c);
+        }
+      }
+
+      if (!terminated) {
+        throw new LexingException(
+            "Unterminated quoted-string starting at position " + start);
+      }
+
+      tokens.add(WebLinkToken.of(WebLinkTokenType.QUOTED, content.toString(), contentStart));
+    }
+
+    /**
+     * Reads an unquoted token (IDENT) until a delimiter or whitespace is reached.
+     */
+    private void readIdent(int start) {
+      while (!eof()) {
+        char c = peek();
+        if (isDelimiter(c) || isWhitespace(c)) {
+          break;
+        }
+        advance();
+      }
+
+      String text = input.substring(start, pos);
+      if (!text.isEmpty()) {
+        tokens.add(WebLinkToken.of(WebLinkTokenType.IDENT, text, start));
+      }
+    }
+
+    private void consumeWhitespace() {
+      while (!eof() && isWhitespace(peek())) {
+        advance();
+      }
+    }
+
+    private static boolean isWhitespace(char c) {
+      // OWS/BWS: space or horizontal tab are most important;
+      // here we also accept CR/LF defensively.
+      return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+    }
+
+    /**
+     * Characters that delimit IDENT tokens.
+     */
+    private static boolean isDelimiter(char c) {
+      return switch (c) {
+        case '<', '>', ';', '=', ',', '"' -> true;
+        default -> false;
+      };
+    }
+
+    private boolean eof() {
+      return pos >= length;
+    }
+
+    private char peek() {
+      return input.charAt(pos);
+    }
+
+    private void advance() {
+      pos++;
+    }
+  }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/WebLinkToken.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/WebLinkToken.java
new file mode 100644
index 000000000..dc6747aae
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/WebLinkToken.java
@@ -0,0 +1,25 @@
+package life.qbic.datamanager.signposting.http.lexing;
+
+import life.qbic.datamanager.signposting.http.WebLinkTokenType;
+
+/**
+ * Single token produced by a WebLinkLexer: its type, its text and where it starts in the input.
+ *
+ * @param type the token type
+ * @param text the raw text content for this token (without decorations like quotes)
+ * @param position the zero-based character offset in the input where this token starts
+ */
+public record WebLinkToken(
+ WebLinkTokenType type,
+ String text,
+ int position) {
+
+ public static WebLinkToken of(WebLinkTokenType type, String text, int position) {
+ return new WebLinkToken(type, text, position);
+ }
+
+ @Override
+ public String toString() {
+ return type + "('" + text + "' @" + position + ")";
+ }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLink.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLink.java
new file mode 100644
index 000000000..7d9ff7ddb
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLink.java
@@ -0,0 +1,14 @@
+package life.qbic.datamanager.signposting.http.parsing;
+
+import java.util.List;
+
+/**
+ * A single raw link value of a parsed Link header: a URI reference and its parameters. The
+ * content has been parsed structurally only and is not yet semantically validated.
+ *
+ * @param rawURI the URI reference text of the link value
+ * @param rawParameters the parameters of the link value
+ */
+public record RawLink(String rawURI, List<RawParam> rawParameters) {
+
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLinkHeader.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLinkHeader.java
new file mode 100644
index 000000000..fa5036fb5
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLinkHeader.java
@@ -0,0 +1,14 @@
+package life.qbic.datamanager.signposting.http.parsing;
+
+import java.util.List;
+
+/**
+ * The raw content of a parsed Link header: the list of its raw link values. The content has
+ * been parsed structurally only and is not yet semantically validated.
+ *
+ * @param rawLinks the raw link values of the header
+ *
+ */
+public record RawLinkHeader(List<RawLink> rawLinks) {
+
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawParam.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawParam.java
new file mode 100644
index 000000000..3abdfac93
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawParam.java
@@ -0,0 +1,43 @@
+package life.qbic.datamanager.signposting.http.parsing;
+
+/**
+ * A raw link parameter: a name with an optional value.
+ * An absent value is represented by {@code null}.
+ *
+ * @param name the name of the parameter
+ * @param value the value of the parameter, {@code null} if the parameter has a name only
+ */
+public record RawParam(String name, String value) {
+
+ /**
+ * Creates a raw parameter without a value, that only has a name.
+ *
+ * A call to {@link #value()} will return {@code null} for parameters without a value.
+ *
+ * @param name the name of the parameter
+ * @return a raw parameter with a name only
+ */
+ public static RawParam emptyParameter(String name) {
+ return new RawParam(name, null);
+ }
+
+ /**
+ * Creates a raw parameter with name and value.
+ *
+ * The client must not pass empty or blank values as parameter value, but shall call
+ * {@link #emptyParameter(String)} explicitly. Alternatively, the client can also pass
+ * {@code null} for value, to indicate a parameter without a value.
+ * NOTE(review): an empty quoted value ({@code x=""}) is rejected here too - confirm intent.
+ * @param name the name of the parameter
+ * @param value the value of the parameter
+ * @return a raw parameter
+ * @throws IllegalArgumentException in case the value is empty or blank
+ */
+ public static RawParam withValue(String name, String value) throws IllegalArgumentException {
+ if (value != null && value.isBlank()) {
+ throw new IllegalArgumentException("Value cannot be blank");
+ }
+ return new RawParam(name, value);
+ }
+
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/SimpleWebLinkParser.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/SimpleWebLinkParser.java
new file mode 100644
index 000000000..0f526a592
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/SimpleWebLinkParser.java
@@ -0,0 +1,312 @@
+package life.qbic.datamanager.signposting.http.parsing;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Objects;
+import life.qbic.datamanager.signposting.http.WebLinkParser;
+import life.qbic.datamanager.signposting.http.lexing.WebLinkToken;
+import life.qbic.datamanager.signposting.http.WebLinkTokenType;
+
+/**
+ * Parses serialized information used in Web Linking as described in RFC 8288.
+ *
+ * The implementation is based on the Link Serialisation in HTTP Headers, section 3 of the
+ * RFC 8288.
+ *
+ * Note: the implementation of this class is NOT thread-safe.
+ *
+ *
+ *
+ * Link = #link-value
+ * link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ * link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
+ *
+ *
+ */
+public class SimpleWebLinkParser implements WebLinkParser {
+
+  private int currentPosition = 0; // index of the token the parser currently inspects
+
+  private List<WebLinkToken> tokens; // position-sorted token list of the running parse() call
+
+  private SimpleWebLinkParser() {
+    // not publicly constructible; use create()
+  }
+
+  /**
+   * Factory for parser instances.
+   * @return a new SimpleWebLinkParser
+   */
+  public static SimpleWebLinkParser create() {
+    return new SimpleWebLinkParser();
+  }
+
+
+  /**
+   * Parses a list of lexed web link tokens to a raw link header value. The parser only performs
+   * structural validation, not semantic validation.
+   *
+   * The template for structural validation is the serialisation description in ABNF for RFC 8288
+   * Section 3.
+   *
+   * Parser contract:
+   *
+   * - The token list must contain an EOF token
+   * - The last token item must be an EOF token, based on ascending sorting by position
+   *
+   * A token list that does not end with an EOF token is reported as
+   * {@link IllegalStateException}, every other structural violation as
+   * {@link StructureException}.
+   *
+   * @param tokens a list of tokens to parse as raw web link header
+   * @return a raw web link header, structurally validated against RFC 8288
+   * @throws NullPointerException  if the tokens list is {@code null}
+   * @throws IllegalStateException if the last token (by position) is not an EOF token
+   * @throws StructureException    if the tokens violate the structure of a valid web link
+   */
+  @Override
+  public RawLinkHeader parse(List<WebLinkToken> tokens)
+      throws NullPointerException, StructureException {
+    Objects.requireNonNull(tokens);
+
+    if (tokens.isEmpty()) {
+      throw new StructureException(
+          "A link header entry must have at least one web link. Tokens were empty.");
+    }
+
+    // Always reset the internal state on every parse() call
+    reset();
+
+    this.tokens = tokens.stream()
+        .sorted(Comparator.comparingInt(WebLinkToken::position))
+        .toList();
+
+    // Validate contract
+    ensureEOF("Lexer did not append EOF token");
+
+    if (currentIsEof()) {
+      throw new StructureException(
+          "A link header entry must have at least one web link. Tokens started with EOF.");
+    }
+
+    var collectedLinks = new ArrayList<RawLink>();
+
+    var parsedLink = parseLinkValue();
+    collectedLinks.add(parsedLink);
+    // While there is ',' (COMMA) present, parse another link value
+    while (current().type() == WebLinkTokenType.COMMA) {
+      next();
+      if (currentIsEof()) {
+        throw new StructureException(
+            "Unexpected trailing comma: expected another link-value after ','.");
+      }
+      collectedLinks.add(parseLinkValue());
+    }
+
+    // Last consumed token must be always EOF to ensure that the token stream has been consumed
+    expectCurrent(WebLinkTokenType.EOF);
+
+    return new RawLinkHeader(collectedLinks);
+  }
+
+  /**
+   * Rewinds the token cursor to the first token so the instance can serve another parse() call.
+   */
+  private void reset() {
+    this.currentPosition = 0;
+  }
+
+  /**
+   * Verifies the parser contract that the token list ends with an EOF token. The parser fails
+   * early here if the contract is violated, which keeps the remaining logic robust and simple.
+   *
+   * @param errorMessage the message to provide in the exception
+   * @throws IllegalStateException if the last token of the list is not an EOF token
+   */
+  private void ensureEOF(String errorMessage) throws IllegalStateException {
+    boolean endsWithEof = tokens.getLast().type() == WebLinkTokenType.EOF;
+    if (!endsWithEof) {
+      throw new IllegalStateException(errorMessage);
+    }
+  }
+
+  /**
+   * Parses one link-value: the mandatory target (URI) followed by an optional parameter list.
+   *
+   * When the token directly after the target is a ',' (COMMA), the link-value has no parameters
+   * and {@link #parseParameters()} is not invoked.
+   *
+   * Checking that the parameter list is introduced by a ';' (SEMICOLON) is left to
+   * {@link #parseParameters()}, since the separator belongs to the parameter list description.
+   *
+   * @return a raw web link value with target and optionally one or more parameters
+   */
+  private RawLink parseLinkValue() {
+    var target = parseUriReference();
+    boolean nextLinkFollows = current().type() == WebLinkTokenType.COMMA;
+    if (nextLinkFollows) {
+      return new RawLink(target, List.of());
+    }
+    return new RawLink(target, parseParameters());
+  }
+
+ /**
+ * Parses parameters beginning from the current token position (inclusive).
+ *
+ * Based on the serialisation description of RFC 8288 for link-values, params must have a
+ * precedent ';' (SEMICOLON). If the start position on method call is not a semicolon, an
+ * exception will be thrown.
+ *
+ * In case the link-value has no parameters at all (e.g. multiple web links with targets (URI)
+ * only), this method should not be called in the first place.
+ *
+ * @return a list of raw parameters with param name and value
+ */
+ private List parseParameters() {
+ var parameters = new ArrayList();
+ if (currentIsEof()) {
+ return parameters;
+ }
+ // expected separator for a parameter entry is ';' (semicolon) based on RFC 8288 section 3
+ expectCurrent(WebLinkTokenType.SEMICOLON);
+ next();
+
+ // now one or more parameters can follow
+ while (current().type() != WebLinkTokenType.COMMA) {
+ RawParam parameter = parseParameter();
+ parameters.add(parameter);
+ // If the current token is no ';' (SEMICOLON), no additional parameters are expected
+ if (current().type() != WebLinkTokenType.SEMICOLON) {
+ break;
+ }
+ next();
+ }
+ return parameters;
+ }
+
+  /**
+   * Parses a single link-param: {@code token BWS [ "=" BWS ( token / quoted-string ) ]}.
+   *
+   * @return the raw parameter, with a {@code null} value if only a name was given
+   * @throws StructureException if the tokens do not form a valid link-param
+   */
+  private RawParam parseParameter() throws StructureException {
+    expectCurrent(WebLinkTokenType.IDENT);
+    var paramName = current().text();
+    next();
+    // A name directly followed by EOF, ',' or ';' is a parameter without a value
+    var typeAfterName = current().type();
+    if (typeAfterName == WebLinkTokenType.EOF
+        || typeAfterName == WebLinkTokenType.COMMA
+        || typeAfterName == WebLinkTokenType.SEMICOLON) {
+      return RawParam.emptyParameter(paramName);
+    }
+    // Otherwise "=" and a token or quoted-string value must follow
+    expectCurrent(WebLinkTokenType.EQUALS);
+    next();
+    expectCurrentAny(WebLinkTokenType.IDENT, WebLinkTokenType.QUOTED);
+    var rawParamValue = current().text();
+    next();
+    // NOTE(review): RawParam.withValue throws IllegalArgumentException for blank text; confirm
+    // what the lexer emits for an empty quoted-string such as x=""
+    return RawParam.withValue(paramName, rawParamValue);
+  }
+
+  /**
+   * Tells whether the parser has reached the EOF token.
+   * @return {@code true}, if the current token is an EOF token, else {@code false}
+   */
+  private boolean currentIsEof() {
+    var currentType = current().type();
+    return WebLinkTokenType.EOF == currentType;
+  }
+
+  /**
+   * Asserts that the current token has the expected type.
+   *
+   * @param token the expected token type
+   * @throws StructureException if the current token does not match the expected one
+   */
+  private void expectCurrent(WebLinkTokenType token) throws StructureException {
+    var actual = current();
+    if (actual.type() != token) {
+      throw new StructureException("Expected %s but found %s('%s') at position %d"
+          .formatted(token, actual.type(), actual.text(), actual.position()));
+    }
+  }
+
+  /**
+   * Asserts that the type of the current token is one of the expected types.
+   *
+   * An empty list of expected types can never match and therefore always results in a
+   * {@link life.qbic.datamanager.signposting.http.WebLinkParser.StructureException}.
+   *
+   * @param expected zero or more expected token types.
+   * @throws StructureException if the current token does not match any expected token
+   */
+  private void expectCurrentAny(WebLinkTokenType... expected) throws StructureException {
+    var actual = current();
+    for (WebLinkTokenType candidate : expected) {
+      if (candidate.equals(actual.type())) {
+        return;
+      }
+    }
+    // no candidate matched: report all accepted types in the message
+    var expectedNames = String.join(", ",
+        Arrays.stream(expected).map(Enum::name).toList());
+    throw new StructureException(
+        "Expected any of [%s] but found %s('%s') at position %d"
+            .formatted(expectedNames, actual.type(), actual.text(), actual.position()));
+  }
+
+ /**
+ * Will use the token from the current position with {@link this#current()} and try to parse the
+ * raw URI value. After successful return the current position is advanced to the next token in
+ * the list.
+ *
+ * @return the raw value of the URI
+ */
+ private String parseUriReference() {
+ var uriValue = "";
+
+ // URI value must start with '<'
+ expectCurrent(WebLinkTokenType.LT);
+ next();
+
+ // URI reference expected
+ expectCurrent(WebLinkTokenType.URI);
+ uriValue = current().text();
+ next();
+
+ // URI value must end with '>'
+ expectCurrent(WebLinkTokenType.GT);
+
+ next();
+ return uriValue;
+ }
+
+  /**
+   * Looks up the token the parser cursor currently points to, without moving the cursor.
+   *
+   * @return the token on the current position.
+   */
+  private WebLinkToken current() {
+    return this.tokens.get(this.currentPosition);
+  }
+
+  /**
+   * Moves the cursor one token forward and returns the token it then points to. If the cursor
+   * is already on the last token of the token list, it stays there and the last token is returned.
+   *
+   * By contract, the parser expects the last item to be an EOF token (see
+   * {@link WebLinkTokenType#EOF}). So the last item in the token list will always be an EOF token.
+   * @return the token on the current position after the move
+   */
+  private WebLinkToken next() {
+    boolean onLastToken = currentPosition >= tokens.size() - 1;
+    currentPosition = onLastToken ? currentPosition : currentPosition + 1;
+    return current();
+  }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/Rfc8288WebLinkValidator.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/Rfc8288WebLinkValidator.java
new file mode 100644
index 000000000..ce2115cfc
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/Rfc8288WebLinkValidator.java
@@ -0,0 +1,182 @@
+package life.qbic.datamanager.signposting.http.validation;
+
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import life.qbic.datamanager.signposting.http.WebLinkParameter;
+import life.qbic.datamanager.signposting.http.WebLinkValidator;
+import life.qbic.datamanager.signposting.http.WebLink;
+import life.qbic.datamanager.signposting.http.parsing.RawLink;
+import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader;
+import life.qbic.datamanager.signposting.http.parsing.RawParam;
+
+/**
+ * Validation against RFC 8288 Web Linking.
+ *
+ * Violations against the specification will be recorded as
+ * {@link WebLinkValidator.IssueType#ERROR}. In the presence of at
+ * least one error, the web link MUST be regarded invalid and clients shall not continue to work
+ * with the link, but treat it as exception.
+ *
+ * The implementation also records issues as
+ * {@link WebLinkValidator.IssueType#WARNING}, in case the finding
+ * is not strictly against the RFC 8288, but e.g. a type usage is deprecated or when parameters have
+ * been skipped when the specification demands for it. A warning results in a still usable web link,
+ * but it is advised to investigate any findings.
+ *
+ */
+public class Rfc8288WebLinkValidator implements WebLinkValidator {
+
+  // tchar set defined in https://www.rfc-editor.org/rfc/rfc7230, section 3.2.6. The '-' must be
+  // the last character of the class: written as "+-." it forms a range that also matches ','.
+  private static final Pattern ALLOWED_TOKEN_CHARS = Pattern.compile("^[!#$%&'*+.^_`|~0-9A-Za-z-]+$");
+
+  private Rfc8288WebLinkValidator() {}
+
+  public static WebLinkValidator create() {
+    return new Rfc8288WebLinkValidator();
+  }
+
+  @Override
+  public ValidationResult validate(RawLinkHeader rawLinkHeader) {
+    var recordedIssues = new ArrayList<Issue>();
+    var webLinks = new ArrayList<WebLink>();
+    // links whose target is not a valid URI are dropped; the reason is recorded as an issue
+    for (RawLink rawLink : rawLinkHeader.rawLinks()) {
+      var webLink = validate(rawLink, recordedIssues);
+      if (webLink != null) {
+        webLinks.add(webLink);
+      }
+    }
+    return new ValidationResult(webLinks, new IssueReport(List.copyOf(recordedIssues)));
+  }
+
+  /**
+   * Validation entry point for a single raw link. Any findings must be recorded in the provided
+   * issue list. Only issue additions are allowed.
+   *
+   * In case the target is not a valid URI, the returned web link is {@code null}.
+   *
+   * @param rawLink        the raw link information from parsing
+   * @param recordedIssues a list to record negative findings as warnings and errors
+   * @return a web link object, or {@code null}, in case the target is not a valid URI
+   */
+  private WebLink validate(RawLink rawLink, List<Issue> recordedIssues) {
+    URI uri = null;
+    try {
+      uri = URI.create(rawLink.rawURI());
+    } catch (IllegalArgumentException e) {
+      recordedIssues.add(
+          Issue.error("Invalid URI '%s': %s".formatted(rawLink.rawURI(), e.getMessage())));
+    }
+    // parameters are validated even for an invalid target, so that all findings get reported
+    var parameters = validateAndConvertParams(rawLink.rawParameters(), recordedIssues);
+    if (uri == null) {
+      return null;
+    }
+    return new WebLink(uri, parameters);
+  }
+
+  /**
+   * Validates a list of raw parameters and creates a list of link parameters that can be used to
+   * build the final web link object.
+   *
+   * Any error or warning will be recorded in the provided recorded issue list.
+   *
+   * @param rawParams      a list of raw parameter values
+   * @param recordedIssues a list of recorded issues to add more findings during validation
+   * @return a list of converted link parameters
+   */
+  private List<WebLinkParameter> validateAndConvertParams(
+      List<RawParam> rawParams, List<Issue> recordedIssues) {
+    var params = new ArrayList<WebLinkParameter>();
+    var seenParams = new HashSet<String>();
+    for (RawParam rawParam : rawParams) {
+      validateParam(rawParam, recordedIssues);
+      validateParamOccurrenceAndAddLink(rawParam, seenParams, params, recordedIssues);
+    }
+    return params;
+  }
+
+  /**
+   * Validates a given raw parameter against known constraints and assumptions in the RFC 8288
+   * specification.
+   *
+   * Currently, checks:
+   *
+   *
+   * - the parameter name MUST contain allowed characters only (see token definition)
+   *
+   *
+   * @param rawParam       the raw parameter to be validated
+   * @param recordedIssues a list of issues to record more findings
+   */
+  private void validateParam(RawParam rawParam, List<Issue> recordedIssues) {
+    if (tokenContainsInvalidChars(rawParam.name())) {
+      recordedIssues.add(
+          Issue.error("Invalid parameter name '%s': Only the characters '%s' are allowed".formatted(
+              rawParam.name(), ALLOWED_TOKEN_CHARS.pattern())));
+    }
+  }
+
+  /**
+   * Checks a token for characters outside the allowed token character set.
+   *
+   * Allowed token chars are defined by RFC 7230, section 3.2.6; an empty token is invalid, too.
+   *
+   * @param token the token to be checked for invalid characters
+   * @return true, if the token violates the token character specification, else false
+   */
+  private static boolean tokenContainsInvalidChars(String token) {
+    boolean onlyAllowedChars = ALLOWED_TOKEN_CHARS.matcher(token).matches();
+    return !onlyAllowedChars;
+  }
+
+  /**
+   * Validates parameter occurrence rules and honors the RFC 8288 specification for skipping
+   * parameter entries.
+   *
+   * So far, multiple definitions are only allowed for the "hreflang" parameter. Parameters that
+   * are not defined by RFC 8288 are not restricted in their number of occurrences.
+   *
+   * Note: occurrences after the first are ignored and issue a warning. This is a strict requirement
+   * from the RFC 8288 and must be honored.
+   *
+   * @param rawParam               the raw parameter value
+   * @param recordedParameterNames a set to check, if a parameter has been already seen in the link
+   * @param parameters             a list of converted link parameters for the final web link
+   * @param recordedIssues         a list of issue records to add new findings
+   */
+  private void validateParamOccurrenceAndAddLink(
+      RawParam rawParam,
+      Set<String> recordedParameterNames,
+      List<WebLinkParameter> parameters,
+      List<Issue> recordedIssues) {
+    var rfcParamOptional = RfcLinkParameter.from(rawParam.name());
+    // known RFC parameters are tracked by their canonical name, all others by the name as given
+    var occurrenceKey = rfcParamOptional.map(RfcLinkParameter::rfcValue).orElse(rawParam.name());
+    // the "hreflang" parameter is the only parameter that is allowed to occur more than once
+    // see RFC 8288 for the parameter multiplicity definition
+    boolean singleOccurrenceOnly = rfcParamOptional
+        .filter(rfcParam -> rfcParam != RfcLinkParameter.HREFLANG)
+        .isPresent();
+    if (singleOccurrenceOnly && recordedParameterNames.contains(occurrenceKey)) {
+      recordedIssues.add(Issue.warning(
+          "Parameter '%s' is not allowed multiple times. Skipped parameter.".formatted(
+              occurrenceKey)));
+      return;
+    }
+    recordedParameterNames.add(occurrenceKey);
+
+    WebLinkParameter webLinkParameter;
+    if (rawParam.value() == null || rawParam.value().isEmpty()) {
+      webLinkParameter = WebLinkParameter.withoutValue(rawParam.name());
+    } else {
+      webLinkParameter = WebLinkParameter.create(rawParam.name(), rawParam.value());
+    }
+    parameters.add(webLinkParameter);
+  }
+}
diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/RfcLinkParameter.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/RfcLinkParameter.java
new file mode 100644
index 000000000..d56c6a2a2
--- /dev/null
+++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/RfcLinkParameter.java
@@ -0,0 +1,66 @@
+package life.qbic.datamanager.signposting.http.validation;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Standard parameters for the {@code Link} HTTP header.
+ *
+ *
+ * - "anchor" - see RFC 8288 section 3.2 (“Link Context”)
+ * - "hreflang" - see RFC 8288 section 3.4.1 (“The hreflang Target Attribute”)
+ * - "media" - see RFC 8288 section 3.4.2 (“The media Target Attribute”)
+ * - "rel" - see RFC 8288 section 3.3 (“Relation Types”)
+ * - "rev" - see RFC 8288 section 3.3 (historical note)
+ * - "title" - see RFC 8288 section 3.4.4 (“The title Target Attribute”)
+ * - "title*" - see RFC 8288 section 3.4.4 references RFC 5987 (“Character Set and Language Encoding for HTTP Header Field Parameters”)
+ * - "type" - see RFC 8288 section 3.4.3 (“The type Target Attribute”)
+ *
+ */
+public enum RfcLinkParameter {
+
+  ANCHOR("anchor"),
+  HREFLANG("hreflang"),
+  MEDIA("media"),
+  REL("rel"),
+  REV("rev"),
+  TITLE("title"),
+  TITLE_MULT("title*"),
+  TYPE("type");
+
+  private final String value;
+  // keyed by the (lower-case) RFC parameter name
+  private static final Map<String, RfcLinkParameter> LOOKUP = new HashMap<>();
+
+  static {
+    for (RfcLinkParameter p : RfcLinkParameter.values()) {
+      LOOKUP.put(p.value, p);
+    }
+  }
+
+  RfcLinkParameter(String value) {
+    this.value = value;
+  }
+
+  /**
+   * Returns the RFC compliant value of the parameter name.
+   *
+   * @return the alpha-value of the link parameter
+   */
+  public String rfcValue() {
+    return value;
+  }
+
+  /**
+   * Looks up the RfcLinkParameter for a given parameter name. The name is matched
+   * case-insensitively, as RFC 8288 case-normalises parameter names (see Appendix B.3).
+   *
+   * @param value the parameter name to look up, may be {@code null}
+   * @return the corresponding enum in an Optional, or Optional.empty() if there is none
+   */
+  public static Optional<RfcLinkParameter> from(String value) {
+    return Optional.ofNullable(value)
+        .map(name -> LOOKUP.get(name.toLowerCase(java.util.Locale.ROOT)));
+  }
+}
diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkProcessorSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkProcessorSpec.groovy
new file mode 100644
index 000000000..0932d90a0
--- /dev/null
+++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkProcessorSpec.groovy
@@ -0,0 +1,400 @@
+package life.qbic.datamanager.signposting.http
+
+import life.qbic.datamanager.signposting.http.WebLinkLexer.LexingException
+import life.qbic.datamanager.signposting.http.lexing.WebLinkToken
+import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader
+import life.qbic.datamanager.signposting.http.WebLinkValidator.Issue
+import life.qbic.datamanager.signposting.http.WebLinkValidator.IssueReport
+import life.qbic.datamanager.signposting.http.WebLinkValidator.ValidationResult
+import spock.lang.Specification
+import spock.lang.Unroll
+
+class WebLinkProcessorSpec extends Specification {
+
+ // ---------------------------------------------------------------------------
+ // Helpers – ADAPT CONSTRUCTORS HERE
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Creates an immutable list holding a single URI token at position 0. It only serves as an
+ * opaque value handed from the mocked lexer to the mocked parser in the tests below.
+ */
+ static List dummyTokens() {
+ return List.of(
+ new WebLinkToken(WebLinkTokenType.URI, "https://example.org", 0)
+ )
+ }
+
+ /**
+ * Creates a RawLinkHeader without any raw links.
+ * It is used as the canned return value of the mocked parser.
+ *
+ * Assumed record shape:
+ * public record RawLinkHeader(List rawLinks) { }
+ */
+ static RawLinkHeader dummyParsedHeader() {
+ return new RawLinkHeader(List.of())
+ }
+
+ /**
+ * Creates a WebLink without parameters whose target is "https://example.org/" followed by the
+ * given id, so that links created with different ids can be told apart.
+ *
+ * @param id suffix appended to the target URI
+ * @return a WebLink built from the URI and an empty parameter list
+ */
+ static WebLink dummyWebLink(String id) {
+ return new WebLink(
+ URI.create("https://example.org/" + id),
+ List.of()
+ )
+ }
+
+ // ---------------------------------------------------------------------------
+ // Tests
+ // ---------------------------------------------------------------------------
+
+  def "default processor can process minimal valid link header"() {
+    given: // a processor built without any custom component and a link-value with a target only
+    def processor = new WebLinkProcessor.Builder().build()
+    def input = "<https://example.org>"
+
+    when:
+    def result = processor.process(input)
+
+    then:
+    result != null
+    result.weblinks() != null
+    result.report() != null
+  }
+
+ /**
+ * When a custom lexer is provided, it must be called with the raw input instead of the default one.
+ */
+ def "processor uses configured lexer instead of default"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+ def validationResult = new ValidationResult(List.of(), new IssueReport(List.of()))
+
+ and:
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator)
+ .build()
+
+ when:
+ def result = processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens // the mocked lexer must receive the raw input exactly once
+ 1 * parser.parse(tokens) >> parsedHeader // and its tokens must be what the parser gets
+ 1 * validator.validate(parsedHeader) >> validationResult
+
+ and:
+ result.weblinks().isEmpty()
+ !result.report().hasErrors()
+ }
+
+ /**
+ * When a custom parser is provided, it must be called with the lexer output instead of the default one.
+ */
+ def "processor uses configured parser instead of default"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+ def validationResult = new ValidationResult(List.of(), new IssueReport(List.of()))
+
+ and:
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator)
+ .build()
+
+ when:
+ def result = processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> parsedHeader // the mocked parser must be invoked exactly once
+ 1 * validator.validate(parsedHeader) >> validationResult // with the parser's return value
+
+ and:
+ result != null
+ }
+
+  def "builder injects default validator when none configured"() {
+    given: // no validator is configured, so build() has to supply one for process() to succeed
+    def processor = new WebLinkProcessor.Builder().build()
+    def input = "<https://example.org>"
+
+    when:
+    def result = processor.process(input)
+
+    then:
+    result != null
+    result.weblinks() != null
+    result.report() != null
+  }
+
+ def "aggregates issues from multiple validators and uses last validator's weblinks"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator1 = Mock(WebLinkValidator)
+ def validator2 = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+
+ def link1 = dummyWebLink("v1") // returned by validator1, expected to be discarded
+ def link2 = dummyWebLink("v2") // returned by validator2, expected in the final result
+
+ def issue1 = Issue.error("first")
+ def issue2 = Issue.warning("second")
+
+ def result1 = new ValidationResult(List.of(link1), new IssueReport(List.of(issue1)))
+ def result2 = new ValidationResult(List.of(link2), new IssueReport(List.of(issue2)))
+
+ and:
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator1)
+ .withValidator(validator2)
+ .build()
+
+ when:
+ def result = processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> parsedHeader
+ 1 * validator1.validate(parsedHeader) >> result1 // both validators see the same parsed header
+ 1 * validator2.validate(parsedHeader) >> result2
+
+ and:
+ result.weblinks() == List.of(link2) // only the web links of the last validator survive
+ result.report().issues().containsAll(List.of(issue1, issue2)) // issues of both are merged
+ result.report().issues().size() == 2
+ }
+
+ @Unroll
+ def "process throws NullPointerException for null input (#caseName)"() { // data-driven, one row
+ given:
+ def processor = new WebLinkProcessor.Builder().build()
+
+ when:
+ processor.process(input)
+
+ then:
+ thrown(NullPointerException)
+
+ where: // #caseName in the method name is substituted from this table
+ caseName | input
+ "null header" | null
+ }
+
+  def "lexer exception is propagated and prevents parser and validators from running"() {
+    given:
+    def lexer = Mock(WebLinkLexer)
+    def parser = Mock(WebLinkParser)
+    def validator = Mock(WebLinkValidator)
+
+    def processor = new WebLinkProcessor.Builder()
+        .withLexer(lexer)
+        .withParser(parser)
+        .withValidator(validator)
+        .build()
+
+    when: // the target is deliberately left without its closing '>'
+    processor.process("<https://example.org")
+
+    then:
+    1 * lexer.lex("<https://example.org") >> { throw new LexingException("boom") }
+    0 * parser._
+    0 * validator._
+
+    and:
+    thrown(LexingException)
+  }
+
+ def "parser exception is propagated and prevents validators from running"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator)
+ .build()
+
+ when:
+ processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> { throw new RuntimeException("parse error") }
+ 0 * validator._ // no interaction of any kind with the validator is allowed
+
+ and:
+ thrown(RuntimeException)
+ }
+
+ def "validator exception is propagated and stops further validators"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator1 = Mock(WebLinkValidator)
+ def validator2 = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator1) // registered first, throws
+ .withValidator(validator2) // registered second, must never be reached
+ .build()
+
+ when:
+ processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> parsedHeader
+ 1 * validator1.validate(parsedHeader) >> { throw new RuntimeException("validator boom") }
+ 0 * validator2._ // no interaction of any kind with the second validator is allowed
+
+ and:
+ thrown(RuntimeException)
+ }
+
+ def "throws IllegalStateException when no validator produces a result (defensive branch)"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .build()
+
+ and: // the validator list is emptied via reflection on the private "validators" field
+ def validatorsField = WebLinkProcessor.getDeclaredField("validators")
+ validatorsField.accessible = true
+ validatorsField.set(processor, List.of()) // simulate broken internal state
+
+ when:
+ processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> parsedHeader
+
+ and:
+ def ex = thrown(IllegalStateException)
+ ex.message.contains("No validation result was found")
+ }
+
+ def "external mutation of issue list from validator does not break aggregated result"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+
+ def mutableIssues = new ArrayList() // kept by the test so it can be modified after process()
+ mutableIssues.add(Issue.error("original"))
+
+ def validationResult = new ValidationResult(
+ List.of(dummyWebLink("l1")),
+ new IssueReport(mutableIssues)
+ )
+
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator)
+ .build()
+
+ when:
+ def result = processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> parsedHeader
+ 1 * validator.validate(parsedHeader) >> validationResult
+
+ and:
+ result.report().issues().size() == 1
+
+ when: // the list originally handed to the validator's report is emptied afterwards
+ mutableIssues.clear()
+
+ then: // the processor result must not be a live view on that list
+ result.report().issues().size() == 1
+ }
+
+ def "external mutation of weblink list from validator does not alter processor result"() {
+ given:
+ def lexer = Mock(WebLinkLexer)
+ def parser = Mock(WebLinkParser)
+ def validator = Mock(WebLinkValidator)
+
+ def tokens = dummyTokens()
+ def parsedHeader = dummyParsedHeader()
+
+ def mutableWebLinks = new ArrayList() // kept by the test so it can be modified after process()
+ def link = dummyWebLink("foo")
+ mutableWebLinks.add(link)
+
+ def validationResult = new ValidationResult(
+ mutableWebLinks,
+ new IssueReport(List.of())
+ )
+
+ def processor = new WebLinkProcessor.Builder()
+ .withLexer(lexer)
+ .withParser(parser)
+ .withValidator(validator)
+ .build()
+
+ when:
+ def result = processor.process("")
+
+ then:
+ 1 * lexer.lex("") >> tokens
+ 1 * parser.parse(tokens) >> parsedHeader
+ 1 * validator.validate(parsedHeader) >> validationResult
+
+ and:
+ result.weblinks().size() == 1
+ result.weblinks().first() == link
+
+ when: // the list originally handed to the validation result is emptied afterwards
+ mutableWebLinks.clear()
+
+ then: // the processor result must not be a live view on that list
+ result.weblinks().size() == 1
+ }
+}
diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkSpec.groovy
new file mode 100644
index 000000000..23ef8e972
--- /dev/null
+++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkSpec.groovy
@@ -0,0 +1,23 @@
+package life.qbic.datamanager.signposting.http
+
+import spock.lang.Specification
+
+class WebLinkSpec extends Specification {
+
+  def "An empty parameter key must throw an FormatException"() {
+    given:
+    var someURI = URI.create("myuri")
+
+    and: // one regular entry plus one entry with an empty key
+    var someParameters = new HashMap<String, List<String>>()
+    someParameters.put("someKey", "someValue")
+    someParameters.put("", "anotherValue")
+
+    when:
+    WebLink.create(someURI, someParameters)
+
+    then:
+    thrown(FormatException.class)
+  }
+
+}
diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/lexing/WebLinkLexerSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/lexing/WebLinkLexerSpec.groovy
new file mode 100644
index 000000000..c40b3a8a1
--- /dev/null
+++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/lexing/WebLinkLexerSpec.groovy
@@ -0,0 +1,234 @@
+package life.qbic.datamanager.signposting.http.lexing
+
+
+import life.qbic.datamanager.signposting.http.WebLinkLexer
+import life.qbic.datamanager.signposting.http.WebLinkLexer.LexingException
+import life.qbic.datamanager.signposting.http.WebLinkTokenType;
+import spock.lang.Specification
+
+/**
+ * Specification for a {@link WebLinkLexer} implementation.
+ *
+ * These tests verify that a raw Web Link (RFC 8288) serialisation
+ * is correctly tokenised into a sequence of {@link WebLinkToken}s,
+ * ending with an EOF token, and that malformed input causes a
+ * {@link LexingException}.
+ *
+ */
+class WebLinkLexerSpec extends Specification {
+
+ // Adjust to your concrete implementation
+ WebLinkLexer lexer = new SimpleWebLinkLexer()
+
+ /**
+ * Minimal working example: just a URI reference in angle brackets.
+ *
+ * ABNF: link-value = "<" URI-Reference ">" *( ...)
+ */
+ def "lexes minimal link with URI only"() {
+ given:
+ def input = "<https://example.org/resource>"
+
+ when:
+ def tokens = lexer.lex(input)
+
+ then: "token sequence matches < URI > EOF"
+ tokens*.type() == [
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.EOF
+ ]
+
+ and: "URI token text is the raw reference"
+ tokens[1].text() == "https://example.org/resource"
+ }
+
+ /**
+ * Single parameter with a token value.
+ *
+ * Example: <https://example.org>; rel=self
+ */
+ def "lexes link with single token parameter"() {
+ given:
+ def input = "<https://example.org>; rel=self"
+
+ when:
+ def tokens = lexer.lex(input)
+
+ then:
+ tokens*.type() == [
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.SEMICOLON,
+ WebLinkTokenType.IDENT, // rel
+ WebLinkTokenType.EQUALS,
+ WebLinkTokenType.IDENT, // self
+ WebLinkTokenType.EOF
+ ]
+
+ and:
+ tokens[1].text() == "https://example.org"
+ tokens[4].text() == "rel"
+ tokens[6].text() == "self"
+ }
+
+ /**
+ * Single parameter with a quoted-string value.
+ *
+ * Example: <https://example.org>; title="A title"
+ */
+ def "lexes link with quoted-string parameter value"() {
+ given:
+ def input = '<https://example.org>; title="A title"'
+
+ when:
+ def tokens = lexer.lex(input)
+
+ then:
+ tokens*.type() == [
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.SEMICOLON,
+ WebLinkTokenType.IDENT, // title
+ WebLinkTokenType.EQUALS,
+ WebLinkTokenType.QUOTED, // "A title"
+ WebLinkTokenType.EOF
+ ]
+
+ and: "quoted token text does not contain quotes"
+ tokens[6].text() == "A title"
+ }
+
+ /**
+ * Empty quoted-string is valid: title="".
+ *
+ * RFC 7230 §3.2.6 allows zero-length quoted-string.
+ */
+ def "lexes parameter with empty quoted-string value"() {
+ given:
+ def input = '<https://example.org>; title=""'
+
+ when:
+ def tokens = lexer.lex(input)
+
+ then:
+ tokens*.type() == [
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.SEMICOLON,
+ WebLinkTokenType.IDENT, // title
+ WebLinkTokenType.EQUALS,
+ WebLinkTokenType.QUOTED, // ""
+ WebLinkTokenType.EOF
+ ]
+
+ and:
+ tokens[6].text() == ""
+ }
+
+ /**
+ * Whitespace (OWS/BWS) must be allowed around separators and '='.
+ *
+ * Example: <...> ; rel = "self"
+ */
+ def "ignores optional whitespace around separators and equals"() {
+ given:
+ def input = '<https://example.org> ; rel = "self" '
+
+ when:
+ def tokens = lexer.lex(input)
+
+ then: "same token sequence as without whitespace"
+ tokens*.type() == [
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.SEMICOLON,
+ WebLinkTokenType.IDENT,
+ WebLinkTokenType.EQUALS,
+ WebLinkTokenType.QUOTED,
+ WebLinkTokenType.EOF
+ ]
+
+ and:
+ tokens[4].text() == "rel"
+ tokens[6].text() == "self"
+ }
+
+ /**
+ * Multiple link-values separated by a comma at the header field level.
+ *
+ * Example: <https://example.org/a>; rel=self, <https://example.org/b>; rel=next
+ *
+ * The lexer should emit a COMMA token between the two link-values.
+ */
+ def "lexes multiple link-values separated by comma"() {
+ given:
+ def input = '<https://example.org/a>; rel=self, <https://example.org/b>; rel=next'
+
+ when:
+ def tokens = lexer.lex(input)
+
+ then:
+ tokens*.type() == [
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.SEMICOLON,
+ WebLinkTokenType.IDENT,
+ WebLinkTokenType.EQUALS,
+ WebLinkTokenType.IDENT,
+ WebLinkTokenType.COMMA,
+ WebLinkTokenType.LT,
+ WebLinkTokenType.URI,
+ WebLinkTokenType.GT,
+ WebLinkTokenType.SEMICOLON,
+ WebLinkTokenType.IDENT,
+ WebLinkTokenType.EQUALS,
+ WebLinkTokenType.IDENT,
+ WebLinkTokenType.EOF
+ ]
+
+ and:
+ tokens[1].text() == "https://example.org/a"
+ tokens[6].text() == "self"
+ tokens[9].text() == "https://example.org/b"
+ tokens[14].text() == "next"
+ }
+
+ /**
+ * Unterminated quoted-string should be rejected by the lexer.
+ *
+ * Example: <https://example.org>; title="unterminated
+ */
+ def "throws on unterminated quoted string"() {
+ given:
+ def input = '<https://example.org>; title="unterminated'
+
+ when:
+ lexer.lex(input)
+
+ then:
+ thrown(LexingException)
+ }
+
+ /**
+ * Why valid: a link-value may consist of only "<" URI-Reference ">" with zero link-params.
+ * Spec: RFC 8288 Section 3 (“Link Serialisation in HTTP Headers”), ABNF link-value = "<" URI-Reference ">" *(...); * allows zero params.
+ */
+ def "Minimal working serialized link, no parameters"() {
+ given:
+ var validSerialisation = "<https://example.org/resource>"
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: link-param is token BWS [ "=" BWS token ]; both rel and self are tokens.
+ * Spec: RFC 8288 Section 3; RFC 7230 section 3.2.6 defines token.
+ */
+ def "Single parameter, token value"() {
+ given:
+ var validSerialisation = "<https://example.org/resource>; rel=self"
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: link-param value may be token / quoted-string; both forms equivalent.
+ * Spec: RFC 8288 section 3 (note on token vs quoted-string equivalence); RFC 7230 section 3.2.6 for quoted-string.
+ */
+ def "Single parameter, quoted-string value"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; rel="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: ABNF allows zero or more ";" link-param after URI.
+ * Spec: RFC 8288 section 3, *( OWS ";" OWS link-param ).
+ */
+ def "Multiple parameters"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; rel="self"; type="application/json"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: OWS and BWS allow optional whitespace around separators and =.
+ * Spec: RFC 8288 section 3 (uses OWS/BWS); RFC 7230 section 3.2.3 (OWS), section 3.2.4 (BWS concept).
+ */
+ def "Whitespace around semi-colon and ="() {
+ given:
+ var validSerialisation = '<https://example.org/resource> ; rel = "self" ; type = application/json'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: link-param = token BWS [ "=" BWS ( token / quoted-string ) ]; the [ ... ] part is optional, so no = is allowed.
+ * Spec: RFC 8288 section 3, link-param ABNF (optional value).
+ */
+ def "Parameter without value"() {
+ given:
+ var validSerialisation = "<https://example.org/resource>; rel"
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: Empty string is a valid quoted-string.
+ * Spec: RFC 7230 section 3.2.6 (quoted-string can contain zero or more qdtext).
+ */
+ def "Parameter with empty quoted string"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; title=""'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: rel value is defined as a space-separated list of link relation types.
+ * Spec: RFC 8288 section 3.3 (“Relation Types”), which describes rel as a list of relation types.
+ */
+ def "Multiple rel values in one parameter"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; rel="self describedby item"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: URI-Reference may be relative, resolved against base URI.
+ * Spec: RFC 8288 section 3 (uses URI-Reference); RFC 3986 section 4.1 (“URI Reference”).
+ */
+ def "Relative URI"() {
+ given:
+ var validSerialisation = '</records/123>; rel="item"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: At the header level, field-content is opaque to RFC 8288; title is a defined target attribute and its value is a quoted-string.
+ * Spec: RFC 8288 section 3 (defines title as a target attribute); RFC 7230 section 3.2 (header fields treat value as opaque except for defined syntax).
+ */
+ def "Non-ASCII in quoted-string title"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; title="Données de recherche"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: link-value uses standard link-param names; rel="linkset" and type="application/linkset+json" are ordinary parameters.
+ * Spec: RFC 8288 section 3 (general link-param usage); linkset relation and media type from the Linkset draft (compatible with RFC 8288).
+ */
+ def "Linkset type example"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; rel="linkset"; type="application/linkset+json"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: Link = #link-value; #rule allows 1+ link-values separated by commas in a single header field.
+ * Spec: RFC 8288 section 3 (Link = #link-value); RFC 7230 section 7 (“ABNF list extension: #rule”).
+ */
+ def "Multiple link-values in one header"() {
+ given:
+ var validSerialisation = '<https://example.org/a>; rel="self", <https://example.org/b>; rel="next"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: Link = #link-value and each link-value may carry zero link-params, so two bare targets separated by a comma are allowed.
+ * Spec: RFC 8288 section 3 (Link = #link-value; link-value = "<" URI-Reference ">" *( ... )).
+ */
+ def "Multiple links without parameters"() {
+ given:
+ var validSerialisation = '<https://example.org/a>, <https://example.org/b>'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: type parameter carries a media-type; application/ld+json fits token syntax and media-type grammar.
+ * Spec: RFC 8288 section 3 (defines type parameter); RFC 7231 section 3.1.1.1 (media-type grammar uses tokens).
+ */
+ def "Parameter value as token with slash"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; type=application/ld+json'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: anchor is a registered link-parameter giving the context URI; its value is a quoted-string.
+ * Spec: RFC 8288 section 3.2 (“Target Attributes”) defines anchor; RFC 7230 section 3.2.6 for quoted-string.
+ */
+ def "Anchor parameter"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; rel="self"; anchor="https://example.org/records/123"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why valid: link-param value may be token or quoted-string; mixing quoted and unquoted values is allowed.
+ * Spec: RFC 8288 section 3 (token / quoted-string equivalence for link-param values); RFC 7230 section 3.2.6.
+ */
+ def "Mixed quoting styles in parameters"() {
+ given:
+ var validSerialisation = '<https://example.org/resource>; rel=self; type="application/json"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ var result = weblinkParser.parse(lexer.lex(validSerialisation))
+
+ then:
+ noExceptionThrown()
+ result != null
+ }
+
+ /**
+ * Why invalid: A trailing comma indicates an empty link value, which is invalid.
+ * Spec: RFC 8288 Section 3, link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ */
+ def "No trailing comma allowed for multiple link values"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource>,'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: Each ";" must be followed by a link-param; a trailing ";" leaves the parameter position empty.
+ * Spec: RFC 8288 section 3, *( OWS ";" OWS link-param ).
+ */
+ def "No trailing semicolon allowed for multiple link values"() {
+
+ given:
+ var invalidSerialisation = '<https://example.org/resource>;'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+
+ /**
+ * Why invalid: a link-value has to open with "<" URI-Reference ">"; a bare URI followed by parameters does not match that syntax.
+ * Spec: RFC 8288 Section 3, link-value = "<" URI-Reference ">" *( ... ).
+ */
+ def "Invalid: Missing angle brackets around URI"() {
+ given:
+ var bareUriLink = 'https://example.org/resource; rel="self"'
+
+ and:
+ var linkParser = SimpleWebLinkParser.create()
+ var tokenizer = new SimpleWebLinkLexer()
+
+ when:
+ linkParser.parse(tokenizer.lex(bareUriLink))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: link-value requires a "<URI-Reference>" prefix; parameters alone do not form a valid link-value.
+ * Spec: RFC 8288 Section 3, link-value ABNF.
+ */
+ def "Invalid: Parameters without URI"() {
+ given:
+ var invalidSerialisation = 'rel="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+
+ }
+
+ /**
+ * Why invalid: link-param must start with token; an empty name before equal sign violates token = 1*tchar.
+ * Spec: RFC 8288 section 3, link-param = token ...; RFC 7230 section 3.2.6 (token = 1*tchar).
+ */
+ def "Invalid: Empty parameter name"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource>; =self'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: Each ";" must be followed by a link-param; ";;" introduces an empty parameter without a token.
+ * Spec: RFC 8288 section 3, *( OWS ";" OWS link-param ) requires a link-param after each ";".
+ */
+ def "Invalid: Double semicolon introduces empty parameter"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource>;; rel="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: Comma is not allowed in token; parameter name containing "," violates token = 1*tchar.
+ * Spec: RFC 7230 section 3.2.6 (tchar set does not include ",").
+ */
+ def "Invalid: Parameter name with illegal character"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource>; re,l="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+
+ /**
+ * Why invalid: link-param requires a token before "="; "=" without a parameter name violates link-param syntax.
+ * Spec: RFC 8288 section 3, link-param = token BWS [ "=" ... ]; RFC 7230 section 3.2.6 (token required).
+ */
+ def "Invalid: Parameter with only equals sign and no name"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource>; = "self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: link-value must start with "<URI-Reference>"; placing parameters before the URI does not match the ABNF.
+ * Spec: RFC 8288 section 3, link-value = "<" URI-Reference ">" *( ... ).
+ */
+ def "Invalid: Parameters before URI"() {
+ given:
+ var invalidSerialisation = 'rel="self"; <https://example.org/resource>'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: URI must be enclosed in "<" and ">"; bare URI with parameters is not a valid link-value.
+ * Spec: RFC 8288 section 3, "<" URI-Reference ">" is mandatory in link-value.
+ *
+ * NOTE(review): uses the same input and expectation as "Invalid: Missing angle brackets around URI"; consider merging the two.
+ */
+ def "Invalid: URI not enclosed in angle brackets"() {
+ given:
+ var invalidSerialisation = 'https://example.org/resource; rel="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+
+ /**
+ * Why invalid: After ">" only OWS ";" OWS link-param is allowed; arbitrary token "foo" between ">" and ";" violates link-value syntax.
+ * Spec: RFC 8288 section 3, link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param ).
+ */
+ def "Invalid: Garbage between URI and first parameter"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource> foo ; rel="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: #link-value requires 1+ elements separated by commas; a leading comma introduces an empty element.
+ * Spec: RFC 8288 section 3 (Link = #link-value); RFC 7230 section 7 (#rule does not allow empty list elements).
+ */
+ def "Invalid: Leading comma in Link header list"() {
+ given:
+ var invalidSerialisation = ', <https://example.org/resource>; rel="self"'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+ /**
+ * Why invalid: #link-value requires 1+ elements separated by commas; a trailing comma implies an empty last element.
+ * Spec: RFC 8288 section 3 (Link = #link-value); RFC 7230 section 7 (#rule does not allow empty list elements).
+ */
+ def "Invalid: Trailing comma in Link header list"() {
+ given:
+ var invalidSerialisation = '<https://example.org/resource>; rel="self",'
+
+ and:
+ var weblinkParser = SimpleWebLinkParser.create()
+
+ and:
+ var lexer = new SimpleWebLinkLexer()
+
+ when:
+ weblinkParser.parse(lexer.lex(invalidSerialisation))
+
+ then:
+ thrown(WebLinkParser.StructureException.class)
+ }
+
+}
diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/validation/Rfc8288ValidatorSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/validation/Rfc8288ValidatorSpec.groovy
new file mode 100644
index 000000000..24b9cc83b
--- /dev/null
+++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/validation/Rfc8288ValidatorSpec.groovy
@@ -0,0 +1,274 @@
+package life.qbic.datamanager.signposting.http.validation
+
+import life.qbic.datamanager.signposting.http.WebLinkValidator
+import life.qbic.datamanager.signposting.http.WebLink
+import life.qbic.datamanager.signposting.http.parsing.RawLink
+import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader
+import life.qbic.datamanager.signposting.http.parsing.RawParam
+import spock.lang.Specification
+
+/**
+ * Specification for {@link Rfc8288WebLinkValidator}.
+ *
+ * Covers basic RFC 8288 semantics:
+ *
+ * - Valid URIs create {@link WebLink} instances without issues.
+ * - Invalid URIs create error {@link WebLinkValidator.Issue}s and no WebLink for that entry.
+ * - Multiple links are all validated; one invalid URI does not stop validation.
+ * - Unknown / extension parameters are preserved and do not cause issues.
+ *
+ *
+ * @since
+ */
+class Rfc8288ValidatorSpec extends Specification {
+
+ /**
+ * Valid single link with a syntactically correct absolute URI
+ * should yield one WebLink and no issues.
+ */
+ def "single valid link produces one WebLink and no issues"() {
+ given:
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/resource", [])
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "no issues are reported"
+ !result.containsIssues()
+ !result.report().hasErrors()
+ !result.report().hasWarnings()
+
+ and: "exactly one WebLink is produced with the expected URI and empty params"
+ result.weblinks().size() == 1
+ WebLink link = result.weblinks().first()
+ link.reference().toString() == "https://example.org/resource"
+ link.params().isEmpty()
+ }
+
+ /**
+ * A link with an invalid URI string should not yield a WebLink instance,
+ * but should record at least one error Issue.
+ */
+ def "single invalid URI produces error issue and no WebLinks"() {
+ given:
+ // 'not a uri' contains spaces and is therefore not a syntactically valid URI reference
+ def rawHeader = new RawLinkHeader([
+ new RawLink("not a uri", [])
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "an error is reported"
+ result.containsIssues()
+ result.report().hasErrors()
+
+ and: "no WebLinks are produced for invalid URIs"
+ result.weblinks().isEmpty()
+ }
+
+ /**
+ * When a header contains several links and one of them has an invalid URI,
+ * validation must continue past it (the invalid entry is listed first here)
+ * and WebLinks must still be produced for the valid entries.
+ */
+ def "multiple links - one invalid URI does not prevent valid WebLinks"() {
+ given:
+ def rawHeader = new RawLinkHeader([
+ new RawLink("not a uri", []),
+ new RawLink("https://example.org/valid", [])
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "at least one error is reported for the invalid entry"
+ result.containsIssues()
+ result.report().hasErrors()
+
+ and: "the valid URI still yields a WebLink"
+ result.weblinks().size() == 1
+ result.weblinks().first().reference().toString() == "https://example.org/valid"
+ }
+
+ /**
+ * Unknown / extension parameters should be preserved on the WebLink
+ * and must not trigger errors at RFC 8288 level.
+ *
+ * Example: Link: <https://example.org/with-param>; x-custom="value"
+ */
+ def "unknown extension parameters are preserved and do not cause issues"() {
+ given:
+ def params = [new RawParam("x-custom", "value")] // arbitrary extension parameter
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/with-param", params)
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "no errors are reported for unknown parameters"
+ !result.report().hasErrors()
+
+ and: "at RFC level, we do not warn about extension parameters either (optional; adjust if you decide to warn)"
+ !result.report().hasWarnings()
+
+ and: "the parameter is preserved on the resulting WebLink"
+ result.weblinks().size() == 1
+ def link = result.weblinks().first()
+ link.extensionAttribute("x-custom")[0] == "value"
+ }
+
+ /**
+ * A parameter without a value (e.g. 'rel' without '=...') is structurally
+ * allowed in RFC 8288. At the RFC semantic level we accept it and leave any
+ * deeper interpretation to profile-specific validators (e.g. Signposting).
+ *
+ * How you map "no value" into your RawLink/WebLink model is up to your
+ * implementation; here we assume null or empty string is used to represent it.
+ */
+ def "parameter without value is accepted at RFC level"() {
+ given:
+ // Example representation: parameter present with null value.
+ // Adapt this to your actual RawLink model.
+ def params = [new RawParam("rel", null)]
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/no-value-param", params)
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "URI is valid, so we get a WebLink back"
+ result.weblinks().size() == 1
+
+ and: "parameter without value does not cause an error at RFC-level"
+ !result.report().hasErrors()
+ }
+
+ def "parameter anchor with one occurrence is allowed"() {
+ given:
+ // A single 'anchor' parameter carrying a URI value;
+ // one occurrence must be accepted without errors.
+ def params = [new RawParam("anchor", "https://example.org/one-anchor-only")]
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/one-anchor-only", params)
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "URI is valid, so we get a WebLink back"
+ result.weblinks().size() == 1
+
+ and: "parameter anchor with only one occurrence does not cause an error at RFC-level"
+ !result.report().hasErrors()
+ }
+
+ def "a parameter with allowed multiplicity of 1 must be only processed on the first occurrence"() {
+ given:
+ // Two 'rel' parameters on the same link: only the first one may be used,
+ // the second occurrence must be skipped and reported as a warning.
+ def firstParam = new RawParam("rel", "https://example.org/first-occurrence")
+ def secondParam = new RawParam("rel", "https://example.org/next-occurrence")
+ def params = [firstParam, secondParam]
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/one-anchor-only", params)
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "URI is valid, so we get a WebLink back"
+ result.weblinks().size() == 1
+
+ and: "a repeated rel parameter does not cause an error at RFC-level"
+ !result.report().hasErrors()
+
+ and: "but results in a warning, since the second occurrence is skipped"
+ result.report().hasWarnings()
+
+ and: "uses only the value of the first occurrence"
+ var relations = result.weblinks().get(0).rel()
+ relations.size() == 1
+ relations.get(0) == firstParam.value()
+ }
+
+ def "the rel parameter can contain multiple relations as whitespace-separated list"() {
+ given:
+ // A single 'rel' parameter whose value lists three relation types
+ // separated by single spaces.
+ def firstParam = new RawParam("rel", "self describedby another")
+ def params = [firstParam]
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/one-anchor-only", params)
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "URI is valid, so we get a WebLink back"
+ result.weblinks().size() == 1
+
+ and: "parameter rel with only one occurrence does not cause an error at RFC-level"
+ !result.report().hasErrors()
+
+ and: "results in no warnings"
+ !result.report().hasWarnings()
+
+ and: "splits the relations into three values"
+ var relations = result.weblinks().get(0).rel()
+ relations.size() == 3
+ }
+
+
+ def "parameter anchor must not have multiple occurrences"() {
+ given:
+ // Two 'anchor' parameters on the same link: the repeated occurrence
+ // must be reported by the validator.
+ def params = [new RawParam("anchor", "https://example.org/one-anchor-only"),
+ new RawParam("anchor", "https://example.org/another-anchor")]
+ def rawHeader = new RawLinkHeader([
+ new RawLink("https://example.org/one-anchor-only", params)
+ ])
+
+ and:
+ def validator = new Rfc8288WebLinkValidator()
+
+ when:
+ WebLinkValidator.ValidationResult result = validator.validate(rawHeader)
+
+ then: "URI is valid, so we get a WebLink back"
+ result.weblinks().size() == 1
+
+ and: "the repeated anchor parameter is reported as exactly one warning"
+ result.report().hasWarnings()
+ result.report().issues().size() == 1
+ }
+}
diff --git a/pom.xml b/pom.xml
index 6d6d025ff..48d58588b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -33,6 +33,7 @@
email-service-provider
finances-infrastructure
finances-api
+ fair-signposting
pom