diff --git a/fair-signposting/pom.xml b/fair-signposting/pom.xml new file mode 100644 index 000000000..0711ab1f9 --- /dev/null +++ b/fair-signposting/pom.xml @@ -0,0 +1,32 @@ + + + 4.0.0 + + life.qbic.datamanager + datamanager + 1.11.0 + + + fair-signposting + + + 21 + 21 + UTF-8 + + + + + org.slf4j + slf4j-api + + + org.spockframework + spock-core + test + + + + diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/FormatException.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/FormatException.java new file mode 100644 index 000000000..4697628e9 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/FormatException.java @@ -0,0 +1,14 @@ +package life.qbic.datamanager.signposting.http; + +/** + * + * + *

+ * + * @since + */ +public final class FormatException extends RuntimeException { + public FormatException(String message) { + super(message); + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLink.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLink.java new file mode 100644 index 000000000..b6411175c --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLink.java @@ -0,0 +1,139 @@ +package life.qbic.datamanager.signposting.http; + +import java.net.URI; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import life.qbic.datamanager.signposting.http.validation.RfcLinkParameter; + +/** + * A Java record representing a web link object following the + * RFC 8288 model specification. + */ +public record WebLink(URI reference, List params) { + + /** + * Creates an RFC 8288 compliant web + * link object. + *

+ * Following RFC8288, the ABNF for a link parameter is: + *

+ * {@code link-param = token BWS [ "=" BWS ( token / quoted-string ) ]} + *

+ * The parameter key must not be withoutValue, so during construction the {@code params} keys are checked + * for an withoutValue key. The values can be withoutValue though. + * + * @param reference a {@link URI} pointing to the actual resource + * @param params a {@link Map} of parameters as keys and a list of their values + * @return the new Weblink + * @throws NullPointerException if any method argument is {@code null} + */ + public static WebLink create(URI reference, List params) + throws NullPointerException { + Objects.requireNonNull(reference); + Objects.requireNonNull(params); + return new WebLink(reference, params); + } + + /** + * Web link constructor that can be used if a web link has no parameters. + *

+ * + * @param reference a {@link URI} pointing to the actual resource + * @return the new Weblink + * @throws NullPointerException if any method argument is {@code null} + */ + public static WebLink create(URI reference) throws NullPointerException { + return create(reference, List.of()); + } + + + public Optional anchor() { + return Optional.empty(); + } + + public List hreflang() { + return List.of(); + } + + public Optional media() { + return Optional.empty(); + } + + /** + * Returns all "rel" parameter values of the link. + *

+ * RFC 8288 section 3.3 states that the relation parameter MUST NOT appear more than once in a + * given link-value, but one "rel" parameter value can contain multiple relation-types when + * separated by one or more space characters (SP = ASCII 0x20): + *

+ * {@code relation-type *( 1*SP relation-type ) }. + *

+ * The method returns space-separated values as individual values of the "rel" parameter. + * + * @return a list of relation parameter values + */ + public List rel() { + return this.params.stream() + .filter(param -> param.name().equals("rel")) + .map(WebLinkParameter::value) + .map(value -> value.split("\\s+")) + .flatMap(Arrays::stream) + .toList(); + } + + /** + * Returns all "rev" parameter values of the link. + *

+ * RFC 8288 section 3.3 does not specify the multiplicity of occurrence. But given the close + * relation to the "rel" parameter and its definition in the same section, web link will treat the + * "rev" parameter equally. + *

+ * As with the "rel" parameter, multiple regular relation types are allowed when they are + * separated by one or more space characters (SP = ASCII 0x20): + *

+ * {@code relation-type *( 1*SP relation-type ) }. + *

+ * The method returns space-separated values as individual values of the "rel" parameter. + * + * @return a list of relation parameter values + */ + public List rev() { + return this.params.stream() + .filter(param -> param.name().equals("rev")) + .map(WebLinkParameter::value) + .map(value -> value.split("\\s+")) + .flatMap(Arrays::stream) + .toList(); + } + + public Optional title() { + return Optional.empty(); + } + + public Optional titleMultiple() { + return Optional.empty(); + } + + public Optional type() { + return Optional.empty(); + } + + public Map> extensionAttributes() { + Set rfcParameterNames = Arrays.stream(RfcLinkParameter.values()) + .map(RfcLinkParameter::rfcValue) + .collect(Collectors.toSet()); + return this.params.stream() + .filter(param -> !rfcParameterNames.contains(param.name())) + .collect(Collectors.groupingBy(WebLinkParameter::name, + Collectors.mapping(WebLinkParameter::value, Collectors.toList()))); + } + + public List extensionAttribute(String name) { + return extensionAttributes().getOrDefault(name, List.of()); + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkLexer.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkLexer.java new file mode 100644 index 000000000..5e907c862 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkLexer.java @@ -0,0 +1,36 @@ +package life.qbic.datamanager.signposting.http; + +import java.util.List; +import life.qbic.datamanager.signposting.http.lexing.WebLinkToken; + +/** + * Lexes a single Web Link (RFC 8288) serialisation string into a list of tokens. + *

+ * Implementations should be stateless or thread-confined. + */ +public interface WebLinkLexer { + + /** + * Lex the given input string into a sequence of tokens. + * + * @param input the raw Link header field-value or link-value + * @return list of tokens ending with an EOF token + * @throws LexingException if the input is not lexically well-formed + */ + List lex(String input) throws LexingException; + + /** + * Thrown when the input cannot be tokenised according to the Web Link lexical rules. + */ + class LexingException extends RuntimeException { + + public LexingException(String message) { + super(message); + } + + public LexingException(String message, Throwable cause) { + super(message, cause); + } + } + +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParameter.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParameter.java new file mode 100644 index 000000000..0d3c6fec9 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParameter.java @@ -0,0 +1,71 @@ +package life.qbic.datamanager.signposting.http; + +/** + * A parameter for the HTTP Link header attribute. + *

+ * Based on RFC 8288, a parameter with only a name is valid. + *

+ *

+ * {@code
+ * // ABNF notation for web links
+ * Link = #link-value
+ * link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ * link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
+ *
+ * // valid parameter examples
+ * "Link: ; rel; param1;"
+ * "Link: ; rel="self"; param1="";"
+ * }
+ * 
+ *

+ * It is important that different parameter serialisation cases are handled correctly. + *

+ * The following example shows four distinct cases that must be preserved during de-serialisation: + * + *

+ * {@code
+ * x=""  // empty double-quoted string
+ * x="y" // double-quoted with content
+ * x=y   // token value
+ * x     // parameter name only
+ * }
+ * 
+ *

+ * These are all valid parameter serialisations. + * + * + */ +public record WebLinkParameter(String name, String value) { + + /** + * Creates a new web link parameter with the provided name and value. + * + * @param name the name of the web link parameter + * @param value the value of the web link parameter + */ + public static WebLinkParameter create(String name, String value) { + return new WebLinkParameter(name, value); + } + + /** + * Creates a new web link parameter without a value. + * + * @param name the name of the parameter + */ + public static WebLinkParameter withoutValue(String name) { + return new WebLinkParameter(name, null); + } + + /** + * Checks if the web link parameter has a value. + *

+ * The method will return {@code true} only when a value (including an empty one) has been + * provided. + * + * @return {@code true}, if the parameter has a value (including an empty one). Returns + * {@code false}, if no value has been provided + */ + public boolean hasValue() { + return value != null; + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParser.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParser.java new file mode 100644 index 000000000..007828529 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkParser.java @@ -0,0 +1,59 @@ +package life.qbic.datamanager.signposting.http; + +import java.util.List; +import life.qbic.datamanager.signposting.http.lexing.WebLinkToken; +import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader; + +/** + * A parser that checks structural integrity of an HTTP Link header entry in compliance with RFC 8288. + *

+ * A web link parser is able to process tokens from web link lexing and convert the tokens to raw + * link headers after structural validation, which can be seen as an AST (abstract syntax tree). + *

+ * Note: Implementations must not perform semantic validation; this is the concern of + * {@link WebLinkValidator} implementations. + *

+ * In case of structural violations, implementations of the {@link WebLinkParser} interface must + * throw a {@link StructureException}. + *

+ * RFC 8288 section 3 describes the serialization of the Link HTTP header attribute: + * + *

+ *   {@code
+ *   Link       = #link-value
+ *   link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ *   link-param = token BWS [ "=" BWS ( token / quoted-string ) ]
+ *   }
+ * 
+ *

+ * The {@link WebLinkParser} interface can process {@link WebLinkToken}, which are the output of + * lexing raw character values into known token values. See {@link WebLinkLexer} for details to + * lexers. + */ +public interface WebLinkParser { + + /** + * Parses a list of {@link WebLinkToken} and performs structural validation based on the RFC 8288 + * serialisation requirement. + *

+ * The returned value is an AST of a raw link header with a list of raw web link items that can be + * used for semantic validation. + * + * @param tokens a list of web link tokens to process + * @return a raw link header parsed from the web link tokens + * @throws NullPointerException if the token list is {@code null} + * @throws StructureException if any structural violation occurred + */ + RawLinkHeader parse(List tokens) throws NullPointerException, StructureException; + + /** + * Indicates a structural violation of the RFC 8288 web link serialisation requirement. + */ + class StructureException extends RuntimeException { + + public StructureException(String message) { + super(message); + } + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkProcessor.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkProcessor.java new file mode 100644 index 000000000..9aa524ede --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkProcessor.java @@ -0,0 +1,210 @@ +package life.qbic.datamanager.signposting.http; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import life.qbic.datamanager.signposting.http.WebLinkLexer.LexingException; +import life.qbic.datamanager.signposting.http.WebLinkParser.StructureException; +import life.qbic.datamanager.signposting.http.WebLinkValidator.Issue; +import life.qbic.datamanager.signposting.http.WebLinkValidator.IssueReport; +import life.qbic.datamanager.signposting.http.WebLinkValidator.ValidationResult; +import life.qbic.datamanager.signposting.http.lexing.SimpleWebLinkLexer; +import life.qbic.datamanager.signposting.http.parsing.SimpleWebLinkParser; +import life.qbic.datamanager.signposting.http.validation.Rfc8288WebLinkValidator; + +/** + * Configurable processor for raw web link strings from the HTTP Link header field. + *

+ * The underlying standard is RFC 8288 + * + */ +public class WebLinkProcessor { + + private final WebLinkLexer lexer; + private final WebLinkParser parser; + private final List validators; + + private WebLinkProcessor() { + this.lexer = null; + this.parser = null; + this.validators = null; + } + + private WebLinkProcessor( + WebLinkLexer selectedLexer, + WebLinkParser selectedParser, + List selectedValidators) { + this.lexer = Objects.requireNonNull(selectedLexer); + this.parser = Objects.requireNonNull(selectedParser); + this.validators = List.copyOf(Objects.requireNonNull(selectedValidators)); + } + + /** + * Processes a raw link header string and returns a validation result with the final web links and + * an issue report. + *

+ * The processor performs different steps until the validation result returns: + * + *

    + *
  1. Tokenization: the raw string gets translated into enumerated token values
  2. + *
  3. Parsing: the token collection gets structurally parsed and checked, the result is an AST of raw link values
  4. + *
  5. Validation: one or more validation steps to semantically check the raw web links
  6. + *
+ *

+ * The caller is advised to check the {@link ValidationResult#report()} in case issues have been recorded. + *

+ * By contract of the validation interface, validators MUST record issues as errors in case there are severe semantically + * deviations from the model the validator represents. Warnings can be investigated, but clients + * can expect to continue to use the returned web links. + * + * @param rawLinkHeader the serialized raw link header value + * @return a validation result with the web links and an issue report with recorded findings of + * warnings and errors. + * @throws LexingException in case the header contains invalid characters (during + * tokenizing) + * @throws StructureException in case the header does not have the expected structure (during + * parsing) + * @throws NullPointerException in case the raw link header is {@code null} + */ + public ValidationResult process(String rawLinkHeader) + throws LexingException, StructureException, NullPointerException { + var header = Objects.requireNonNull(rawLinkHeader); + var tokenizedHeader = lexer.lex(header); + var parsedHeader = parser.parse(tokenizedHeader); + + var aggregatedIssues = new ArrayList(); + ValidationResult cachedValidationResult = null; + for (WebLinkValidator validator : validators) { + cachedValidationResult = validator.validate(parsedHeader); + aggregatedIssues.addAll(cachedValidationResult.report().issues()); + } + + if (cachedValidationResult == null) { + throw new IllegalStateException( + "No validation result was found after processing: " + rawLinkHeader); + } + + return new ValidationResult(cachedValidationResult.weblinks(), + new IssueReport(aggregatedIssues)); + } + + /** + * Builder for a {@link WebLinkProcessor}. + *

+ * The builder allows for flexible configuration of the different processing steps: + * + *

    + *
  1. Tokenization: the raw string gets translated into enumerated token values
  2. + *
  3. Parsing: the token collection gets structurally parsed and checked, the result is an AST of raw link values
  4. + *
  5. Validation: one or more validation steps to semantically check the raw web links
  6. + *
+ *

+ * It is possible to create a default processor by simply omitting any configuration: + * + *

+   *   {@code
+   *   // Creates a processor with default configuration
+   *   WebLinkProcessor defaultProcessor = new Builder().build()
+   *   }
+   * 
+ *

+ * The default components are: + * + *

+ * + * The RFC 8288 validator will only be used if no validator has been provided. If you want + * to combine the RFC validator with additional ones, you can do so: + * + *
+   *   {@code
+   *
+   *   WebLinkProcessor customProcessor =
+   *      new Builder().withValidator(Rfc8288WebLinkValidator.create())
+   *                   .withValidator(new MyCustomValidator())
+   *                   .build()
+   *   }
+   * 
+ */ + public static class Builder { + + private WebLinkLexer configuredLexer; + + private WebLinkParser configuredParser; + + private final List configuredValidators = new ArrayList<>(); + + /** + * Configures a different lexer from the default that shall be used in the processing. + * + * @param lexer the lexer to be used in the processing + * @return the builder instance + */ + public Builder withLexer(WebLinkLexer lexer) { + configuredLexer = lexer; + return this; + } + + /** + * Configures a different lexer from the default that shall be used in the processing. + * + * @param lexer the lexer to be used in the processing + * @return the builder instance + */ + public Builder withParser(WebLinkParser parser) { + configuredParser = parser; + return this; + } + + /** + * Configures a different lexer from the default that shall be used in the processing. + *

+ * Multiple validators can be configured by calling this method repeatedly. The validators are + * called in the order they have been configured on the builder. + * + *

+     *   {@code
+     *   var processor = new Builder().withValidator(first)  // first validator
+     *                                .withValidator(other)  // appends next validator
+     *                                .build()
+     *   }
+     * 
+ * + * @param validator the validator to be used in the processing + * @return the builder instance + */ + public Builder withValidator(WebLinkValidator validator) { + configuredValidators.add(validator); + return this; + } + + /** + * Creates instance of a web link processor object based on the configuration. + * + * @return the configured web link processor + */ + public WebLinkProcessor build() { + var selectedLexer = configuredLexer == null ? defaultLexer() : configuredLexer; + var selectedParser = configuredParser == null ? defaultParser() : configuredParser; + var selectedValidators = + configuredValidators.isEmpty() ? List.of(defaultValidator()) : configuredValidators; + + return new WebLinkProcessor(selectedLexer, selectedParser, selectedValidators); + } + + private WebLinkParser defaultParser() { + return SimpleWebLinkParser.create(); + } + + private static WebLinkLexer defaultLexer() { + return SimpleWebLinkLexer.create(); + } + + private static WebLinkValidator defaultValidator() { + return Rfc8288WebLinkValidator.create(); + } + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkTokenType.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkTokenType.java new file mode 100644 index 000000000..4b1ac1472 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkTokenType.java @@ -0,0 +1,53 @@ +package life.qbic.datamanager.signposting.http; + +/** + * Enumeration for being used to describe different token types for the + */ +public enum WebLinkTokenType { + + /** + * "<" + */ + LT, + + /** + * ">" + */ + GT, + + /** + * ";" + */ + SEMICOLON, + + /** + * "=" + */ + EQUALS, + + /** + * "," + */ + COMMA, + + /** + * A URI-Reference between "<" and ">". The angle brackets themselves are represented by LT and GT + * tokens. + */ + URI, + + /** + * An unquoted token (e.g. parameter name, token value). 
+ */ + IDENT, + + /** + * A quoted-string value without the surrounding quotes. + */ + QUOTED, + + /** + * End-of-input marker. + */ + EOF +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkValidator.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkValidator.java new file mode 100644 index 000000000..9b5f141d1 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/WebLinkValidator.java @@ -0,0 +1,117 @@ +package life.qbic.datamanager.signposting.http; + +import java.util.List; +import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader; + +/** + * Performs validation of raw web link headers. + *

+ * Validators are expected to consume the output of a {@link WebLinkParser} and convert the web link + * information into reusable web link objects. + *

+ * Implementations of the {@link WebLinkValidator} interface must perform semantic validation only. + *

+ * Implementations also must not interrupt the validation on violations but provide the information + * in the attached {@link IssueReport} of the {@link ValidationResult}. + */ +public interface WebLinkValidator { + + /** + * Validates the given raw link header against the semantic integrity of the validator type. + *

+ * Violations on the semantic level must be recorded in the returned issue list with type + * {@link IssueType#ERROR}. In the presence of any error, at least one web link entry is faulty + * and appropriate error handling is advised. + *

+ * Warnings shall indicate less strict deviations from the specification and must result in usable + * web link objects. If no errors are provided, the client must be able to safely continue to + * use the web link object in the semantic scope that the validator guarantees. + *

+ * The implementation MUST NOT interrupt the validation in case any error is recorded. Validation + * shall always complete successfully and the method return the validation result. + * + * @param rawLinkHeader the raw link header + * @return the validation result with a list of web link objects and an {@link IssueReport}. + * @throws NullPointerException if the raw link header is {@code null} + */ + ValidationResult validate(RawLinkHeader rawLinkHeader) throws NullPointerException; + + /** + * A summary of the validation with the final web links for further use and an issue report with + * validation warnings or violations. + * + * @param weblinks a collection of web links that have been converted from validation + * @param report a container for recorded issues during validation + */ + record ValidationResult(List weblinks, IssueReport report) { + + public ValidationResult { + weblinks = List.copyOf(weblinks); + } + + public boolean containsIssues() { + return !report.isEmpty(); + } + } + + /** + * A container for recorded issues during validation. + * + * @param issues the issues found during validation + */ + record IssueReport(List issues) { + + public boolean hasErrors() { + return issues.stream().anyMatch(Issue::isError); + } + + public boolean hasWarnings() { + return issues.stream().anyMatch(Issue::isWarning); + } + + public boolean isEmpty() { + return issues.isEmpty(); + } + } + + /** + * Describes any deviations from a semantic model either as warning or error. + * + * @param message a descriptive message that helps clients to process the issue + * @param type the severity level of the issue. {@link IssueType#ERROR} shall be used to + * indicate serious violations from the semantic model that would lead to wrong + * interpretation by the client. For less severe deviations the + * {@link IssueType#WARNING} can be used. 
+ */ + record Issue(String message, IssueType type) { + + public static Issue warning(String message) { + return new Issue(message, IssueType.WARNING); + } + + public static Issue error(String message) { + return new Issue(message, IssueType.ERROR); + } + + public boolean isWarning() { + return type.equals(IssueType.WARNING); + } + + public boolean isError() { + return type.equals(IssueType.ERROR); + } + } + + /** + * An enumeration of different issue types. + * + *

    + *
  • ERROR - Deviation from the semantic level that breaks interpretation, a specification or contract
  • + *
  • WARNING - Deviation from the semantic level that does not break interpretation, specification or a contract
  • + *
+ */ + enum IssueType { + WARNING, + ERROR + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/SimpleWebLinkLexer.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/SimpleWebLinkLexer.java new file mode 100644 index 000000000..4247fbaa5 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/SimpleWebLinkLexer.java @@ -0,0 +1,206 @@ +package life.qbic.datamanager.signposting.http.lexing; + +import java.util.ArrayList; +import java.util.List; +import life.qbic.datamanager.signposting.http.WebLinkLexer; +import life.qbic.datamanager.signposting.http.WebLinkTokenType; + +/** + * Simple scanning lexer for RFC 8288 Web Link serialisations. + *

+ * This implementation: + *

    + *
  • Skips ASCII whitespace (OWS/BWS) between tokens
  • + *
  • Treats URIs as everything between "<" and ">"
  • + *
  • Treats unquoted tokens as IDENT
  • + *
  • Produces QUOTED tokens for quoted-string values (without the quotes)
  • + *
  • Emits an EOF token at the end of input
  • + *
+ *

+ * Parsing and semantic validation are handled by later stages. + */ +public final class SimpleWebLinkLexer implements WebLinkLexer { + + private SimpleWebLinkLexer() {} + + public static SimpleWebLinkLexer create() { + return new SimpleWebLinkLexer(); + } + + + @Override + public List lex(String input) throws LexingException { + return new Scanner(input).scan(); + } + + /** + * Internal scanner doing a single left-to-right pass over the input. + */ + private static final class Scanner { + + private final String input; + private final int length; + private int pos = 0; + + private final List tokens = new ArrayList<>(); + + Scanner(String input) { + this.input = input != null ? input : ""; + this.length = this.input.length(); + } + + List scan() { + while (!eof()) { + char c = peek(); + + if (isWhitespace(c)) { + consumeWhitespace(); + continue; + } + + int start = pos; + + switch (c) { + case '<' -> readUri(start); + case '>' -> { + advance(); + tokens.add(WebLinkToken.of(WebLinkTokenType.GT, ">", start)); + } + case ';' -> { + advance(); + tokens.add(WebLinkToken.of(WebLinkTokenType.SEMICOLON, ";", start)); + } + case '=' -> { + advance(); + tokens.add(WebLinkToken.of(WebLinkTokenType.EQUALS, "=", start)); + } + case ',' -> { + advance(); + tokens.add(WebLinkToken.of(WebLinkTokenType.COMMA, ",", start)); + } + case '"' -> readQuoted(start); + default -> readIdent(start); + } + } + + tokens.add(WebLinkToken.of(WebLinkTokenType.EOF, "", pos)); + return tokens; + } + + /** + * Reads a URI-Reference between "<" and ">". Emits three tokens: LT, URI, GT. 
+ */ + private void readUri(int start) { + // consume "<" + advance(); + tokens.add(WebLinkToken.of(WebLinkTokenType.LT, "<", start)); + + int uriStart = pos; + + while (!eof()) { + char c = peek(); + if (c == '>') { + break; + } + advance(); + } + + if (eof()) { + throw new LexingException( + "Unterminated URI reference: missing '>' for '<' at position " + start); + } + + String uriText = input.substring(uriStart, pos); + tokens.add(WebLinkToken.of(WebLinkTokenType.URI, uriText, uriStart)); + + // consume ">" + int gtPos = pos; + advance(); + tokens.add(WebLinkToken.of(WebLinkTokenType.GT, ">", gtPos)); + } + + /** + * Reads a quoted-string, without including the surrounding quotes. Does not yet handle escape + * sequences; that can be extended later. + */ + private void readQuoted(int start) { + // consume opening quote + advance(); + + int contentStart = pos; + + while (!eof()) { + char c = peek(); + if (c == '"') { + break; + } + // TODO: handle quoted-pair / escaping if needed + advance(); + } + + if (eof()) { + throw new LexingException( + "Unterminated quoted-string starting at position " + start); + } + + String content = input.substring(contentStart, pos); + + // consume closing quote + advance(); + + tokens.add(WebLinkToken.of(WebLinkTokenType.QUOTED, content, contentStart)); + } + + /** + * Reads an unquoted token (IDENT) until a delimiter or whitespace is reached. + */ + private void readIdent(int start) { + while (!eof()) { + char c = peek(); + if (isDelimiter(c) || isWhitespace(c)) { + break; + } + advance(); + } + + String text = input.substring(start, pos); + if (!text.isEmpty()) { + tokens.add(WebLinkToken.of(WebLinkTokenType.IDENT, text, start)); + } + } + + private void consumeWhitespace() { + while (!eof() && isWhitespace(peek())) { + advance(); + } + } + + private boolean isWhitespace(char c) { + // OWS/BWS: space or horizontal tab are most important; + // here we also accept CR/LF defensively. 
+ return c == ' ' || c == '\t' || c == '\r' || c == '\n'; + } + + /** + * Characters that delimit IDENT tokens. + */ + private boolean isDelimiter(char c) { + return switch (c) { + case '<', '>', ';', '=', ',', '"' -> true; + default -> false; + }; + } + + private boolean eof() { + return pos >= length; + } + + private char peek() { + return input.charAt(pos); + } + + private void advance() { + pos++; + } + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/WebLinkToken.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/WebLinkToken.java new file mode 100644 index 000000000..dc6747aae --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/lexing/WebLinkToken.java @@ -0,0 +1,25 @@ +package life.qbic.datamanager.signposting.http.lexing; + +import life.qbic.datamanager.signposting.http.WebLinkTokenType; + +/** + * Single token produced by a WebLinkLexer. + * + * @param type the token type + * @param text the raw text content for this token (without decorations like quotes) + * @param position the zero-based character offset in the input where this token starts + */ +public record WebLinkToken( + WebLinkTokenType type, + String text, + int position) { + + public static WebLinkToken of(WebLinkTokenType type, String text, int position) { + return new WebLinkToken(type, text, position); + } + + @Override + public String toString() { + return type + "('" + text + "' @" + position + ")"; + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLink.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLink.java new file mode 100644 index 000000000..7d9ff7ddb --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLink.java @@ -0,0 +1,14 @@ +package life.qbic.datamanager.signposting.http.parsing; + +import java.util.List; + +/** + * + * + *

+ * + * @since + */ +public record RawLink(String rawURI, List rawParameters) { + +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLinkHeader.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLinkHeader.java new file mode 100644 index 000000000..fa5036fb5 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawLinkHeader.java @@ -0,0 +1,14 @@ +package life.qbic.datamanager.signposting.http.parsing; + +import java.util.List; + +/** + * + * + *

+ * + * @since + */ +public record RawLinkHeader(List rawLinks) { + +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawParam.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawParam.java new file mode 100644 index 000000000..3abdfac93 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/RawParam.java @@ -0,0 +1,43 @@ +package life.qbic.datamanager.signposting.http.parsing; + +/** + * + * + *

+ * + * @since + */ +public record RawParam(String name, String value) { + + /** + * Creates a raw parameter without a value, that only has a name. + *

+ * A call to {@link #value()} will return {@code null} for parameters without a value. + * + * @param name the name of the parameter + * @return a raw parameter without a value, with a name only + */ + public static RawParam emptyParameter(String name) { + return new RawParam(name, null); + } + + /** + * Creates a raw parameter with name and value. + *

+ * The client must not pass empty or blank values as parameter value, but shall call + * {@link #emptyParameter(String)} explicitly. Alternatively, the client can also pass + * {@code null} for value, to indicate a parameter without a value. + * + * @param name the name of the parameter + * @param value the value of the parameter + * @return a raw parameter + * @throws IllegalArgumentException in case the value is empty or blank + */ + public static RawParam withValue(String name, String value) throws IllegalArgumentException { + if (value != null && value.isBlank()) { + throw new IllegalArgumentException("Value cannot be blank"); + } + return new RawParam(name, value); + } + +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/SimpleWebLinkParser.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/SimpleWebLinkParser.java new file mode 100644 index 000000000..0f526a592 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/parsing/SimpleWebLinkParser.java @@ -0,0 +1,312 @@ +package life.qbic.datamanager.signposting.http.parsing; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import life.qbic.datamanager.signposting.http.WebLinkParser; +import life.qbic.datamanager.signposting.http.lexing.WebLinkToken; +import life.qbic.datamanager.signposting.http.WebLinkTokenType; + +/** + * Parses serialized information used in Web Linking as described in RFC 8288. + *

+ * The implementation is based on the Link Serialisation in HTTP Headers, section 3 of the + * RFC 8288. + *

+ * Note: the implementation of this class is NOT thread-safe. + * + *

+ * + * Link = #link-value
link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param )
+ * link-param = token BWS [ "=" BWS ( token / quoted-string ) ] + *
+ * + */ +public class SimpleWebLinkParser implements WebLinkParser { + + private int currentPosition = 0; + + private List tokens; + + private SimpleWebLinkParser() { + } + + /** + * Creates a new SimpleWebLinkParser object instance. + * + * @return the new SimpleWebLinkParser + */ + public static SimpleWebLinkParser create() { + return new SimpleWebLinkParser(); + } + + + /** + * Parses a list of lexed web link tokens to a raw link header value. The parser only performs + * structural validation, not semantic validation. + *

+ * The template for structural validation is the serialisation description in ABNF for RFC 8288 + * Section 3. + * + *

+ * Parser contract: + * + *

    + *
  • The token list must contain an EOF token
  • + *
  • The last token item must be an EOF token, based on ascending sorting by position
  • + *
+ *

+ * In case the contract is violated, a structure exception is thrown. + * + * @param tokens a list of tokens to parse as raw web link header + * @return a raw web link header, structurally validated against RFC 8288 + * @throws NullPointerException if the tokens list is {@code null} + * @throws StructureException if the tokens violate the structure of a valid web link token + */ + @Override + public RawLinkHeader parse(List tokens) + throws NullPointerException, StructureException { + Objects.requireNonNull(tokens); + + if (tokens.isEmpty()) { + throw new StructureException( + "A link header entry must have at least one web link. Tokens were withoutValue."); + } + + // Always reset the internal state on every parse() call + reset(); + + this.tokens = tokens.stream() + .sorted(Comparator.comparingInt(WebLinkToken::position)) + .toList(); + + // Validate contract + ensureEOF("Lexer did not append EOF token"); + + if (this.tokens.get(currentPosition).type() == WebLinkTokenType.EOF) { + throw new StructureException( + "A link header entry must have at least one web link. Tokens started with EOF."); + } + + var collectedLinks = new ArrayList(); + + var parsedLink = parseLinkValue(); + collectedLinks.add(parsedLink); + // While there is ',' (COMMA) present, parse another link value + while (current().type() == WebLinkTokenType.COMMA) { + next(); + if (currentIsEof()) { + throw new StructureException( + "Unexpected trailing comma: expected another link-value after ','."); + } + collectedLinks.add(parseLinkValue()); + } + + // Last consumed token must be always EOF to ensure that the token stream has been consumed + expectCurrent(WebLinkTokenType.EOF); + + return new RawLinkHeader(collectedLinks); + } + + /** + * Resets the internal state of the parser instance + */ + private void reset() { + currentPosition = 0; + } + + /** + * Checks if the last token in the token list is an EOF token. 
To keep the parser robust and + * simple, this is part of the contract and the parser shall fail early if the contract is + * violated. + * + * @param errorMessage the message to provide in the exception + * @throws IllegalStateException if the last token of the list is not an EOF token + */ + private void ensureEOF(String errorMessage) throws IllegalStateException { + if (tokens.getLast().type() != WebLinkTokenType.EOF) { + throw new IllegalStateException(errorMessage); + } + } + + /** + * Parses a single web link value, which must contain a target (URI). Optionally, the web link can + * have one or more parameters. + *

+ * If the target has a trailing ',' (COMMA), no further parameters are expected. + *

+ * The correctness of the parameter structure with a precedent ';' (SEMICOLON) after the target is + * concern of the {@link #parseParameters()} method, since it is part of the parameter list + * description. + * + * @return a raw web link value with target and optionally one or more parameters + */ + private RawLink parseLinkValue() { + var parsedLinkValue = parseUriReference(); + if (current().type() != WebLinkTokenType.COMMA) { + return new RawLink(parsedLinkValue, parseParameters()); + } + return new RawLink(parsedLinkValue, List.of()); + } + + /** + * Parses parameters beginning from the current token position (inclusive). + *

+ * Based on the serialisation description of RFC 8288 for link-values, params must have a + * precedent ';' (SEMICOLON). If the start position on method call is not a semicolon, an + * exception will be thrown. + *

+ * In case the link-value has no parameters at all (e.g. multiple web links with targets (URI) + * only), this method should not be called in the first place. + * + * @return a list of raw parameters with param name and value + */ + private List parseParameters() { + var parameters = new ArrayList(); + if (currentIsEof()) { + return parameters; + } + // expected separator for a parameter entry is ';' (semicolon) based on RFC 8288 section 3 + expectCurrent(WebLinkTokenType.SEMICOLON); + next(); + + // now one or more parameters can follow + while (current().type() != WebLinkTokenType.COMMA) { + RawParam parameter = parseParameter(); + parameters.add(parameter); + // If the current token is no ';' (SEMICOLON), no additional parameters are expected + if (current().type() != WebLinkTokenType.SEMICOLON) { + break; + } + next(); + } + return parameters; + } + + private RawParam parseParameter() throws StructureException { + expectCurrent(WebLinkTokenType.IDENT); + var paramName = current().text(); + + next(); + + // Checks for withoutValue parameter + if (currentIsEof() + || current().type() == WebLinkTokenType.COMMA + || current().type() == WebLinkTokenType.SEMICOLON + ) { + return RawParam.emptyParameter(paramName); + } + + // Next token must be "=" (equals) + // RFC 8288: token BWS [ "=" BWS (token / quoted-string ) ] + expectCurrent(WebLinkTokenType.EQUALS); + + next(); + + expectCurrentAny(WebLinkTokenType.IDENT, WebLinkTokenType.QUOTED); + var rawParamValue = current().text(); + + next(); + + return RawParam.withValue(paramName, rawParamValue); + } + + /** + * Evaluates if the current token is an EOF token. + * + * @return {@code true}, if the current token is an EOF token, else {@code false} + */ + private boolean currentIsEof() { + return current().type() == WebLinkTokenType.EOF; + } + + /** + * Checks the current token and throws an exception, if it is not of the expected type. 
+ * + * @param token the expected token + * @throws StructureException if the current token does not match the expected one + */ + private void expectCurrent(WebLinkTokenType token) throws StructureException { + if (current().type() != token) { + throw new StructureException( + "Expected %s but found %s('%s') at position %d".formatted(token, current().type(), + current().text(), current().position())); + } + } + + /** + * Checks if the current token matches any (at least one) expected token. + *

+ * If no expected type is provided, the method will throw a + * {@link life.qbic.datamanager.signposting.http.WebLinkParser.StructureException}. + * + * @param expected zero or more expected token types. + * @throws StructureException if the current token does not match any expected token + */ + private void expectCurrentAny(WebLinkTokenType... expected) throws StructureException { + var matches = Arrays.stream(expected) + .anyMatch(type -> type.equals(current().type())); + + if (!matches) { + var expectedNames = Arrays.stream(expected) + .map(Enum::name) + .reduce((a, b) -> a + ", " + b) + .orElse(""); + throw new StructureException( + "Expected any of [%s] but found %s('%s') at position %d" + .formatted(expectedNames, current().type(), current().text(), current().position())); + } + } + + /** + * Will use the token from the current position with {@link this#current()} and try to parse the + * raw URI value. After successful return the current position is advanced to the next token in + * the list. + * + * @return the raw value of the URI + */ + private String parseUriReference() { + var uriValue = ""; + + // URI value must start with '<' + expectCurrent(WebLinkTokenType.LT); + next(); + + // URI reference expected + expectCurrent(WebLinkTokenType.URI); + uriValue = current().text(); + next(); + + // URI value must end with '>' + expectCurrent(WebLinkTokenType.GT); + + next(); + return uriValue; + } + + /** + * Returns the token on the current position. + * + * @return the token on the current position. + */ + private WebLinkToken current() { + return tokens.get(currentPosition); + } + + /** + * Returns the next token from the current position. If the current position is already the last + * token of the token list, the last token will be returned. + *

+ * By contract, the parser expects the last item to be an EOF token (see + * {@link WebLinkTokenType#EOF}). So the last item in the token list will always be an EOF token. + */ + private WebLinkToken next() { + if (currentPosition < tokens.size() - 1) { + currentPosition++; + } + return current(); + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/Rfc8288WebLinkValidator.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/Rfc8288WebLinkValidator.java new file mode 100644 index 000000000..ce2115cfc --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/Rfc8288WebLinkValidator.java @@ -0,0 +1,182 @@ +package life.qbic.datamanager.signposting.http.validation; + +import java.net.URI; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; +import life.qbic.datamanager.signposting.http.WebLinkParameter; +import life.qbic.datamanager.signposting.http.WebLinkValidator; +import life.qbic.datamanager.signposting.http.WebLink; +import life.qbic.datamanager.signposting.http.parsing.RawLink; +import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader; +import life.qbic.datamanager.signposting.http.parsing.RawParam; + +/** + * Validation against RFC 8288 Web Linking. + *

+ * Violations against the specification will be recorded as + * {@link WebLinkValidator.IssueType#ERROR}. In the presence of at + * least one error, the web link MUST be regarded invalid and clients shall not continue to work + * with the link, but treat it as exception. + *

+ * The implementation also records issues as + * {@link WebLinkValidator.IssueType#WARNING}, in case the finding + * is not strictly against the RFC 8288, but e.g. a type usage is deprecated or when parameters have + * been skipped when the specification demands for it. A warning results in a still usable web link, + * but it is advised to investigate any findings. + * + */ +public class Rfc8288WebLinkValidator implements WebLinkValidator { + + // Defined in https://www.rfc-editor.org/rfc/rfc7230, section 3.2.6 + private static final Pattern ALLOWED_TOKEN_CHARS = Pattern.compile( + "^[!#$%&'*+-.^_`|~0-9A-Za-z]+$"); + + private Rfc8288WebLinkValidator() {} + + public static WebLinkValidator create() { + return new Rfc8288WebLinkValidator(); + } + + @Override + public ValidationResult validate(RawLinkHeader rawLinkHeader) { + var recordedIssues = new ArrayList(); + + var webLinks = new ArrayList(); + for (RawLink rawLink : rawLinkHeader.rawLinks()) { + var webLink = validate(rawLink, recordedIssues); + if (webLink != null) { + webLinks.add(webLink); + } + } + return new ValidationResult(webLinks, new IssueReport(List.copyOf(recordedIssues))); + } + + /** + * Validation entry point for a single raw link. Any findings must be recorded in the provided + * issue list. Only issue additions are allowed. + *

+ * In case the target is not a valid URI, the returned web link is {@code null}. + * + * @param rawLink the raw link information from parsing + * @param recordedIssues a list to record negative findings as warnings and errors + * @return a web link object, or {@code null}, in case the target is not a valid URI + */ + private WebLink validate(RawLink rawLink, List recordedIssues) { + URI uri = null; + try { + uri = URI.create(rawLink.rawURI()); + } catch (IllegalArgumentException e) { + recordedIssues.add( + Issue.error("Invalid URI '%s': %s".formatted(rawLink.rawURI(), e.getMessage()))); + } + var parameters = validateAndConvertParams(rawLink.rawParameters(), recordedIssues); + + if (uri == null) { + return null; + } + return new WebLink(uri, parameters); + } + + /** + * Validates a list of raw parameters and creates a list of link parameters that can be used to + * build the final web link object. + *

+ * Any error or warning will be recorded in the provided recorded issue list. + * + * @param rawParams a list of raw parameter values + * @param recordedIssues a list of recorded issues to add more findings during validation + * @return a list of converted link parameters + */ + private List validateAndConvertParams( + List rawParams, List recordedIssues) { + var params = new ArrayList(); + var seenParams = new HashSet(); + for (RawParam rawParam : rawParams) { + validateParam(rawParam, recordedIssues); + validateParamOccurrenceAndAddLink(rawParam, seenParams, params, recordedIssues); + } + return params; + } + + /** + * Validates a given raw parameter against known constraints and assumptions in the RFC 8288 + * specification. + *

+ * Currently, checks: + * + *

    + *
  • the parameter name MUST contain allowed characters only (see token definition)
  • + *
+ * + * @param rawParam the raw parameter to be validated + * @param recordedIssues a list of issues to record more findings + */ + private void validateParam(RawParam rawParam, List recordedIssues) { + if (tokenContainsInvalidChars(rawParam.name())) { + recordedIssues.add( + Issue.error("Invalid parameter name '%s': Only the characters '%s' are allowed".formatted( + rawParam.name(), ALLOWED_TOKEN_CHARS.pattern()))); + } + } + + /** + * Looks for the presence of invalid chars. + *

+ * Allowed token chars are defined by RFC + * 7230, section 3.2.6. + * + * @param token the token to be checked for invalid characters + * @return true, if the token violates the token character specification, else false + */ + private static boolean tokenContainsInvalidChars(String token) { + return !ALLOWED_TOKEN_CHARS.matcher(token).matches(); + } + + /** + * Validates parameter occurrence rules and honors the RFC 8288 specification for skipping + * parameter entries. + *

+ * So far, multiple definitions are only allowed for the "hreflang" parameter. + *

+ * Note: occurrences after the first are ignored and issue a warning. This is a strict requirement + * from the RFC 8288 and must be honored. + * + * @param rawParam the raw parameter value + * @param recordedParameterNames a set to check, if a parameter has been already seen in the link + * @param parameters a list of converted link parameters for the final web link + * object + * @param recordedIssues a list of issue records to add new findings + */ + private void validateParamOccurrenceAndAddLink( + RawParam rawParam, + Set recordedParameterNames, + List parameters, + List recordedIssues) { + var rfcParamOptional = RfcLinkParameter.from(rawParam.name()); + + if (rfcParamOptional.isPresent()) { + var rfcParam = rfcParamOptional.get(); + // the "hreflang" parameter is the only parameter that is allowed to occur more than once + // see RFC 8288 for the parameter multiplicity definition + if (recordedParameterNames.contains(rawParam.name()) && !rfcParam.equals( + RfcLinkParameter.HREFLANG)) { + recordedIssues.add(Issue.warning( + "Parameter '%s' is not allowed multiple times. 
Skipped parameter.".formatted( + rfcParam.rfcValue()))); + return; + } + } + recordedParameterNames.add(rawParam.name()); + + WebLinkParameter webLinkParameter; + if (rawParam.value() == null || rawParam.value().isEmpty()) { + webLinkParameter = WebLinkParameter.withoutValue(rawParam.name()); + } else { + webLinkParameter = WebLinkParameter.create(rawParam.name(), rawParam.value()); + } + parameters.add(webLinkParameter); + } +} diff --git a/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/RfcLinkParameter.java b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/RfcLinkParameter.java new file mode 100644 index 000000000..d56c6a2a2 --- /dev/null +++ b/fair-signposting/src/main/java/life/qbic/datamanager/signposting/http/validation/RfcLinkParameter.java @@ -0,0 +1,66 @@ +package life.qbic.datamanager.signposting.http.validation; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** + * Standard parameters for the {@code Link} HTTP header. + * + *

    + *
  • "anchor" - see RFC 8288 section 3.2 (“Link Context”)
  • + *
  • "hreflang" - see RFC 8288 section 3.4.1 (“Serialisation-Defined Attributes”)
  • + *
  • "media" - see RFC 8288 section 3.4.1 (“Serialisation-Defined Attributes”)
  • + *
  • "rel" - see RFC 8288 section 3.3 (“Relation Types”)
  • + *
  • "rev" - see RFC 8288 section 3.3 (historical note)
  • + *
  • "title" - see RFC 8288 section 3.4.1 (“Serialisation-Defined Attributes”)
  • + *
  • "title*" - see RFC 8288 section 3.4.1 (“Serialisation-Defined Attributes”), encoded as per RFC 8187 (“Indicating Character Encoding and Language for HTTP Header Field Parameters”)
  • + *
  • "type" - see RFC 8288 section 3.4.1 (“Serialisation-Defined Attributes”)
  • + *
+ */ +public enum RfcLinkParameter { + + ANCHOR("anchor"), + HREFLANG("hreflang"), + MEDIA("media"), + REL("rel"), + REV("rev"), + TITLE("title"), + TITLE_MULT("title*"), + TYPE("type"); + + private final String value; + + private static final Map LOOKUP = new HashMap<>(); + + static { + for (RfcLinkParameter p : RfcLinkParameter.values()) { + LOOKUP.put(p.value, p); + } + } + + RfcLinkParameter(String value) { + this.value = value; + } + + /** + * Returns the RFC compliant value of the parameter name. + * + * @return the alpha-value of the link parameter + */ + public String rfcValue() { + return value; + } + + /** + * Creates an RfcLinkParameter from a given value, if the value belongs to any existing enum of + * this type. + * + * @param value the value to match the corresponding enum value + * @return the corresponding enum in an Optional, of returns Optional.withoutValue() + */ + public static Optional from(String value) { + return Optional.ofNullable(LOOKUP.getOrDefault(value, null)); + } + +} diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkProcessorSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkProcessorSpec.groovy new file mode 100644 index 000000000..0932d90a0 --- /dev/null +++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkProcessorSpec.groovy @@ -0,0 +1,400 @@ +package life.qbic.datamanager.signposting.http + +import life.qbic.datamanager.signposting.http.WebLinkLexer.LexingException +import life.qbic.datamanager.signposting.http.lexing.WebLinkToken +import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader +import life.qbic.datamanager.signposting.http.WebLinkValidator.Issue +import life.qbic.datamanager.signposting.http.WebLinkValidator.IssueReport +import life.qbic.datamanager.signposting.http.WebLinkValidator.ValidationResult +import spock.lang.Specification +import spock.lang.Unroll + +class WebLinkProcessorSpec 
extends Specification { + + // --------------------------------------------------------------------------- + // Helpers – ADAPT CONSTRUCTORS HERE + // --------------------------------------------------------------------------- + + /** + * Create a minimal but real WebLinkToken list. + * + */ + static List dummyTokens() { + return List.of( + new WebLinkToken(WebLinkTokenType.URI, "https://example.org", 0) + ) + } + + /** + * Create a minimal but real RawLinkHeader. + * Adjust constructor to your actual RawLinkHeader definition. + * + * Example assumption: + * public record RawLinkHeader(List rawLinks) { } + */ + static RawLinkHeader dummyParsedHeader() { + return new RawLinkHeader(List.of()) + } + + /** + * Create a minimal but real WebLink instance. + * Adjust constructor to your actual WebLink record/class. + * + * Example assumption: + * public record WebLink(URI reference, Map parameters) { } + */ + static WebLink dummyWebLink(String id) { + return new WebLink( + URI.create("https://example.org/" + id), + List.of() + ) + } + + // --------------------------------------------------------------------------- + // Tests + // --------------------------------------------------------------------------- + + def "default processor can process minimal valid link header"() { + given: + def processor = new WebLinkProcessor.Builder().build() + def input = "" + + when: + def result = processor.process(input) + + then: + result != null + result.weblinks() != null + result.report() != null + } + + /** + * When a custom lexer is provided, it must be used instead of the default one. 
+ */ + def "processor uses configured lexer instead of default"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator = Mock(WebLinkValidator) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + def validationResult = new ValidationResult(List.of(), new IssueReport(List.of())) + + and: + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator) + .build() + + when: + def result = processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + 1 * validator.validate(parsedHeader) >> validationResult + + and: + result.weblinks().isEmpty() + !result.report().hasErrors() + } + + /** + * When a custom parser is provided, it must be used instead of the default one. + */ + def "processor uses configured parser instead of default"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator = Mock(WebLinkValidator) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + def validationResult = new ValidationResult(List.of(), new IssueReport(List.of())) + + and: + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator) + .build() + + when: + def result = processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + 1 * validator.validate(parsedHeader) >> validationResult + + and: + result != null + } + + def "builder injects default validator when none configured"() { + given: + def processor = new WebLinkProcessor.Builder().build() + def input = "" + + when: + def result = processor.process(input) + + then: + result != null + result.weblinks() != null + result.report() != null + } + + def "aggregates issues from multiple validators and uses last validator's weblinks"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def 
validator1 = Mock(WebLinkValidator) + def validator2 = Mock(WebLinkValidator) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + + def link1 = dummyWebLink("v1") + def link2 = dummyWebLink("v2") + + def issue1 = Issue.error("first") + def issue2 = Issue.warning("second") + + def result1 = new ValidationResult(List.of(link1), new IssueReport(List.of(issue1))) + def result2 = new ValidationResult(List.of(link2), new IssueReport(List.of(issue2))) + + and: + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator1) + .withValidator(validator2) + .build() + + when: + def result = processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + 1 * validator1.validate(parsedHeader) >> result1 + 1 * validator2.validate(parsedHeader) >> result2 + + and: + result.weblinks() == List.of(link2) + result.report().issues().containsAll(List.of(issue1, issue2)) + result.report().issues().size() == 2 + } + + @Unroll + def "process throws NullPointerException for null input (#caseName)"() { + given: + def processor = new WebLinkProcessor.Builder().build() + + when: + processor.process(input) + + then: + thrown(NullPointerException) + + where: + caseName | input + "null header" | null + } + + def "lexer exception is propagated and prevents parser and validators from running"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator = Mock(WebLinkValidator) + + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator) + .build() + + when: + processor.process("> { throw new LexingException("boom") } + 0 * parser._ + 0 * validator._ + + and: + thrown(LexingException) + } + + def "parser exception is propagated and prevents validators from running"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator = Mock(WebLinkValidator) 
+ + def tokens = dummyTokens() + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator) + .build() + + when: + processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> { throw new RuntimeException("parse error") } + 0 * validator._ + + and: + thrown(RuntimeException) + } + + def "validator exception is propagated and stops further validators"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator1 = Mock(WebLinkValidator) + def validator2 = Mock(WebLinkValidator) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator1) + .withValidator(validator2) + .build() + + when: + processor.process("
") + + then: + 1 * lexer.lex("
") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + 1 * validator1.validate(parsedHeader) >> { throw new RuntimeException("validator boom") } + 0 * validator2._ + + and: + thrown(RuntimeException) + } + + def "throws IllegalStateException when no validator produces a result (defensive branch)"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .build() + + and: + def validatorsField = WebLinkProcessor.getDeclaredField("validators") + validatorsField.accessible = true + validatorsField.set(processor, List.of()) // simulate broken internal state + + when: + processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + + and: + def ex = thrown(IllegalStateException) + ex.message.contains("No validation result was found") + } + + def "external mutation of issue list from validator does not break aggregated result"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator = Mock(WebLinkValidator) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + + def mutableIssues = new ArrayList() + mutableIssues.add(Issue.error("original")) + + def validationResult = new ValidationResult( + List.of(dummyWebLink("l1")), + new IssueReport(mutableIssues) + ) + + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator) + .build() + + when: + def result = processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + 1 * validator.validate(parsedHeader) >> validationResult + + and: + result.report().issues().size() == 1 + + when: + mutableIssues.clear() + + then: + result.report().issues().size() == 1 + } + + def "external mutation of weblink list from validator does not alter 
processor result"() { + given: + def lexer = Mock(WebLinkLexer) + def parser = Mock(WebLinkParser) + def validator = Mock(WebLinkValidator) + + def tokens = dummyTokens() + def parsedHeader = dummyParsedHeader() + + def mutableWebLinks = new ArrayList() + def link = dummyWebLink("foo") + mutableWebLinks.add(link) + + def validationResult = new ValidationResult( + mutableWebLinks, + new IssueReport(List.of()) + ) + + def processor = new WebLinkProcessor.Builder() + .withLexer(lexer) + .withParser(parser) + .withValidator(validator) + .build() + + when: + def result = processor.process("") + + then: + 1 * lexer.lex("") >> tokens + 1 * parser.parse(tokens) >> parsedHeader + 1 * validator.validate(parsedHeader) >> validationResult + + and: + result.weblinks().size() == 1 + result.weblinks().first() == link + + when: + mutableWebLinks.clear() + + then: + result.weblinks().size() == 1 + } +} diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkSpec.groovy new file mode 100644 index 000000000..23ef8e972 --- /dev/null +++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/WebLinkSpec.groovy @@ -0,0 +1,23 @@ +package life.qbic.datamanager.signposting.http + +import spock.lang.Specification + +class WebLinkSpec extends Specification { + + def "An empty parameter key must throw an FormatException"() { + given: + var someURI = URI.create("myuri") + + and: + var someParameters = new HashMap>() + someParameters.put("someKey", "someValue") + someParameters.put("", "anotherValue") + + when: + WebLink.create(someURI, someParameters) + + then: + thrown(FormatException.class) + } + +} diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/lexing/WebLinkLexerSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/lexing/WebLinkLexerSpec.groovy new file mode 100644 index 
000000000..c40b3a8a1 --- /dev/null +++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/lexing/WebLinkLexerSpec.groovy @@ -0,0 +1,234 @@ +package life.qbic.datamanager.signposting.http.lexing + + +import life.qbic.datamanager.signposting.http.WebLinkLexer +import life.qbic.datamanager.signposting.http.WebLinkLexer.LexingException +import life.qbic.datamanager.signposting.http.WebLinkTokenType; +import spock.lang.Specification + +/** + * Specification for a {@link WebLinkLexer} implementation. + * + * These tests verify that a raw Web Link (RFC 8288) serialisation + * is correctly tokenised into a sequence of {@link WebLinkToken}s, + * ending with an EOF token, and that malformed input causes a + * {@link LexingException}. + * + */ +class WebLinkLexerSpec extends Specification { + + // Adjust to your concrete implementation + WebLinkLexer lexer = new SimpleWebLinkLexer() + + /** + * Minimal working example: just a URI reference in angle brackets. + * + * ABNF: link-value = "<" URI-Reference ">" *( ...) + */ + def "lexes minimal link with URI only"() { + given: + def input = "" + + when: + def tokens = lexer.lex(input) + + then: "token sequence matches < URI > EOF" + tokens*.type() == [ + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.EOF + ] + + and: "URI token text is the raw reference" + tokens[1].text() == "https://example.org/resource" + } + + /** + * Single parameter with a token value. 
+ * + * Example: ; rel=self + */ + def "lexes link with single token parameter"() { + given: + def input = "; rel=self" + + when: + def tokens = lexer.lex(input) + + then: + tokens*.type() == [ + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.SEMICOLON, + WebLinkTokenType.IDENT, // rel + WebLinkTokenType.EQUALS, + WebLinkTokenType.IDENT, // self + WebLinkTokenType.EOF + ] + + and: + tokens[1].text() == "https://example.org" + tokens[4].text() == "rel" + tokens[6].text() == "self" + } + + /** + * Single parameter with a quoted-string value. + * + * Example: ; title="A title" + */ + def "lexes link with quoted-string parameter value"() { + given: + def input = '; title="A title"' + + when: + def tokens = lexer.lex(input) + + then: + tokens*.type() == [ + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.SEMICOLON, + WebLinkTokenType.IDENT, // title + WebLinkTokenType.EQUALS, + WebLinkTokenType.QUOTED, // "A title" + WebLinkTokenType.EOF + ] + + and: "quoted token text does not contain quotes" + tokens[6].text() == "A title" + } + + /** + * Empty quoted-string is valid: title="". + * + * RFC 7230 §3.2.6 allows zero-length quoted-string. + */ + def "lexes parameter with empty quoted-string value"() { + given: + def input = '; title=""' + + when: + def tokens = lexer.lex(input) + + then: + tokens*.type() == [ + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.SEMICOLON, + WebLinkTokenType.IDENT, // title + WebLinkTokenType.EQUALS, + WebLinkTokenType.QUOTED, // "" + WebLinkTokenType.EOF + ] + + and: + tokens[6].text() == "" + } + + /** + * Whitespace (OWS/BWS) must be allowed around separators and '='. 
+ * + * Example: <...> ; rel = "self" + */ + def "ignores optional whitespace around separators and equals"() { + given: + def input = ' ; rel = "self" ' + + when: + def tokens = lexer.lex(input) + + then: "same token sequence as without whitespace" + tokens*.type() == [ + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.SEMICOLON, + WebLinkTokenType.IDENT, + WebLinkTokenType.EQUALS, + WebLinkTokenType.QUOTED, + WebLinkTokenType.EOF + ] + + and: + tokens[4].text() == "rel" + tokens[6].text() == "self" + } + + /** + * Multiple link-values separated by a comma at the header field level. + * + * Example: ; rel=self, ; rel=next + * + * The lexer should emit a COMMA token between the two link-values. + */ + def "lexes multiple link-values separated by comma"() { + given: + def input = '; rel=self, ; rel=next' + + when: + def tokens = lexer.lex(input) + + then: + tokens*.type() == [ + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.SEMICOLON, + WebLinkTokenType.IDENT, + WebLinkTokenType.EQUALS, + WebLinkTokenType.IDENT, + WebLinkTokenType.COMMA, + WebLinkTokenType.LT, + WebLinkTokenType.URI, + WebLinkTokenType.GT, + WebLinkTokenType.SEMICOLON, + WebLinkTokenType.IDENT, + WebLinkTokenType.EQUALS, + WebLinkTokenType.IDENT, + WebLinkTokenType.EOF + ] + + and: + tokens[1].text() == "https://example.org/a" + tokens[6].text() == "self" + tokens[9].text() == "https://example.org/b" + tokens[14].text() == "next" + } + + /** + * Unterminated quoted-string should be rejected by the lexer. + * + * Example: title="unterminated + */ + def "throws on unterminated quoted string"() { + given: + def input = '; title="unterminated' + + when: + lexer.lex(input) + + then: + thrown(LexingException) + } + + /** + * Unterminated URI reference (missing closing '>') should be rejected. + * + * Example: with zero link-params. 
+ * Spec: RFC 8288 Section 3 (“Link Serialisation in HTTP Headers”), ABNF link-value = "<" URI-Reference ">" *(...); * allows zero params. + */ + def "Minimal working serialized link, no parameters"() { + given: + var validSerialisation = "" + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: link-param is token BWS [ "=" BWS token ]; both rel and self are tokens. + * Spec: RFC 8288 Section 3; RFC 7230 section 3.2.6 defines token. + */ + def "Single parameter, token value"() { + given: + var validSerialisation = "; rel=self" + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: link-param value may be token / quoted-string; both forms equivalent. + * Spec: RFC 8288 section 3 (note on token vs quoted-string equivalence); RFC 7230 section 3.2.6 for quoted-string. + */ + def "Single parameter, quoted-string value"() { + given: + var validSerialisation = '; rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: ABNF allows zero or more ";" link-param after URI. + * Spec: RFC 8288 section 3, *( OWS ";" OWS link-param ). 
+ */ + def "Multiple parameters"() { + given: + var validSerialisation = '; rel="self"; type="application/json"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: OWS and BWS allow optional whitespace around separators and =. + * Spec: RFC 8288 section 3 (uses OWS/BWS); RFC 7230 section 3.2.3 (OWS), section 3.2.4 (BWS concept). + */ + def "Whitespace around semi-colon and ="() { + given: + var validSerialisation = ' ; rel = "self" ; type = application/json' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: link-param = token BWS [ "=" BWS ( token / quoted-string ) ]; the [ ... ] part is optional, so no = is allowed. + * Spec: RFC 8288 section 3, link-param ABNF (optional value). + */ + def "Parameter without value"() { + given: + var validSerialisation = "; rel" + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: Empty string is a valid quoted-string. + * Spec: RFC 7230 section 3.2.6 (quoted-string can contain zero or more qdtext). 
+ */ + def "Parameter with empty quoted string"() { + given: + var validSerialisation = '; title=""' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: rel value is defined as a space-separated list of link relation types. + * Spec: RFC 8288 section 3.3 (“Relation Types”), which describes rel as a list of relation types. + */ + def "Multiple rel values in one parameter"() { + given: + var validSerialisation = '; rel="self describedby item"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: URI-Reference may be relative, resolved against base URI. + * Spec: RFC 8288 section 3 (uses URI-Reference); RFC 3986 section 4.1 (“URI Reference”). + */ + def "Relative URI"() { + given: + var validSerialisation = '; rel="item"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: At the header level, field-content is opaque to RFC 8288; title is a defined target attribute and its value is a quoted-string. + * Spec: RFC 8288 section 3 (defines title as a target attribute); RFC 7230 section 3.2 (header fields treat value as opaque except for defined syntax). 
+ */ + def "Non-ASCII in quoted-string title"() { + given: + var validSerialisation = '; title="Données de recherche"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: link-value uses standard link-param names; rel="linkset" and type="application/linkset+json" are ordinary parameters. + * Spec: RFC 8288 section 3 (general link-param usage); linkset relation and media type from the Linkset draft (compatible with RFC 8288). + */ + def "Linkset type example"() { + given: + var validSerialisation = '; rel="linkset"; type="application/linkset+json"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: Link = #link-value; #rule allows 1+ link-values separated by commas in a single header field. + * Spec: RFC 8288 section 3 (Link = #link-value); RFC 7230 section 7 (“ABNF list extension: #rule”). 
+ */ + def "Multiple link-values in one header"() { + given: + var validSerialisation = '; rel="self", ; rel="next"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + def "Multiple links without parameters"() { + given: + var validSerialisation = ', ' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: type parameter carries a media-type; application/ld+json fits token syntax and media-type grammar. + * Spec: RFC 8288 section 3 (defines type parameter); RFC 7231 section 3.1.1.1 (media-type grammar uses tokens). + */ + def "Parameter value as token with slash"() { + given: + var validSerialisation = '; type=application/ld+json' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: anchor is a registered link-parameter giving the context URI; its value is a quoted-string. + * Spec: RFC 8288 section 3.2 (“Target Attributes”) defines anchor; RFC 7230 section 3.2.6 for quoted-string. + */ + def "Anchor parameter"() { + given: + var validSerialisation = '; rel="self"; anchor="https://example.org/records/123"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why valid: link-param value may be token or quoted-string; mixing quoted and unquoted values is allowed. 
+ * Spec: RFC 8288 section 3 (token / quoted-string equivalence for link-param values); RFC 7230 section 3.2.6. + */ + def "Mixed quoting styles in parameters"() { + given: + var validSerialisation = '; rel=self; type="application/json"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + var result = weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + noExceptionThrown() + result != null + } + + /** + * Why invalid: A trailing comma indicates an empty link value, which is invalid. + * Spec: RFC 8288 Section 3, link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param ). + */ + def "No trailing comma allowed for multiple link values"() { + given: + var validSerialisation = ',' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + def "No trailing semicolon allowed for multiple link values"() { + + given: + var validSerialisation = ';' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(validSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + + /** + * Why invalid: link-value must start with "<" URI-Reference ">"; a bare URI with params does not match link-value syntax. + * Spec: RFC 8288 Section 3, link-value = "<" URI-Reference ">" *( ... ). 
+ */ + def "Invalid: Missing angle brackets around URI"() { + given: + var invalidSerialisation = 'https://example.org/resource; rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: link-value requires a "<URI-Reference>" prefix; parameters alone do not form a valid link-value. + * Spec: RFC 8288 Section 3, link-value ABNF. + */ + def "Invalid: Parameters without URI"() { + given: + var invalidSerialisation = 'rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + + } + + /** + * Why invalid: link-param must start with token; an empty name before equal sign violates token = 1*tchar. + * Spec: RFC 8288 section 3, link-param = token ...; RFC 7230 section 3.2.6 (token = 1*tchar). + */ + def "Invalid: Empty parameter name"() { + given: + var invalidSerialisation = '; =self' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: Each ";" must be followed by a link-param; ";;" introduces an empty parameter without a token. + * Spec: RFC 8288 section 3, *( OWS ";" OWS link-param ) requires a link-param after each ";". 
+ */ + def "Invalid: Double semicolon introduces empty parameter"() { + given: + var invalidSerialisation = ';; rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: Comma is not allowed in token; parameter name containing "," violates token = 1*tchar. + * Spec: RFC 7230 section 3.2.6 (tchar set does not include ","). + */ + def "Invalid: Parameter name with illegal character"() { + given: + var invalidSerialisation = '; re,l="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + + /** + * Why invalid: link-param requires a token before "="; "=" without a parameter name violates link-param syntax. + * Spec: RFC 8288 section 3, link-param = token BWS [ "=" ... ]; RFC 7230 section 3.2.6 (token required). + */ + def "Invalid: Parameter with only equals sign and no name"() { + given: + var invalidSerialisation = '; = "self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: link-value must start with ""; placing parameters before the URI does not match the ABNF. + * Spec: RFC 8288 section 3, link-value = "<" URI-Reference ">" *( ... ). 
+ */ + def "Invalid: Parameters before URI"() { + given: + var invalidSerialisation = 'rel="self"; ' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: URI must be enclosed in "<" and ">"; bare URI with parameters is not a valid link-value. + * Spec: RFC 8288 section 3, "<" URI-Reference ">" is mandatory in link-value. + */ + def "Invalid: URI not enclosed in angle brackets"() { + given: + var invalidSerialisation = 'https://example.org/resource; rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + + /** + * Why invalid: After ">" only OWS ";" OWS link-param is allowed; arbitrary token "foo" between ">" and ";" violates link-value syntax. + * Spec: RFC 8288 section 3, link-value = "<" URI-Reference ">" *( OWS ";" OWS link-param ). + */ + def "Invalid: Garbage between URI and first parameter"() { + given: + var invalidSerialisation = ' foo ; rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: #link-value requires 1+ elements separated by commas; a leading comma introduces an empty element. + * Spec: RFC 8288 section 3 (Link = #link-value); RFC 7230 section 7 (#rule does not allow empty list elements). 
+ */ + def "Invalid: Leading comma in Link header list"() { + given: + var invalidSerialisation = ', ; rel="self"' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + + /** + * Why invalid: #link-value requires 1+ elements separated by commas; a trailing comma implies an empty last element. + * Spec: RFC 8288 section 3 (Link = #link-value); RFC 7230 section 7 (#rule does not allow empty list elements). + */ + def "Invalid: Trailing comma in Link header list"() { + given: + var invalidSerialisation = '; rel="self",' + + and: + var weblinkParser = SimpleWebLinkParser.create() + + and: + var lexer = new SimpleWebLinkLexer() + + when: + weblinkParser.parse(lexer.lex(invalidSerialisation)) + + then: + thrown(WebLinkParser.StructureException.class) + } + +} diff --git a/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/validation/Rfc8288ValidatorSpec.groovy b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/validation/Rfc8288ValidatorSpec.groovy new file mode 100644 index 000000000..24b9cc83b --- /dev/null +++ b/fair-signposting/src/test/groovy/life/qbic/datamanager/signposting/http/validation/Rfc8288ValidatorSpec.groovy @@ -0,0 +1,274 @@ +package life.qbic.datamanager.signposting.http.validation + +import life.qbic.datamanager.signposting.http.WebLinkValidator +import life.qbic.datamanager.signposting.http.WebLink +import life.qbic.datamanager.signposting.http.parsing.RawLink +import life.qbic.datamanager.signposting.http.parsing.RawLinkHeader +import life.qbic.datamanager.signposting.http.parsing.RawParam +import spock.lang.Specification + +/** + * Specification for {@link Rfc8288WebLinkValidator}. + * + * Covers basic RFC 8288 semantics: + *
<ul> + * <li>Valid URIs create {@link WebLink} instances without issues.</li> + * <li>Invalid URIs create error {@link WebLinkValidator.Issue}s and no WebLink for that entry.</li> + * <li>Multiple links are all validated; one invalid URI does not stop validation.</li> + * <li>Unknown / extension parameters are preserved and do not cause issues.</li> + * </ul>
+ * + * @since + */ +class Rfc8288ValidatorSpec extends Specification { + + /** + * Valid single link with a syntactically correct absolute URI + * should yield one WebLink and no issues. + */ + def "single valid link produces one WebLink and no issues"() { + given: + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/resource", []) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "no issues are reported" + !result.containsIssues() + !result.report().hasErrors() + !result.report().hasWarnings() + + and: "exactly one WebLink is produced with the expected URI and withoutValue params" + result.weblinks().size() == 1 + WebLink link = result.weblinks().first() + link.reference().toString() == "https://example.org/resource" + link.params().isEmpty() + } + + /** + * A link with an invalid URI string should not yield a WebLink instance, + * but should record at least one error Issue. + */ + def "single invalid URI produces error issue and no WebLinks"() { + given: + // 'not a uri' will fail URI.create(...) + def rawHeader = new RawLinkHeader([ + new RawLink("not a uri", []) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "an error is reported" + result.containsIssues() + result.report().hasErrors() + + and: "no WebLinks are produced for invalid URIs" + result.weblinks().isEmpty() + } + + /** + * When there are multiple links and one has an invalid URI, + * the validator should still validate all links and produce + * WebLinks for the valid ones. 
+ */ + def "multiple links - one invalid URI does not prevent valid WebLinks"() { + given: + def rawHeader = new RawLinkHeader([ + new RawLink("not a uri", []), + new RawLink("https://example.org/valid", []) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "at least one error is reported for the invalid entry" + result.containsIssues() + result.report().hasErrors() + + and: "the valid URI still yields a WebLink" + result.weblinks().size() == 1 + result.weblinks().first().reference().toString() == "https://example.org/valid" + } + + /** + * Unknown / extension parameters should be preserved on the WebLink + * and must not trigger errors at RFC 8288 level. + * + * Example: Link: ; foo="bar" + */ + def "unknown extension parameters are preserved and do not cause issues"() { + given: + def params = [new RawParam("x-custom", "value")] // arbitrary extension parameter + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/with-param", params) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "no errors are reported for unknown parameters" + !result.report().hasErrors() + + and: "at RFC level, we do not warn about extension parameters either (optional; adjust if you decide to warn)" + !result.report().hasWarnings() + + and: "the parameter is preserved on the resulting WebLink" + result.weblinks().size() == 1 + def link = result.weblinks().first() + link.extensionAttribute("x-custom")[0] == "value" + } + + /** + * A parameter without a value (e.g. 'rel' without '=...') is structurally + * allowed in RFC 8288. At the RFC semantic level we accept it and leave any + * deeper interpretation to profile-specific validators (e.g. Signposting). 
+ * + * How you map "no value" into your RawLink/WebLink model is up to your + * implementation; here we assume null or an empty string is used to represent it. + */ + def "parameter without value is accepted at RFC level"() { + given: + // Example representation: parameter present with null value. + // Adapt this to your actual RawLink model. + def params = [new RawParam("rel", null)] + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/no-value-param", params) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "URI is valid, so we get a WebLink back" + result.weblinks().size() == 1 + + and: "parameter without value does not cause an error at RFC-level" + !result.report().hasErrors() + } + + def "parameter anchor with one occurrence is allowed"() { + given: + // Example representation: parameter present with null value. + // Adapt this to your actual RawLink model. + def params = [new RawParam("anchor", "https://example.org/one-anchor-only")] + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/one-anchor-only", params) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "URI is valid, so we get a WebLink back" + result.weblinks().size() == 1 + + and: "parameter anchor with only one occurrence does not cause an error at RFC-level" + !result.report().hasErrors() + } + + def "a parameter with allowed multiplicity of 1 must be only processed on the first occurrence"() { + given: + // Example representation: parameter present with null value. + // Adapt this to your actual RawLink model. 
+ def firstParam = new RawParam("rel", "https://example.org/first-occurrence") + def secondParam = new RawParam("rel", "https://example.org/next-occurrence") + def params = [firstParam, secondParam] + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/one-anchor-only", params) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "URI is valid, so we get a WebLink back" + result.weblinks().size() == 1 + + and: "parameter rel with only one occurrence does not cause an error at RFC-level" + !result.report().hasErrors() + + and: "but results in a warning, since the second occurrence is skipped" + result.report().hasWarnings() + + and: "uses only the value of the first occurrence" + var relations = result.weblinks().get(0).rel() + relations.size() == 1 + relations.get(0).equals(firstParam.value()) + } + + def "the rel parameter can contain multiple relations as whitespace-separated list"() { + given: + // Example representation: parameter present with null value. + // Adapt this to your actual RawLink model. 
+ def firstParam = new RawParam("rel", "self describedby another") + def params = [firstParam] + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/one-anchor-only", params) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "URI is valid, so we get a WebLink back" + result.weblinks().size() == 1 + + and: "parameter rel with only one occurrence does not cause an error at RFC-level" + !result.report().hasErrors() + + and: "results in no warnings" + !result.report().hasWarnings() + + and: "splits the relations into three values" + var relations = result.weblinks().get(0).rel() + relations.size() == 3 + } + + + def "parameter anchor must not have multiple occurrences"() { + given: + // Example representation: parameter present with null value. + // Adapt this to your actual RawLink model. + def params = [new RawParam("anchor", "https://example.org/one-anchor-only"), + new RawParam("anchor", "https://example.org/another-anchor")] + def rawHeader = new RawLinkHeader([ + new RawLink("https://example.org/one-anchor-only", params) + ]) + + and: + def validator = new Rfc8288WebLinkValidator() + + when: + WebLinkValidator.ValidationResult result = validator.validate(rawHeader) + + then: "URI is valid, so we get a WebLink back" + result.weblinks().size() == 1 + + and: "parameter anchor with only one occurrence does not cause an error at RFC-level" + result.report().hasWarnings() + result.report().issues().size() == 1 + } +} diff --git a/pom.xml b/pom.xml index 6d6d025ff..48d58588b 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ email-service-provider finances-infrastructure finances-api + fair-signposting pom