Skip to content

Quality Assurance

Consorci de Serveis Universitaris de Catalunya edited this page Oct 27, 2021 · 6 revisions

This module allows users to review input data quality through a three-step process.

Quality

  1. Schema
  2. Semantic
    • Structural validation
    • Uses Schematron
    • Only valid inputs can continue to step 3.
  3. Content
    • Reviews metadata field values against a configurable specification. The result can be:
      • OK: valid record
      • Error: invalid record
      • Warning: partially valid record
      • Info: valid items

Maven Dependencies

<dependency>
   <groupId>org.csuc</groupId>
   <artifactId>quality</artifactId>
   <version>${VERSION}</version>
</dependency>

Module settings

You need to provide an RDF/XML file containing the EDM records produced by the Transformation module.

Configuration file

Quality configuration file: defines the kind of validation applied to each metadata field.
# QUALITY CONFIG

# The provided cultural heritage object
#"edm:ProvidedCHO" {
#    "dc:contributor" {
#    }
#    "dcterms:isReplacedBy" {
#    }
#    "dc:coverage" {
#    }
#    "dcterms:isRequiredBy" {
#    }
#    "dc:creator" {
#    }
#    "dcterms:issued" {
#    }
#    "dc:date" {
#    }
#    "dcterms:isVersionOf" {
#    }
#    "edm:description" {
#    }
#    "dcterms:medium" {
#    }
#    "dc:format" {
#    }
#    "dcterms:provenance" {
#    }
#    "dc:identifier" {
#    }
#    "dcterms:references" {
#    }
#    "dc:language" {
#    }
#    "dc:publisher" {
#    }
#    "dcterms:requires" {
#    }
#    "dc:relation" {
#    }
#    "dcterms:spatial" {
#    }
#    "dc:rights" {
#    }
#    "dcterms:tableOfContents" {
#    }
#    "dc:source" {
#    }
#    "dcterms:temporal" {
#    }
#    "dc:subject" {
#    }
#    "edm:currentLocation" {
#    }
#    "edm:title" {
#    }
#    "edm:hasMet" {
#    }
#    "dc:type" {
#    }
#    "edm:hasType" {
#    }
#    "dcterms:alternative" {
#    }
#    "edm:incorporates" {
#    }
#    "dcterms:conformsTo" {
#    }
#    "edm:isDerivativeOf" {
#    }
#    "dcterms:created" {
#    }
#    "edm:isNextInSequence" {
#    }
#    "dcterms:extent" {
#    }
#    "edm:isRelatedTo" {
#    }
#    "dcterms:hasFormat" {
#    }
#    "edm:isRepresentationOf" {
#    }
#    "dcterms:hasPart" {
#    }
#    "edm:isSimilarTo" {
#    }
#    "dcterms:hasVersion" {
#    }
#    "edm:isSuccessorOf" {
#    }
#    "dcterms:isFormatOf" {
#    }
#    "edm:realizes" {
#    }
#    "dcterms:isPartOf" {
#    }
#    "edm:type" {
#    }
#    "dcterms:isReferencedBy" {
#    }
#    "owl:sameAs" {
#    }
#}

# The web resource that is the digital representation
#"edm:WebResource" {
#    "dc:creator" {
#    }
#    "dcterms:hasPart" {
#    }
#    "dc:description" {
#    }
#    "dcterms:isFormatOf" {
#    }
#    "dc:format" {
#    }
#    "dcterms:isPartOf" {
#    }
#    "dc:rights" {
#    }
#    "dcterms:isReferencedBy" {
#    }
#    "dc:source" {
#    }
#    "dcterms:issued" {
#    }
#    "dcterms:conformsTo" {
#    }
#    "edm:isNextInSequence" {
#    }
#    "dcterms:created" {
#    }
#    "edm:rights" {
#    }
#    "dcterms:extent" {
#    }
#    "owl:sameAs" {
#    }
#}

# The aggregation that groups the classes together
#"ore:Aggregation" {
#    "edm:aggregatedCHO" {
#    }
#    "edm:provider" {
#    }
#    "edm:dataProvider" {
#    }
#    "dc:rights" {
#    }
#    "edm:hasView" {
#    }
#    "edm:rights" {
#    }
#    "edm:isShownAt" {
#    }
#    "edm:ugc" {
#    }
#    "edm:isShownBy" {
#    }
#    "edm:intermediateProvider" {
#    }
#    "edm:object" {
#    }
#}

# Who
#"edm:Agent" {
#    "skos:prefLabel" {
#    }
#    "foaf:name" {
#    }
#    "skos:altLabel" {
#    }
#    "rdaGr2:biographicalInformation" {
#    }
#    "skos:note" {
#    }
#    "rdaGr2:dateOfBirth" {
#    }
#    "dc:date" {
#    }
#    "rdaGr2:dateOfDeath" {
#    }
#    "dc:identifier" {
#    }
#    "rdaGr2:dateOfEstablishment" {
#    }
#    "dcterms:hasPart" {
#    }
#    "rdaGr2:dateOfTermination" {
#    }
#    "dcterms:isPartOf" {
#    }
#    "rdaGr2:gender" {
#    }
#    "edm:begin" {
#    }
#    "rdaGr2:placeOfBirth" {
#    }
#    "edm:end" {
#    }
#    "rdaGr2:placeOfDeath" {
#    }
#    "edm:hasMet" {
#    }
#    "rdaGr2:professionOrOccupation" {
#    }
#    "edm:isRelatedTo" {
#    }
#    "owl:sameAs" {
#    }
#}

# Where
#"edm:Place" {
#   "wgs84_pos:lat" {
#   }
#   "skos:note" {
#   }
#   "wgs84_pos:long" {
#   }
#   "dcterms:hasPart" {
#   }
#   "wgs84_pos:alt" {
#   }
#   "dcterms:isPartOf" {
#   }
#   "skos:prefLabel" {
#   }
#   "edm:isNextInSequence" {
#   }
#   "skos:altLabel" {
#   }
#   "owl:sameAs" {
#   }
#}

# When
#"edm:TimeSpan" {
#    "skos:prefLabel" {
#    }
#    "edm:begin" {
#    }
#    "skos:altLabel" {
#    }
#    "edm:end" {
#    }
#    "skos:note" {
#    }
#    "edm:isNextInSequence" {
#    }
#    "dcterms:hasPart" {
#    }
#    "owl:sameAs" {
#    }
#    "dcterms:isPartOf" {
#    }
#}

# What
#"skos:Concept" {
#    "skos:prefLabel" {
#    }
#    "skos:relatedMatch" {
#    }
#    "skos:altLabel" {
#    }
#    "skos:exactMatch" {
#    }
#    "skos:broader" {
#    }
#    "skos:closeMatch" {
#    }
#    "skos:narrower" {
#    }
#    "skos:note" {
#    }
#    "skos:related" {
#    }
#    "skos:notation" {
#    }
#    "skos:broadMatch" {
#    }
#    "skos:inScheme" {
#    }
#    "skos:narrowMatch" {
#    }
#}

# Access and usage
#"cc:License" {
#    "odrl:inheritFrom" {
#    }
#    "cc:deprecatedOn" {
#    }
#}

Examples

Example 1

Input

An EDM job transformed to an RDF/XML file.

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:edm="http://www.europeana.eu/schemas/edm/" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:wgs84_pos="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:rdaGr2="http://rdvocab.info/ElementsGr2/" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:ebucore="http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#" xmlns:doap="http://usefulinc.com/ns/doap#" xmlns:odrl="http://www.w3.org/ns/odrl/2/" xmlns:cc="http://creativecommons.org/ns#" xmlns:ore="http://www.openarchives.org/ore/terms/" xmlns:svcs="http://rdfs.org/sioc/services#">
<edm:ProvidedCHO rdf:about="ProvidedCHO:b7709721-551b-40ad-b04b-1ec44f9af654">
    <dc:identifier>b7709721-551b-40ad-b04b-1ec44f9af654</dc:identifier>
    <dc:coverage rdf:resource="Place:HemelumerOldeferd"></dc:coverage>
    <dc:description>Deel: 3336, Periode: 1728</dc:description>
    <dc:title>Stem- en floreenkohieren 1728</dc:title>
    <dc:type>other: Stemkohieren</dc:type>
    <dc:date>1728-01-01</dc:date>
    <dc:date>1728-12-31</dc:date>
    <dcterms:temporal rdf:resource="TimeSpan:1728"></dcterms:temporal>
    <dc:contributor rdf:resource="Agent:Person:9098c633-0477-41ba-b60b-a7d4ae71eacc"></dc:contributor>
    <edm:isRelatedTo rdf:resource="Concept:other:Stemkohieren"></edm:isRelatedTo>
    <edm:type>TEXT</edm:type>
</edm:ProvidedCHO>
<edm:Agent rdf:about="Agent:Person:9098c633-0477-41ba-b60b-a7d4ae71eacc">
    <skos:prefLabel>Sipke</skos:prefLabel>
    <dc:identifier>Person:9098c633-0477-41ba-b60b-a7d4ae71eacc</dc:identifier>
    <edm:isRelatedTo rdf:resource="Concept:other:Eigenaarengebruiker"></edm:isRelatedTo>
    <rdaGr2:gender>Man</rdaGr2:gender>
</edm:Agent>
<edm:Place rdf:about="Place:HemelumerOldeferd">
    <skos:prefLabel>Hemelumer Oldeferd</skos:prefLabel>
    <skos:altLabel>Place</skos:altLabel>
</edm:Place>
<skos:Concept rdf:about="Concept:other:Eigenaarengebruiker">
    <skos:prefLabel>other:Eigenaar en gebruiker</skos:prefLabel>
    <skos:related rdf:resource="Agent:Person:9098c633-0477-41ba-b60b-a7d4ae71eacc"/>
</skos:Concept>
<skos:Concept rdf:about="Concept:other:Stemkohieren">
    <skos:prefLabel>other: Stemkohieren</skos:prefLabel>
</skos:Concept>
<edm:TimeSpan rdf:about="TimeSpan:1728">
    <skos:prefLabel>1728</skos:prefLabel>
</edm:TimeSpan>
<ore:Aggregation rdf:about="http://allefriezen.nl/zoeken/deeds/000155ff-e928-4d77-af5c-f337e8f2ef7e">
    <edm:aggregatedCHO rdf:resource="ProvidedCHO:b7709721-551b-40ad-b04b-1ec44f9af654"/>
    <edm:hasView rdf:resource="Concept:other:Stemkohieren"/>
    <edm:hasView rdf:resource="ProvidedCHO:b7709721-551b-40ad-b04b-1ec44f9af654"/>
    <edm:hasView rdf:resource="Agent:Person:9098c633-0477-41ba-b60b-a7d4ae71eacc"/>
    <edm:hasView rdf:resource="TimeSpan:1728"/>
    <edm:hasView rdf:resource="Place:HemelumerOldeferd"/>
    <edm:hasView rdf:resource="Concept:other:Eigenaarengebruiker"/>
    <edm:isShownAt rdf:resource="http://allefriezen.nl/zoeken/deeds/000155ff-e928-4d77-af5c-f337e8f2ef7e"/>
    <edm:provider>Tresoar</edm:provider>
</ore:Aggregation>
</rdf:RDF>
Results

If any error is found, an Excel file is generated. Click here to see an example.

Otherwise, the job will be available for publishing to the data lake.

Usage

Command line

Usage: 
 -f (--format) [datastore | xml | json] : format
 -i (--input) VAL                       : data input
 -o (--out) PATH                        : out
 -q (--quality-config) PATH             : quality file config
 -t (--type) [url | file]               : type

Example:

java -jar Quality-0.0.1.jar --type file -i ff5ba553-7e9f-41e8-adf8-c727ec5c6f70/0a12aa3f-7523-4d72-b287-8560396761f9.rdf -f json -q quality.conf

GUI

Quality Screen1: Send new job to Quality Assurance

Quality Screen2: Jobs list

Quality Screen3: Finished job details

Quality Screen4: Results page

Clone this wiki locally