diff --git a/README.md b/README.md index 0659bc4..13c73a7 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,7 @@ Below are all validation rules ordered by their execution priority (every test i |Test|Info,Warning|AST20|Disposal decisions|Provides information about the number of disposal decisions related to series, classes, files, records, and document descriptions, and tests whether the corresponding value in arkivuttrekk.xml (inneholderDokumenterSomSkalKasseres) is correct.| |Test|Info,Warning|AST21|Disposals|Provides information about the number of disposals of series and document descriptions, and tests whether the corresponding value in arkivuttrekk.xml (omfatterDokumenterSomErKassert) is correct.| |Test|Info,Warning|AST22|Personal name fields|Checks whether all name fields contain seemingly valid personal names. The regular expressions used for the purpose are "\^[\p{L}\s-'.]*$" and "\^[\p{L}\s-'.]+$" depending on whether the value can be blank or not. The validated fields are: saksansvarlig (M306), kontaktperson (M412), korrespondansepartNavn (M400), sakspartNavn (M302), moeteDeltakerNavn (M372), arkivertAv (M605), avskrevetAv (M618), tilknyttetAv (M621), merknadRegistrertAv (M612), kassertAv (M631), slettetAv (M614), gradertAv (M625), presedensGodkjentAv (M629), verifisertAv (M623), nedgradertAv (M627), opprettetAv (M601), avsluttetAv (M603).| +|Test|Info,Error|AST23|Document object file sizes|Tests whether the file size recorded for each document object in arkivstruktur.xml exactly matches the size of the referenced physical file on the file system.| **loependejournal:** diff --git a/noark-extraction-validator/src/main/java/com/documaster/validator/storage/model/Field.java b/noark-extraction-validator/src/main/java/com/documaster/validator/storage/model/Field.java index 10f8b9c..fde2b8f 100644 --- a/noark-extraction-validator/src/main/java/com/documaster/validator/storage/model/Field.java +++ 
b/noark-extraction-validator/src/main/java/com/documaster/validator/storage/model/Field.java @@ -32,6 +32,7 @@ public class Field { public static final String INTERNAL_ID = "_id"; public static final String DETECTED_CHECKSUM = "_detected_checksum"; + public static final String DETECTED_FILE_SIZE = "_detected_file_size"; public static final String DETECTED_FILE_TYPE = "_detected_type"; public static final String IS_VALID_FILE_TYPE = "_is_valid_type"; diff --git a/noark-extraction-validator/src/main/java/com/documaster/validator/validation/noark5/parsers/ArchiveStructureHandler.java b/noark-extraction-validator/src/main/java/com/documaster/validator/validation/noark5/parsers/ArchiveStructureHandler.java index f87a67b..0e7f199 100644 --- a/noark-extraction-validator/src/main/java/com/documaster/validator/validation/noark5/parsers/ArchiveStructureHandler.java +++ b/noark-extraction-validator/src/main/java/com/documaster/validator/validation/noark5/parsers/ArchiveStructureHandler.java @@ -121,6 +121,7 @@ public void endElement(String uri, String localName, String qName) throws SAXExc String contentType = isValidPdfA ? 
PDFAValidator.VALID_FILE_TYPE : PDFAValidator.getFileType(document); + getItem().add(Field.DETECTED_FILE_SIZE, document.length()); getItem().add(Field.DETECTED_FILE_TYPE, contentType); getItem().add(Field.DETECTED_CHECKSUM, checksum); getItem().add(Field.IS_VALID_FILE_TYPE, isValidPdfA); diff --git a/noark-extraction-validator/src/main/resources/noark5/noark5.properties b/noark-extraction-validator/src/main/resources/noark5/noark5.properties index 73ffab5..8599fcd 100644 --- a/noark-extraction-validator/src/main/resources/noark5/noark5.properties +++ b/noark-extraction-validator/src/main/resources/noark5/noark5.properties @@ -14,4 +14,4 @@ uniqueFields.loependejournal.journalpost = systemid uniqueFields.offentligjournal.journalpost = systemid # Additional fields that should be introduced per itemDef -additionalFields.arkivstruktur.dokumentobjekt = _detected_checksum, _detected_type, _is_valid_type +additionalFields.arkivstruktur.dokumentobjekt = _detected_checksum, _detected_file_size, _detected_type, _is_valid_type diff --git a/noark-extraction-validator/src/main/resources/noark5/noark53/noark53-validation.xml b/noark-extraction-validator/src/main/resources/noark5/noark53/noark53-validation.xml index 5bef23f..3351e78 100644 --- a/noark-extraction-validator/src/main/resources/noark5/noark53/noark53-validation.xml +++ b/noark-extraction-validator/src/main/resources/noark5/noark53/noark53-validation.xml @@ -2868,6 +2868,31 @@ + + Document object file sizes + + Tests whether the document object file sizes specified in arkivstruktur.xml + match the ones with the physical files on the file system. + + arkivstruktur + + + _detected_file_size; + ]]> + + + _detected_file_size; + ]]> + + + +