From 70e97439a2384fc597e7a8b9df98f1968b45745e Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 28 Oct 2024 14:19:00 +0100 Subject: [PATCH 1/4] Add HTML table: filter and sort (metafacture-core#369) The flux-commands-table.html is a HTML view of flux-commands.md with filter and sort functionality of the signature of the commands. Proof of concept - WIP --- assets/css/sortTable.css | 101 +++++++++++++++++ assets/js/sortable-table.js | 168 +++++++++++++++++++++++++++++ docs/flux/flux-commands-table.html | 122 +++++++++++++++++++++ 3 files changed, 391 insertions(+) create mode 100644 assets/css/sortTable.css create mode 100644 assets/js/sortable-table.js create mode 100644 docs/flux/flux-commands-table.html diff --git a/assets/css/sortTable.css b/assets/css/sortTable.css new file mode 100644 index 0000000..47ca04f --- /dev/null +++ b/assets/css/sortTable.css @@ -0,0 +1,101 @@ +.sr-only { + position: absolute; + top: -30em; +} + +table.sortable td, +table.sortable th { + padding: 0.125em 0.25em; + width: 22em; +} + +table.sortable th { + font-weight: bold; + border-bottom: thin solid #888; + position: relative; +} + +table.sortable th.no-sort { + padding-top: 0.35em; +} + +table.sortable th:nth-child(5) { + width: 10em; +} + +table.sortable th button { + padding: 4px; + margin: 1px; + font-size: 100%; + font-weight: bold; + background: transparent; + border: none; + display: inline; + right: 0; + left: 0; + top: 0; + bottom: 0; + width: 100%; + text-align: left; + outline: none; + cursor: pointer; +} + +table.sortable th button span { + position: absolute; + right: 4px; +} + +table.sortable th[aria-sort="descending"] span::after { + content: "▼"; + color: currentcolor; + font-size: 100%; + top: 0; +} + +table.sortable th[aria-sort="ascending"] span::after { + content: "▲"; + color: currentcolor; + font-size: 100%; + top: 0; +} + +table.show-unsorted-icon th:not([aria-sort]) button span::after { + content: "♢"; + color: currentcolor; + font-size: 100%; + 
position: relative; + top: -3px; + left: -4px; +} + +table.sortable td.num { + text-align: right; +} + +table.sortable tbody tr:nth-child(odd) { + background-color: #ddd; +} + +/* Focus and hover styling */ + +table.sortable th button:focus, +table.sortable th button:hover { + padding: 2px; + border: 2px solid currentcolor; + background-color: #e5f4ff; +} + +table.sortable th button:focus span, +table.sortable th button:hover span { + right: 2px; +} + +table.sortable th:not([aria-sort]) button:focus span::after, +table.sortable th:not([aria-sort]) button:hover span::after { + content: "▼"; + color: currentcolor; + font-size: 100%; + top: 0; +} + diff --git a/assets/js/sortable-table.js b/assets/js/sortable-table.js new file mode 100644 index 0000000..65be61c --- /dev/null +++ b/assets/js/sortable-table.js @@ -0,0 +1,168 @@ +/* + * This content is licensed according to the W3C Software License at + * https://www.w3.org/Consortium/Legal/2015/copyright-software-and-document + * + * File: sortable-table.js + * + * Desc: Adds sorting to a HTML data table that implements ARIA Authoring Practices + */ + +'use strict'; + +class SortableTable { + constructor(tableNode) { + this.tableNode = tableNode; + + this.columnHeaders = tableNode.querySelectorAll('thead th'); + + this.sortColumns = []; + + for (var i = 0; i < this.columnHeaders.length; i++) { + var ch = this.columnHeaders[i]; + var buttonNode = ch.querySelector('button'); + if (buttonNode) { + this.sortColumns.push(i); + buttonNode.setAttribute('data-column-index', i); + buttonNode.addEventListener('click', this.handleClick.bind(this)); + } + } + + this.optionCheckbox = document.querySelector( + 'input[type="checkbox"][value="show-unsorted-icon"]' + ); + + if (this.optionCheckbox) { + this.optionCheckbox.addEventListener( + 'change', + this.handleOptionChange.bind(this) + ); + if (this.optionCheckbox.checked) { + this.tableNode.classList.add('show-unsorted-icon'); + } + } + } + + setColumnHeaderSort(columnIndex) { + if 
(typeof columnIndex === 'string') { + columnIndex = parseInt(columnIndex); + } + + for (var i = 0; i < this.columnHeaders.length; i++) { + var ch = this.columnHeaders[i]; + var buttonNode = ch.querySelector('button'); + if (i === columnIndex) { + var value = ch.getAttribute('aria-sort'); + if (value === 'descending') { + ch.setAttribute('aria-sort', 'ascending'); + this.sortColumn( + columnIndex, + 'ascending', + ch.classList.contains('num') + ); + } else { + ch.setAttribute('aria-sort', 'descending'); + this.sortColumn( + columnIndex, + 'descending', + ch.classList.contains('num') + ); + } + } else { + if (ch.hasAttribute('aria-sort') && buttonNode) { + ch.removeAttribute('aria-sort'); + } + } + } + } + + sortColumn(columnIndex, sortValue, isNumber) { + function compareValues(a, b) { + if (sortValue === 'ascending') { + if (a.value === b.value) { + return 0; + } else { + if (isNumber) { + return a.value - b.value; + } else { + return a.value < b.value ? -1 : 1; + } + } + } else { + if (a.value === b.value) { + return 0; + } else { + if (isNumber) { + return b.value - a.value; + } else { + return a.value > b.value ? 
-1 : 1; + } + } + } + } + + if (typeof isNumber !== 'boolean') { + isNumber = false; + } + + var tbodyNode = this.tableNode.querySelector('tbody'); + var rowNodes = []; + var dataCells = []; + + var rowNode = tbodyNode.firstElementChild; + + var index = 0; + while (rowNode) { + rowNodes.push(rowNode); + var rowCells = rowNode.querySelectorAll('th, td'); + var dataCell = rowCells[columnIndex]; + + var data = {}; + data.index = index; + data.value = dataCell.textContent.toLowerCase().trim(); + if (isNumber) { + data.value = parseFloat(data.value); + } + dataCells.push(data); + rowNode = rowNode.nextElementSibling; + index += 1; + } + + dataCells.sort(compareValues); + + // remove rows + while (tbodyNode.firstChild) { + tbodyNode.removeChild(tbodyNode.lastChild); + } + + // add sorted rows + for (var i = 0; i < dataCells.length; i += 1) { + tbodyNode.appendChild(rowNodes[dataCells[i].index]); + } + } + + /* EVENT HANDLERS */ + + handleClick(event) { + var tgt = event.currentTarget; + this.setColumnHeaderSort(tgt.getAttribute('data-column-index')); + } + + handleOptionChange(event) { + var tgt = event.currentTarget; + + if (tgt.checked) { + this.tableNode.classList.add('show-unsorted-icon'); + } else { + this.tableNode.classList.remove('show-unsorted-icon'); + } + } +} + +// Initialize sortable table buttons +window.addEventListener('load', function () { + var sortableTables = document.querySelectorAll('table.sortable'); + for (var i = 0; i < sortableTables.length; i++) { + new SortableTable(sortableTables[i]); + } +}); + diff --git a/docs/flux/flux-commands-table.html b/docs/flux/flux-commands-table.html new file mode 100644 index 0000000..b06ebdd --- /dev/null +++ b/docs/flux/flux-commands-table.html @@ -0,0 +1,122 @@ + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Flux commands +  (column headers with buttons are sortable). +
+ + descriptionoptions + + + + + + example in Playgroundjava class
add-oreaggregationAdds ore:Aggregation to an Europeana Data Model stream. The aggregation id is set by emitting literal('aggregation_id', id)-StreamReceiverStreamReceiver-org.metafacture.linkeddata.OreAggregationAdder
add-preamble-epilogueAdds a String preamle and/or epilogue to the streampreamble (String), epilogue (String)StringStringexample in Playgroundorg.metafacture.formatting.PreambleEpilogueAdder
badd-preamble-epiloguebAdds a String preamle and/or epilogue to the streampreamble (String), epilogue (String)StringStreamReceiverexample in Playgroundorg.metafacture.formatting.PreambleEpilogueAdder
badd-preamble-epiloguebAdds a String preamle and/or epilogue to the streampreamble (String), epilogue (String)StreamReceiverStringexample in Playgroundorg.metafacture.formatting.PreambleEpilogueAdder
+
+ + + From 650a16b94c065fe1d069f0c8dcc53fe4f1f17300 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 31 Oct 2024 16:00:26 +0100 Subject: [PATCH 2/4] Add complete flux-commands --- docs/flux/flux-commands-table.html | 1087 ++++++++++++++++++++++++++-- 1 file changed, 1040 insertions(+), 47 deletions(-) diff --git a/docs/flux/flux-commands-table.html b/docs/flux/flux-commands-table.html index b06ebdd..2bf1e6d 100644 --- a/docs/flux/flux-commands-table.html +++ b/docs/flux/flux-commands-table.html @@ -50,71 +50,1064 @@ - description - options + Description + Options - + - + - example in Playground - java class + Example in Playground + Java class - - - add-oreaggregation - Adds ore:Aggregation to an Europeana Data Model stream. The aggregation id is set by emitting literal('aggregation_id', id) - - - StreamReceiver - StreamReceiver - - - org.metafacture.linkeddata.OreAggregationAdder - - - add-preamble-epilogue - Adds a String preamle and/or epilogue to the stream - preamble (String), epilogue (String) - String - String - example in Playground - org.metafacture.formatting.PreambleEpilogueAdder - - - badd-preamble-epilogue - bAdds a String preamle and/or epilogue to the stream - preamble (String), epilogue (String) - String - StreamReceiver - example in Playground - org.metafacture.formatting.PreambleEpilogueAdder - - - badd-preamble-epilogue - bAdds a String preamle and/or epilogue to the stream - preamble (String), epilogue (String) - StreamReceiver - String - example in Playground - org.metafacture.formatting.PreambleEpilogueAdder - - + + + add-oreaggregation + adds ore:Aggregation to an Europeana Data Model stream. 
The aggregation id is set by emitting literal('aggregation_id', id) + + StreamReceiver +StreamReceiver + + org.metafacture.linkeddata.OreAggregationAdder + + + add-preamble-epilogue + Adds a String preamble and/or epilogue to the stream + preamble (String), epilogue (String) + String +String + example in Playground + org.metafacture.formatting.PreambleEpilogueAdder + + + as-formeta-records + Reads a stream of formeta data and splits between each top-level element + + Reader +String + example in Playground + org.metafacture.formeta.FormetaRecordsReader + + + as-lines + Processes input from a reader line by line. + + Reader +String + example in Playground + org.metafacture.io.LineReader + + + as-records + Reads data from a Reader and splits it into individual records + skipemptyrecords (boolean), separator (String) + Reader +String + example in Playground + org.metafacture.io.RecordReader + + + batch-log + Writes log info every BATCHSIZE records. + batchsize (int) + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.monitoring.StreamBatchLogger + + + batch-reset + Resets flow for every BATCHSIZE records. + batchsize (int) + StreamReceiver +StreamReceiver + + org.metafacture.flowcontrol.StreamBatchResetter + + + calculate-metrics + Calculates values for various cooccurrence metrics. The expected inputs are triples containing as subject the var name and as object the count. Marginal counts must appear first, joint counts second. Marginal counts must be written as 1:A, Joint counts as 2:A&B + + Triple +Triple + + org.metafacture.statistics.CooccurrenceMetricCalculator + + + catch-object-exception + passes objects through and catches exceptions. + logprefix (String), logstacktrace (boolean) + Object +Object + + org.metafacture.flowcontrol.ObjectExceptionCatcher + + + catch-stream-exception + passes stream events through and catches exceptions. 
+ + StreamReceiver +StreamReceiver + + org.metafacture.flowcontrol.StreamExceptionCatcher + + + change-id + By default changes the record ID to the value of the '_id' literal (if present). Use the constructor to choose another literal as ID source. + keepidliteral (boolean), idliteral (String), keeprecordswithoutidliteral (boolean) + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.mangling.RecordIdChanger + + + collect-triples + Collects named values to form records. The name becomes the id, the value is split by 'separator' into name and value + + Triple +StreamReceiver + example in Playground + org.metafacture.triples.TripleCollect + + + count-triples + Counts triples + countpredicate (String), countby [SUBJECT, PREDICATE, OBJECT, ALL] + Triple +Triple + + org.metafacture.triples.TripleCount + + + decode-aseq + Parses a raw Aseq record (UTF-8 encoding expected). + + String +StreamReceiver + example in Playground + org.metafacture.biblio.AseqDecoder + + + decode-csv + Decodes lines of CSV files. First line may be interpreted as header. + hasheader (boolean), separator (String) + String +StreamReceiver + example in Playground + org.metafacture.csv.CsvDecoder + + + decode-formeta + Decodes a record in formeta format. + + String +StreamReceiver + example in Playground + org.metafacture.formeta.FormetaDecoder + + + decode-html + Decode HTML to metadata events. The attrValsAsSubfields option can be used to override the default attribute values to be used as subfields (e.g. by default `link rel="canonical" href="http://example.org"` becomes `link.canonical`). It expects an HTTP-style query string specifying as key the attributes whose value should be used as a subfield, and as value the attribute whose value should be the subfield value, e.g. the default contains `link.rel=href`. To use the HTML element text as the value (instead of another attribute), omit the value of the query-string key-value pair, e.g. `title.lang`. 
To add to the defaults, instead of replacing them, start with an `&`, e.g. `&h3.class` + attrvalsassubfields (String) + Reader +StreamReceiver + example in Playground + org.metafacture.html.HtmlDecoder + + + decode-json + Decodes JSON to metadata events. The 'recordPath' option can be used to set a JsonPath to extract a path as JSON - or to split the data into multiple JSON documents. + recordid (String), recordcount (int), booleanmarker (String), arraymarker (String), arrayname (String), recordpath (String), allowcomments (boolean), numbermarker (String) + String +StreamReceiver + example in Playground + org.metafacture.json.JsonDecoder + + + decode-mab + Parses a raw Mab2 stream (UTF-8 encoding expected). + + String +StreamReceiver + example in Playground + org.metafacture.biblio.MabDecoder + + + decode-marc21 + Decodes MARC 21 records (UTF-8 encoding expected). + emitleaderaswhole (boolean), ignoremissingid (boolean) + String +StreamReceiver + example in Playground + org.metafacture.biblio.marc21.Marc21Decoder + + + decode-pica + Parses pica+ records. The parser only parses single records. A string containing multiple records must be split into individual records before passing it to PicaDecoder. + trimfieldnames (boolean), normalizedserialization (boolean), ignoremissingidn (boolean), skipemptyfields (boolean), normalizeutf8 (boolean) + String +StreamReceiver + example in Playground + org.metafacture.biblio.pica.PicaDecoder + + + decode-string + Splits a String into several Strings, either by extracting parts that match a regexp or by splitting by a regexp. + mode [SPLIT, EXTRACT] + String +String + + org.metafacture.strings.StringDecoder + + + decode-xml + Reads an XML file and passes the XML events to a receiver. Set `totalEntitySizeLimit="0"` to allow unlimited XML entities. + totalentitysizelimit (String) + Reader +XmlReceiver + example in Playground + org.metafacture.xml.XmlDecoder + + + decode-yaml + Decodes YAML to metadata events. 
+ recordid (String), recordcount (int), arraymarker (String), arrayname (String) + String +StreamReceiver + example in Playground + org.metafacture.yaml.YamlDecoder + + + decouple + creates a new thread in which subsequent flow elements run. + debug (boolean) + Object +Object + + org.metafacture.flowcontrol.ObjectPipeDecoupler + + + defer-stream + Defers all stream events until an end-record event is received + + StreamReceiver +StreamReceiver + + org.metafacture.flowcontrol.StreamDeferrer + + + digest-file + Uses the input string as a file name and computes a cryptographic hash of the file + + String +Triple + + org.metafacture.files.FileDigestCalculator + + + discard-events + + discardlifecycleevents (boolean), discardliteralevents (boolean), discardentityevents (boolean), discardrecordevents (boolean) + StreamReceiver +StreamReceiver + + org.metafacture.mangling.StreamEventDiscarder + + + draw-uniform-sample + Draws a uniform sample of records from the input stream. + + Object +Object + example in Playground + org.metafacture.statistics.UniformSampler + + + encode-csv + Encodes each value in a record as a csv row. + includeheader (boolean), noquotes (boolean), separator (String), includerecordid (boolean) + StreamReceiver +String + example in Playground + org.metafacture.csv.CsvEncoder + + + encode-formeta + Encodes streams in formeta format. + style [CONCISE, VERBOSE, MULTILINE] + StreamReceiver +String + example in Playground + org.metafacture.formeta.FormetaEncoder + + + encode-json + Serialises an object as JSON + booleanmarker (String), arraymarker (String), prettyprinting (boolean), numbermarker (String) + StreamReceiver +String + example in Playground + org.metafacture.json.JsonEncoder + + + encode-literals + Outputs the name and value of each literal which is received as a string. Name and value are separated by a separator string. The default separator string is a tab. If a literal name is empty, only the value will be output without a separator. 
The module ignores record and entity events. In particular, this means that literal names are not prefixed by the name of the entity which contains them. + separator (String) + StreamReceiver +String + example in Playground + org.metafacture.formatting.StreamLiteralFormatter + + + encode-marc21 + Encodes MARC21 records + generateidfield (boolean) + StreamReceiver +String + example in Playground + org.metafacture.biblio.marc21.Marc21Encoder + + + encode-marcxml + Encodes a stream into MARCXML. If you can't ensure valid MARC21 (e.g. the leader isn't correct or not set as one literal) then set the parameter `ensureCorrectMarc21Xml` to `true`. + ensurecorrectmarc21xml (boolean), emitnamespace (boolean), xmlversion (String), formatted (boolean), xmlencoding (String) + StreamReceiver +String + example in Playground + org.metafacture.biblio.marc21.MarcXmlEncoder + + + encode-pica + Encodes a stream in pica+ format + ignorerecordid (boolean) + StreamReceiver +String + + org.metafacture.biblio.pica.PicaEncoder + + + encode-xml + Encodes a stream as XML. Defaults: `rootTag="records"`, `recordTag="record"`, no attributeMarker. + recordtag (String), namespacefile (String), xmlheaderversion (String), writexmlheader (boolean), xmlheaderencoding (String), separateroots (boolean), roottag (String), valuetag (String), attributemarker (String), writeroottag (boolean), namespaces (String) + StreamReceiver +String + example in Playground + org.metafacture.xml.SimpleXmlEncoder + + + encode-yaml + Serialises an object as YAML. + arraymarker (String), [deprecated] prettyprinting (boolean) + StreamReceiver +String + example in Playground + org.metafacture.yaml.YamlEncoder + + + extract-element + Extracts the specified element from an HTML document + + Reader +String + example in Playground + org.metafacture.html.ElementExtractor + + + filter + Filters a stream based on a morph definition. A record is accepted if the morph returns at least one non empty value. 
+ + StreamReceiver +StreamReceiver + + org.metafacture.metamorph.Filter + + + filter-duplicate-objects + Filters consecutive duplicated data objects. + + Object +Object + + org.metafacture.mangling.DuplicateObjectFilter + + + filter-null-values + Discards or replaces null values + replacement (String) + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.mangling.NullFilter + + + filter-records-by-path + Splits a stream into records based on entity path + path (String), entityseparator (String), recordidformat (String) + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.mangling.RecordPathFilter + + + filter-strings + Only forwards records which match (or do not match) a regular expression. + passmatches (boolean) + String +String + example in Playground + org.metafacture.strings.StringFilter + + + filter-triples + Filters triple. The patterns for subject, predicate and object are disjunctive. + predicatepattern (String), objectpattern (String), passmatches (boolean), subjectpattern (String) + Triple +Triple + example in Playground + org.metafacture.triples.TripleFilter + + + find-fix-paths + Finds all paths that have values that match the given pattern. Allows for regex. These paths can be used in a Fix to address fields. + + StreamReceiver +String + + org.metafacture.metafix.FindFixPaths + + + fix + Applies a fix transformation to the event stream, given as the path to a fix file or the fixes themselves. 
+ repeatedfieldstoentities (boolean), strictness [PROCESS, RECORD, EXPRESSION], entitymembername (String), strictnesshandlesprocessexceptions (boolean) + StreamReceiver +StreamReceiver + + org.metafacture.metafix.Metafix + + + flatten + flattens out entities in a stream by introducing dots in literal names + entitymarker (String) + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.mangling.StreamFlattener + + + from-jdom-document + + + Document +StreamReceiver + + org.metafacture.jdom.JDomDocumentToStream + + + handle-cg-xml + Reads CG-XML files + + XmlReceiver +StreamReceiver + + org.metafacture.xml.CGXmlHandler + + + handle-comarcxml + A comarc xml reader + + XmlReceiver +StreamReceiver + + org.metafacture.biblio.ComarcXmlHandler + + + handle-generic-xml + A generic XML reader. Separates XML data in distinct records with the defined record tag name (default: `recordtagname="record"`) If no matching record tag is found, the output will be empty. The handler breaks down XML elements with simple string values and optional attributes into entities with a value subfield (name configurable) and additional subfields for each attribute. Record tag and value tag names can be configured. Attributes can get an attributeMarker. + emitnamespace (boolean), recordtagname (String), attributemarker (String), valuetagname (String) + XmlReceiver +StreamReceiver + example in Playground + org.metafacture.xml.GenericXmlHandler + + + handle-mabxml + A MAB XML reader + + XmlReceiver +StreamReceiver + + org.metafacture.biblio.AlephMabXmlHandler + + + handle-marcxml + A MARC XML reader. 
To read marc data without namespace specification set option `namespace=""` + namespace (String), attributemarker (String) + XmlReceiver +StreamReceiver + example in Playground + org.metafacture.biblio.marc21.MarcXmlHandler + + + handle-picaxml + A pica xml reader + + XmlReceiver +StreamReceiver + + org.metafacture.biblio.pica.PicaXmlHandler + + + jscript + executes the function process(obj) in a given jscript + invoke (String) + Object +Object + + org.metafacture.scripting.JScriptObjectPipe + + + json-to-elasticsearch-bulk + + idkey (String), index (String), type (String) + String +String + + org.metafacture.elasticsearch.JsonToElasticsearchBulk + + + lines-to-records + Collects strings and emits them as records when a line matches the pattern or the stream is closed. + recordmarkerregexp (String) + String +String + example in Playground + org.metafacture.strings.LineRecorder + + + list-fix-paths + Lists all paths found in the input records. These paths can be used in a Fix to address fields. Options: `count` (output occurrence frequency of each path, sorted by highest frequency first; default: `true`), `template` (for formatting the internal triple structure; default: `${o} | ${s}` if count is true, else `${s}`), `index` (output individual repeated subfields and array elements with index numbers instead of '*'; default: `false`) + template (String), count (boolean), index (boolean) + StreamReceiver +String + + org.metafacture.metafix.ListFixPaths + + + list-fix-values + Lists all values found for the given path. The paths can be found using list-fix-paths. Options: `count` (output occurrence frequency of each value, sorted by highest frequency first; default: `true`), `template` (for formatting the internal triple structure; default: `${o} | ${s}` if count is true, else `${s}`) + template (String), count (boolean) + StreamReceiver +String + example in Playground + org.metafacture.metafix.ListFixValues + + + literal-to-object + Emits literal values as objects. 
+ pattern (String) + StreamReceiver +String + example in Playground + org.metafacture.mangling.LiteralToObject + + + log-object + logs objects with the toString method + + Object +Object + + org.metafacture.monitoring.ObjectLogger + + + log-stream + logs events + + StreamReceiver +StreamReceiver + + org.metafacture.monitoring.StreamLogger + + + log-stream-time + Benchmarks the execution time of the downstream modules. + + StreamReceiver +StreamReceiver + + org.metafacture.monitoring.StreamTimer + + + log-time + Benchmarks the execution time of the downstream modules. + + Object +Object + + org.metafacture.monitoring.ObjectTimer + + + map-to-stream + + + Map +StreamReceiver + + org.metafacture.javaintegration.MapToStream + + + match + Matches the incoming strings against a regular expression and replaces the matching parts. + pattern (String), replacement (String) + String +String + example in Playground + org.metafacture.strings.StringMatcher + + + merge-batch-stream + Merges a sequence of batchSize records + + StreamReceiver +StreamReceiver + + org.metafacture.plumbing.StreamBatchMerger + + + merge-same-ids + + + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.plumbing.StreamMerger + + + morph + Applies a metamorph transformation to the event stream. Metamorph definition is given in brackets. + + StreamReceiver +StreamReceiver + example in Playground + org.metafacture.metamorph.Metamorph + + + normalize-unicode-stream + Normalises composed and decomposed Unicode characters. + normalizationform [NFD, NFC, NFKD, NFKC], normalizevalues (boolean), normalizeids (boolean), normalizekeys (boolean) + StreamReceiver +StreamReceiver + + org.metafacture.strings.StreamUnicodeNormalizer + + + normalize-unicode-string + Normalizes diacritics in Unicode strings. + normalizationform [NFD, NFC, NFKD, NFKC] + String +String + + org.metafacture.strings.UnicodeNormalizer + + + object-batch-log + Writes log info for every BATCHSIZE records. 
+ batchsize (int) + Object +Object + + org.metafacture.monitoring.ObjectBatchLogger + + + object-tee + Sends an object to more than one receiver. + + Object +Object + + org.metafacture.plumbing.ObjectTee + + + object-to-literal + Outputs a record containing the input object as literal + recordid (String), literalname (String) + Object +StreamReceiver + + org.metafacture.mangling.ObjectToLiteral + + + open-file + Opens a file. + decompressconcatenated (boolean), encoding (String), compression (String) + String +Reader + example in Playground + org.metafacture.io.FileOpener + + + open-http + Opens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset`, `Accept-Encoding`, `Content-Encoding` and `Content-Type`, as well as generic headers (separated by `\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header (`accept`) = `*/*`, `Accept-Charset` header (`acceptcharset`) = `UTF-8`, `errorprefix` = `ERROR: `. + method [DELETE, GET, HEAD, OPTIONS, POST, PUT, TRACE], contentencoding (String), header (String), [deprecated] encoding (String), body (String), acceptcharset (String), acceptencoding (String), url (String), contenttype (String), accept (String), errorprefix (String) + String +Reader + example in Playground + org.metafacture.io.HttpOpener + + + open-oaipmh + Opens an OAI-PMH stream and passes a reader to the receiver. Mandatory arguments are: BASE_URL, DATE_FROM, DATE_UNTIL, METADATA_PREFIX, SET_SPEC . + setspec (String), datefrom (String), encoding (String), dateuntil (String), metadataprefix (String) + String +Reader + example in Playground + org.metafacture.biblio.OaiPmhOpener + + + open-resource + Opens a resource. + encoding (String) + String +Reader + + org.metafacture.io.ResourceOpener + + + open-tar + Opens a tar archive and passes every entry. 
+ + Reader +Reader + + org.metafacture.io.TarReader + + + pass-through + A simple pass-through module + + StreamReceiver +StreamReceiver + + org.metafacture.plumbing.IdentityStreamPipe + + + print + Writes objects to stdout + footer (String), header (String), encoding (String), compression (String), separator (String) + Object +Void + example in Playground + org.metafacture.io.ObjectStdoutWriter + + + rdf-macros + Expands some macros for RDF/XML + autoaddedsubject (String) + StreamReceiver +StreamReceiver + + org.metafacture.linkeddata.RdfMacroPipe + + + read-beacon + Reads BEACON format + metadatafilter (String), buffersize (int), relation (String) + Reader +StreamReceiver + example in Playground + org.metafacture.linkeddata.BeaconReader + + + read-dir + Reads a directory and emits all filenames found. + filenamepattern (String), recursive (boolean) + String +String + + org.metafacture.files.DirReader + + + read-string + Creates a reader for the supplied string and sends it to the receiver + + String +Reader + example in Playground + org.metafacture.strings.StringReader + + + read-triples + Reads triples + + String +Triple + + org.metafacture.triples.TripleReader + + + record-to-entity + + entityname (String), idliteralname (String) + StreamReceiver +StreamReceiver + + org.metafacture.mangling.RecordToEntity + + + regex-decode + Decodes a string based on a regular expression using named capture groups + rawinputliteral (String) + String +StreamReceiver + example in Playground + org.metafacture.strings.RegexDecoder + + + remodel-pica-multiscript + Groups multiscript fields in entities + + StreamReceiver +StreamReceiver + + org.metafacture.biblio.pica.PicaMultiscriptRemodeler + + + reorder-triple + Shifts subjectTo predicateTo and objectTo around + subjectfrom [SUBJECT, PREDICATE, OBJECT], objectfrom [SUBJECT, PREDICATE, OBJECT], predicatefrom [SUBJECT, PREDICATE, OBJECT] + Triple +Triple + + org.metafacture.triples.TripleReorder + + + reset-object-batch + Resets 
the downstream modules every batch-size objects + batchsize (int) + Object +Object + + org.metafacture.flowcontrol.ObjectBatchResetter + + + retrieve-triple-objects + Uses the object value of the triple as a URL and emits a new triple in which the object value is replaced with the contents of the resource identified by the URL. + defaultencoding (String) + Triple +Triple + + org.metafacture.triples.TripleObjectRetriever + + + sort-triples + Sorts triples. Several options can be combined, e.g. `by="object",numeric="true",order="decreasing"` will numerically sort the Object of the triples in decreasing order (given that all Objects are indeed of numeric type). + by [SUBJECT, PREDICATE, OBJECT, ALL], numeric (boolean), order [INCREASING, DECREASING] + Triple +Triple + example in Playground + org.metafacture.triples.TripleSort + + + split-lines + Splits a string at new lines and sends each line to the receiver. + + String +String + + org.metafacture.strings.LineSplitter + + + split-xml-elements + Splits elements (e.g. defining single records) residing in one XML document into multiple single XML documents. + elementname (String), xmldeclaration (String), toplevelelement (String) + XmlReceiver +StreamReceiver + + org.metafacture.xml.XmlElementSplitter + + + stream-count + Counts the number of records and fields read. + + StreamReceiver +StreamReceiver + + org.metafacture.statistics.Counter + + + stream-tee + Replicates an event stream to an arbitrary number of stream receivers. + + StreamReceiver +StreamReceiver + + org.metafacture.plumbing.StreamTee + + + stream-to-triples + Emits the literals which are received as triples such that the name and value become the predicate and the object of the triple. The record id containing the literal becomes the subject. If 'redirect' is true, the value of the subject is determined by using either the value of a literal named '_id', or for individual literals by prefixing their name with '{to:ID}'. 
Set 'recordPredicate' to encode a complete record in one triple. The value of 'recordPredicate' is used as the predicate of the triple. If 'recordPredicate' is set, no {to:ID}NAME-style redirects are possible. + redirect (boolean), recordpredicate (String) + StreamReceiver +Triple + example in Playground + org.metafacture.triples.StreamToTriples + + + stream-to-xml + Encodes a stream as XML. Defaults: `rootTag="records"`, `recordTag="record"`, no attributeMarker. + recordtag (String), namespacefile (String), xmlheaderversion (String), writexmlheader (boolean), xmlheaderencoding (String), separateroots (boolean), roottag (String), valuetag (String), attributemarker (String), writeroottag (boolean), namespaces (String) + StreamReceiver +String + + org.metafacture.xml.SimpleXmlEncoder + + + string-list-map-to-stream + + + ListMap +StreamReceiver + + org.metafacture.javaintegration.StringListMapToStream + + + template + Builds a String from a template and an Object. Provide template in brackets. `${o}` marks the place where the object is to be inserted. If the object is an instance of Triple `${s}`, `${p}` and `${o}` are used instead. + + Object +String + example in Playground + org.metafacture.formatting.ObjectTemplate + + + thread-object-tee + Incoming objects are distributed to the added receivers, running in their own threads. + + Object +Object + + org.metafacture.flowcontrol.ObjectThreader + + + to-jdom-document + Converts a stream into a JDom document. + + StreamReceiver +Document + + org.metafacture.jdom.StreamToJDomDocument + + + triples-to-stream + Converts a triple into a record stream + + Triple +StreamReceiver + + org.metafacture.triples.TriplesToStream + + + validate-json + Validate JSON against a given schema, send only valid input to the receiver. Pass the schema location to validate against. Write valid and/or invalid output to locations specified with `writeValid` and `writeInvalid`. 
Set the JSON key for the record ID value with `idKey` (for logging output, defaults to `id`). + idkey (String), writeinvalid (String), writevalid (String) + String +String + + org.metafacture.json.JsonValidator + + + wait-for-inputs + Blocks close-stream events until a given number of close-stream events have been received. + + Object +Object + example in Playground + org.metafacture.flowcontrol.CloseSuppressor + + + write + Writes objects to stdout or a file +Arguments: [stdout, PATH] + appendiffileexists (boolean), footer (String), header (String), encoding (String), compression [NONE, AUTO, BZIP2, GZIP, PACK200, XZ], separator (String) + Object +Void + + org.metafacture.io.ObjectWriter + + + write-files + Writes objects to one (or more) file(s) + appendiffileexists (boolean), footer (String), header (String), encoding (String), compression [NONE, AUTO, BZIP2, GZIP, PACK200, XZ], separator (String) + Object +Void + + org.metafacture.io.ObjectFileWriter + + + write-triple-objects + Writes the object value of the triple into a file. The filename is constructed from subject and predicate. Please note: This module does not check if the filename constructed from subject and predicate stays within `baseDir`. THIS MODULE SHOULD NOT BE USED IN ENVIRONMENTS IN WHICH THE VALUES OF SUBJECT AND PREDICATE A PROVIDED BY AN UNTRUSTED SOURCE! + encoding (String) + Triple +Void + + org.metafacture.triples.TripleObjectWriter + + + write-triples + Writes triples into a file. + + Triple +Void + + org.metafacture.triples.TripleWriter + + + write-xml-files + Writes the XML into the filesystem. The filename is constructed from the XPATH given as 'property'. Variables are:`target` (determining the output directory), `property` (the element in the XML entity. Constitutes the main part of the file's name.), `startIndex` ( a subfolder will be extracted out of the filename. This marks the index' beginning ), `stopIndex` ( a subfolder will be extracted out of the filename. 
This marks the index' end ) + endindex (int), startindex (int), property (String), filesuffix (String), encoding (String), compression (String), target (String) + StreamReceiver +Void + + org.metafacture.xml.XmlFilenameWriter + + + xml-tee + Sends an object to more than one receiver. + + XmlReceiver +XmlReceiver + example in Playground + org.metafacture.plumbing.XmlTee + + + + From 47be65f91ba95e5febdca720173e6c3b4c92558d Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 5 May 2025 15:42:59 +0200 Subject: [PATCH 3/4] Use jekyll markdown template (metafacture-core#369) - freeze table header when scrolling --- assets/css/sortTable.css | 16 + docs/flux/flux-commands-table.md | 1119 ++++++++++++++++++++++++++++++ 2 files changed, 1135 insertions(+) create mode 100644 docs/flux/flux-commands-table.md diff --git a/assets/css/sortTable.css b/assets/css/sortTable.css index 47ca04f..6858c7d 100644 --- a/assets/css/sortTable.css +++ b/assets/css/sortTable.css @@ -3,6 +3,22 @@ top: -30em; } +tr thead{ +width: 100%; +table-layout: fixed; +} + +table{ + height: 80%; + display: block; +} +tbody{ + overflow-y: scroll; + height: 80%; + width: 100%; + position: absolute; +} + table.sortable td, table.sortable th { padding: 0.125em 0.25em; diff --git a/docs/flux/flux-commands-table.md b/docs/flux/flux-commands-table.md new file mode 100644 index 0000000..0574616 --- /dev/null +++ b/docs/flux/flux-commands-table.md @@ -0,0 +1,1119 @@ +--- +layout: table-wrappers +title: Flux Commands Table +parent: Flux +nav_order: 3 +--- + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Flux commands +  (column headers with buttons are sortable). +
+ + DescriptionOptions + + + + + + Example in Playground + +
add-oreaggregationadds ore:Aggregation to a Europeana Data Model stream. The aggregation id is set by emitting literal('aggregation_id', id)StreamReceiverStreamReceiverorg.metafacture.linkeddata.OreAggregationAdder
add-preamble-epilogueAdds a String preamble and/or epilogue to the streampreamble (String), epilogue (String)StringStringexample in Playgroundorg.metafacture.formatting.PreambleEpilogueAdder
as-formeta-recordsReads a stream of formeta data and splits between each top-level elementReaderStringexample in Playgroundorg.metafacture.formeta.FormetaRecordsReader
as-linesProcesses input from a reader line by line.ReaderStringexample in Playgroundorg.metafacture.io.LineReader
as-recordsReads data from a Reader and splits it into individual recordsskipemptyrecords (boolean), separator (String)ReaderStringexample in Playgroundorg.metafacture.io.RecordReader
batch-logWrites log info every BATCHSIZE records. batchsize (int)StreamReceiverStreamReceiverexample in Playgroundorg.metafacture.monitoring.StreamBatchLogger
batch-resetResets flow for every BATCHSIZE records.batchsize (int)StreamReceiverStreamReceiverorg.metafacture.flowcontrol.StreamBatchResetter
calculate-metricsCalculates values for various cooccurrence metrics. The expected inputs are triples containing as subject the var name and as object the count. Marginal counts must appear first, joint counts second. Marginal counts must be written as 1:A, Joint counts as 2:A&BTripleTripleorg.metafacture.statistics.CooccurrenceMetricCalculator
catch-object-exceptionpasses objects through and catches exceptions.logprefix (String), logstacktrace (boolean)ObjectObjectorg.metafacture.flowcontrol.ObjectExceptionCatcher
catch-stream-exceptionpasses stream events through and catches exceptions.StreamReceiverStreamReceiverorg.metafacture.flowcontrol.StreamExceptionCatcher
change-idBy default changes the record ID to the value of the '_id' literal (if present). Use the constructor to choose another literal as ID source.keepidliteral (boolean), idliteral (String), keeprecordswithoutidliteral (boolean)StreamReceiverStreamReceiverexample in Playgroundorg.metafacture.mangling.RecordIdChanger
collect-triplesCollects named values to form records. The name becomes the id, the value is split by 'separator' into name and valueTripleStreamReceiverexample in Playgroundorg.metafacture.triples.TripleCollect
count-triplesCounts triplescountpredicate (String), countby [SUBJECT, PREDICATE, OBJECT, ALL]TripleTripleorg.metafacture.triples.TripleCount
decode-aseqParses a raw Aseq record (UTF-8 encoding expected).StringStreamReceiverexample in Playgroundorg.metafacture.biblio.AseqDecoder
decode-csvDecodes lines of CSV files. First line may be interpreted as header.hasheader (boolean), separator (String)StringStreamReceiverexample in Playgroundorg.metafacture.csv.CsvDecoder
decode-formetaDecodes a record in formeta format.StringStreamReceiverexample in Playgroundorg.metafacture.formeta.FormetaDecoder
decode-htmlDecode HTML to metadata events. The attrValsAsSubfields option can be used to override the default attribute values to be used as subfields (e.g. by default `link rel="canonical" href="http://example.org"` becomes `link.canonical`). It expects an HTTP-style query string specifying as key the attributes whose value should be used as a subfield, and as value the attribute whose value should be the subfield value, e.g. the default contains `link.rel=href`. To use the HTML element text as the value (instead of another attribute), omit the value of the query-string key-value pair, e.g. `title.lang`. To add to the defaults, instead of replacing them, start with an `&`, e.g. `&h3.class`attrvalsassubfields (String)ReaderStreamReceiverexample in Playgroundorg.metafacture.html.HtmlDecoder
decode-jsonDecodes JSON to metadata events. The 'recordPath' option can be used to set a JsonPath to extract a path as JSON - or to split the data into multiple JSON documents.recordid (String), recordcount (int), booleanmarker (String), arraymarker (String), arrayname (String), recordpath (String), allowcomments (boolean), numbermarker (String)StringStreamReceiverexample in Playgroundorg.metafacture.json.JsonDecoder
decode-mabParses a raw Mab2 stream (UTF-8 encoding expected).StringStreamReceiverexample in Playgroundorg.metafacture.biblio.MabDecoder
decode-marc21Decodes MARC 21 records (UTF-8 encoding expected).emitleaderaswhole (boolean), ignoremissingid (boolean)StringStreamReceiverexample in Playgroundorg.metafacture.biblio.marc21.Marc21Decoder
decode-picaParses pica+ records. The parser only parses single records. A string containing multiple records must be split into individual records before passing it to PicaDecoder.trimfieldnames (boolean), normalizedserialization (boolean), ignoremissingidn (boolean), skipemptyfields (boolean), normalizeutf8 (boolean)StringStreamReceiverexample in Playgroundorg.metafacture.biblio.pica.PicaDecoder
decode-stringSplits a String into several Strings, either by extracting parts that match a regexp or by splitting by a regexp.mode [SPLIT, EXTRACT]StringStringorg.metafacture.strings.StringDecoder
decode-xmlReads an XML file and passes the XML events to a receiver. Set `totalEntitySizeLimit="0"` to allow unlimited XML entities.totalentitysizelimit (String)ReaderXmlReceiverexample in Playgroundorg.metafacture.xml.XmlDecoder
decode-yamlDecodes YAML to metadata events.recordid (String), recordcount (int), arraymarker (String), arrayname (String)StringStreamReceiverexample in Playgroundorg.metafacture.yaml.YamlDecoder
decouplecreates a new thread in which subsequent flow elements run.debug (boolean)ObjectObjectorg.metafacture.flowcontrol.ObjectPipeDecoupler
defer-streamDefers all stream events until an end-record event is receivedStreamReceiverStreamReceiverorg.metafacture.flowcontrol.StreamDeferrer
digest-fileUses the input string as a file name and computes a cryptographic hash of the fileStringTripleorg.metafacture.files.FileDigestCalculator
discard-eventsdiscardlifecycleevents (boolean), discardliteralevents (boolean), discardentityevents (boolean), discardrecordevents (boolean)StreamReceiverStreamReceiverorg.metafacture.mangling.StreamEventDiscarder
draw-uniform-sampleDraws a uniform sample of records from the input stream.ObjectObjectexample in Playgroundorg.metafacture.statistics.UniformSampler
encode-csvEncodes each value in a record as a csv row.includeheader (boolean), noquotes (boolean), separator (String), includerecordid (boolean)StreamReceiverStringexample in Playgroundorg.metafacture.csv.CsvEncoder
encode-formetaEncodes streams in formeta format.style [CONCISE, VERBOSE, MULTILINE]StreamReceiverStringexample in Playgroundorg.metafacture.formeta.FormetaEncoder
encode-jsonSerialises an object as JSONbooleanmarker (String), arraymarker (String), prettyprinting (boolean), numbermarker (String)StreamReceiverStringexample in Playgroundorg.metafacture.json.JsonEncoder
encode-literalsOutputs the name and value of each literal which is received as a string. Name and value are separated by a separator string. The default separator string is a tab. If a literal name is empty, only the value will be output without a separator. The module ignores record and entity events. In particular, this means that literal names are not prefixed by the name of the entity which contains them.separator (String)StreamReceiverStringexample in Playgroundorg.metafacture.formatting.StreamLiteralFormatter
encode-marc21Encodes MARC21 recordsgenerateidfield (boolean)StreamReceiverStringexample in Playgroundorg.metafacture.biblio.marc21.Marc21Encoder
encode-marcxmlEncodes a stream into MARCXML. If you can't ensure valid MARC21 (e.g. the leader isn't correct or not set as one literal) then set the parameter `ensureCorrectMarc21Xml` to `true`.ensurecorrectmarc21xml (boolean), emitnamespace (boolean), xmlversion (String), formatted (boolean), xmlencoding (String)StreamReceiverStringexample in Playgroundorg.metafacture.biblio.marc21.MarcXmlEncoder
encode-picaEncodes a stream in pica+ formatignorerecordid (boolean)StreamReceiverStringorg.metafacture.biblio.pica.PicaEncoder
encode-xmlEncodes a stream as XML. Defaults: `rootTag="records"`, `recordTag="record"`, no attributeMarker.recordtag (String), namespacefile (String), xmlheaderversion (String), writexmlheader (boolean), xmlheaderencoding (String), separateroots (boolean), roottag (String), valuetag (String), attributemarker (String), writeroottag (boolean), namespaces (String)StreamReceiverStringexample in Playgroundorg.metafacture.xml.SimpleXmlEncoder
encode-yamlSerialises an object as YAML.arraymarker (String), [deprecated] prettyprinting (boolean)StreamReceiverStringexample in Playgroundorg.metafacture.yaml.YamlEncoder
extract-elementExtracts the specified element from an HTML documentReaderStringexample in Playgroundorg.metafacture.html.ElementExtractor
filterFilters a stream based on a morph definition. A record is accepted if the morph returns at least one non empty value.StreamReceiverStreamReceiverorg.metafacture.metamorph.Filter
filter-duplicate-objectsFilters consecutive duplicated data objects.ObjectObjectorg.metafacture.mangling.DuplicateObjectFilter
filter-null-valuesDiscards or replaces null valuesreplacement (String)StreamReceiverStreamReceiverexample in Playgroundorg.metafacture.mangling.NullFilter
filter-records-by-pathSplits a stream into records based on entity pathpath (String), entityseparator (String), recordidformat (String)StreamReceiverStreamReceiverexample in Playgroundorg.metafacture.mangling.RecordPathFilter
filter-stringsOnly forwards records which match (or do not match) a regular expression.passmatches (boolean)StringStringexample in Playgroundorg.metafacture.strings.StringFilter
filter-triplesFilters triples. The patterns for subject, predicate and object are disjunctive.predicatepattern (String), objectpattern (String), passmatches (boolean), subjectpattern (String)TripleTripleexample in Playgroundorg.metafacture.triples.TripleFilter
find-fix-pathsFinds all paths that have values that match the given pattern. Allows for regex. These paths can be used in a Fix to address fields.StreamReceiverStringorg.metafacture.metafix.FindFixPaths
fixApplies a fix transformation to the event stream, given as the path to a fix file or the fixes themselves.repeatedfieldstoentities (boolean), strictness [PROCESS, RECORD, EXPRESSION], entitymembername (String), strictnesshandlesprocessexceptions (boolean)StreamReceiverStreamReceiverorg.metafacture.metafix.Metafix
flattenflattens out entities in a stream by introducing dots in literal namesentitymarker (String)StreamReceiverStreamReceiverexample in Playgroundorg.metafacture.mangling.StreamFlattener
from-jdom-documentDocumentStreamReceiverorg.metafacture.jdom.JDomDocumentToStream
handle-cg-xmlReads CG-XML filesXmlReceiverStreamReceiverorg.metafacture.xml.CGXmlHandler
handle-comarcxmlA comarc xml readerXmlReceiverStreamReceiverorg.metafacture.biblio.ComarcXmlHandler
handle-generic-xmlA generic XML reader. Separates XML data in distinct records with the defined record tag name (default: `recordtagname="record"`) If no matching record tag is found, the output will be empty. The handler breaks down XML elements with simple string values and optional attributes into entities with a value subfield (name configurable) and additional subfields for each attribute. Record tag and value tag names can be configured. Attributes can get an attributeMarker.emitnamespace (boolean), recordtagname (String), attributemarker (String), valuetagname (String)XmlReceiverStreamReceiverexample in Playgroundorg.metafacture.xml.GenericXmlHandler
handle-mabxmlA MAB XML readerXmlReceiverStreamReceiverorg.metafacture.biblio.AlephMabXmlHandler
handle-marcxmlA MARC XML reader. To read marc data without namespace specification set option `namespace=""`namespace (String), attributemarker (String)XmlReceiverStreamReceiverexample in Playgroundorg.metafacture.biblio.marc21.MarcXmlHandler
handle-picaxmlA pica xml readerXmlReceiverStreamReceiverorg.metafacture.biblio.pica.PicaXmlHandler
jscriptexecutes the function process(obj) in a given jscriptinvoke (String)ObjectObjectorg.metafacture.scripting.JScriptObjectPipe
json-to-elasticsearch-bulkidkey (String), index (String), type (String)StringStringorg.metafacture.elasticsearch.JsonToElasticsearchBulk
lines-to-recordsCollects strings and emits them as records when a line matches the pattern or the stream is closed.recordmarkerregexp (String)StringStringexample in Playgroundorg.metafacture.strings.LineRecorder
list-fix-pathsLists all paths found in the input records. These paths can be used in a Fix to address fields. Options: `count` (output occurrence frequency of each path, sorted by highest frequency first; default: `true`), `template` (for formatting the internal triple structure; default: `${o} | ${s}` if count is true, else `${s}`), `index` (output individual repeated subfields and array elements with index numbers instead of '*'; default: `false`)template (String), count (boolean), index (boolean)StreamReceiverStringorg.metafacture.metafix.ListFixPaths
list-fix-valuesLists all values found for the given path. The paths can be found using list-fix-paths. Options: `count` (output occurrence frequency of each value, sorted by highest frequency first; default: `true`), `template` (for formatting the internal triple structure; default: `${o} | ${s}` if count is true, else `${s}`)template (String), count (boolean)StreamReceiverStringexample in Playgroundorg.metafacture.metafix.ListFixValues
literal-to-objectEmits literal values as objects.pattern (String)StreamReceiverStringexample in Playgroundorg.metafacture.mangling.LiteralToObject
log-objectlogs objects with the toString methodObjectObjectorg.metafacture.monitoring.ObjectLogger
log-streamlogs eventsStreamReceiverStreamReceiverorg.metafacture.monitoring.StreamLogger
log-stream-timeBenchmarks the execution time of the downstream modules.StreamReceiverStreamReceiverorg.metafacture.monitoring.StreamTimer
log-timeBenchmarks the execution time of the downstream modules.ObjectObjectorg.metafacture.monitoring.ObjectTimer
map-to-streamMapStreamReceiverorg.metafacture.javaintegration.MapToStream
matchMatches the incoming strings against a regular expression and replaces the matching parts.pattern (String), replacement (String)StringStringexample in Playgroundorg.metafacture.strings.StringMatcher
merge-batch-streamMerges a sequence of batchSize recordsStreamReceiverStreamReceiverorg.metafacture.plumbing.StreamBatchMerger
merge-same-idsStreamReceiverStreamReceiverexample in Playgroundorg.metafacture.plumbing.StreamMerger
morphApplies a metamorph transformation to the event stream. Metamorph definition is given in brackets.StreamReceiverStreamReceiverexample in Playgroundorg.metafacture.metamorph.Metamorph
normalize-unicode-streamNormalises composed and decomposed Unicode characters.normalizationform [NFD, NFC, NFKD, NFKC], normalizevalues (boolean), normalizeids (boolean), normalizekeys (boolean)StreamReceiverStreamReceiverorg.metafacture.strings.StreamUnicodeNormalizer
normalize-unicode-stringNormalizes diacritics in Unicode strings.normalizationform [NFD, NFC, NFKD, NFKC]StringStringorg.metafacture.strings.UnicodeNormalizer
object-batch-logWrites log info for every BATCHSIZE records.batchsize (int)ObjectObjectorg.metafacture.monitoring.ObjectBatchLogger
object-teeSends an object to more than one receiver.ObjectObjectorg.metafacture.plumbing.ObjectTee
object-to-literalOutputs a record containing the input object as literalrecordid (String), literalname (String)ObjectStreamReceiverorg.metafacture.mangling.ObjectToLiteral
open-fileOpens a file.decompressconcatenated (boolean), encoding (String), compression (String)StringReaderexample in Playgroundorg.metafacture.io.FileOpener
open-httpOpens an HTTP resource. Supports setting HTTP header fields `Accept`, `Accept-Charset`, `Accept-Encoding`, `Content-Encoding` and `Content-Type`, as well as generic headers (separated by `\n`). Defaults: request `method` = `GET`, request `url` = `@-` (input data), request `body` = `@-` (input data) if request method supports body and input data not already used, `Accept` header (`accept`) = `*/*`, `Accept-Charset` header (`acceptcharset`) = `UTF-8`, `errorprefix` = `ERROR: `.method [DELETE, GET, HEAD, OPTIONS, POST, PUT, TRACE], contentencoding (String), header (String), [deprecated] encoding (String), body (String), acceptcharset (String), acceptencoding (String), url (String), contenttype (String), accept (String), errorprefix (String)StringReaderexample in Playgroundorg.metafacture.io.HttpOpener
open-oaipmhOpens an OAI-PMH stream and passes a reader to the receiver. Mandatory arguments are: BASE_URL, DATE_FROM, DATE_UNTIL, METADATA_PREFIX, SET_SPEC .setspec (String), datefrom (String), encoding (String), dateuntil (String), metadataprefix (String)StringReaderexample in Playgroundorg.metafacture.biblio.OaiPmhOpener
open-resourceOpens a resource.encoding (String)StringReaderorg.metafacture.io.ResourceOpener
open-tarOpens a tar archive and passes every entry.ReaderReaderorg.metafacture.io.TarReader
pass-throughA simple pass-through moduleStreamReceiverStreamReceiverorg.metafacture.plumbing.IdentityStreamPipe
printWrites objects to stdoutfooter (String), header (String), encoding (String), compression (String), separator (String)ObjectVoidexample in Playgroundorg.metafacture.io.ObjectStdoutWriter
rdf-macrosExpands some macros for RDF/XMLautoaddedsubject (String)StreamReceiverStreamReceiverorg.metafacture.linkeddata.RdfMacroPipe
read-beaconReads BEACON formatmetadatafilter (String), buffersize (int), relation (String)ReaderStreamReceiverexample in Playgroundorg.metafacture.linkeddata.BeaconReader
read-dirReads a directory and emits all filenames found.filenamepattern (String), recursive (boolean)StringStringorg.metafacture.files.DirReader
read-stringCreates a reader for the supplied string and sends it to the receiverStringReaderexample in Playgroundorg.metafacture.strings.StringReader
read-triplesReads triplesStringTripleorg.metafacture.triples.TripleReader
record-to-entityentityname (String), idliteralname (String)StreamReceiverStreamReceiverorg.metafacture.mangling.RecordToEntity
regex-decodeDecodes a string based on a regular expression using named capture groupsrawinputliteral (String)StringStreamReceiverexample in Playgroundorg.metafacture.strings.RegexDecoder
remodel-pica-multiscriptGroups multiscript fields in entitiesStreamReceiverStreamReceiverorg.metafacture.biblio.pica.PicaMultiscriptRemodeler
reorder-tripleShifts subjectTo predicateTo and objectTo aroundsubjectfrom [SUBJECT, PREDICATE, OBJECT], objectfrom [SUBJECT, PREDICATE, OBJECT], predicatefrom [SUBJECT, PREDICATE, OBJECT]TripleTripleorg.metafacture.triples.TripleReorder
reset-object-batchResets the downstream modules every batch-size objectsbatchsize (int)ObjectObjectorg.metafacture.flowcontrol.ObjectBatchResetter
retrieve-triple-objectsUses the object value of the triple as a URL and emits a new triple in which the object value is replaced with the contents of the resource identified by the URL.defaultencoding (String)TripleTripleorg.metafacture.triples.TripleObjectRetriever
sort-triplesSorts triples. Several options can be combined, e.g. `by="object",numeric="true",order="decreasing"` will numerically sort the Object of the triples in decreasing order (given that all Objects are indeed of numeric type).by [SUBJECT, PREDICATE, OBJECT, ALL], numeric (boolean), order [INCREASING, DECREASING]TripleTripleexample in Playgroundorg.metafacture.triples.TripleSort
split-linesSplits a string at new lines and sends each line to the receiver.StringStringorg.metafacture.strings.LineSplitter
split-xml-elementsSplits elements (e.g. defining single records) residing in one XML document into multiple single XML documents.elementname (String), xmldeclaration (String), toplevelelement (String)XmlReceiverStreamReceiverorg.metafacture.xml.XmlElementSplitter
stream-countCounts the number of records and fields read.StreamReceiverStreamReceiverorg.metafacture.statistics.Counter
stream-teeReplicates an event stream to an arbitrary number of stream receivers.StreamReceiverStreamReceiverorg.metafacture.plumbing.StreamTee
stream-to-triplesEmits the literals which are received as triples such that the name and value become the predicate and the object of the triple. The record id containing the literal becomes the subject. If 'redirect' is true, the value of the subject is determined by using either the value of a literal named '_id', or for individual literals by prefixing their name with '{to:ID}'. Set 'recordPredicate' to encode a complete record in one triple. The value of 'recordPredicate' is used as the predicate of the triple. If 'recordPredicate' is set, no {to:ID}NAME-style redirects are possible.redirect (boolean), recordpredicate (String)StreamReceiverTripleexample in Playgroundorg.metafacture.triples.StreamToTriples
stream-to-xmlEncodes a stream as XML. Defaults: `rootTag="records"`, `recordTag="record"`, no attributeMarker.recordtag (String), namespacefile (String), xmlheaderversion (String), writexmlheader (boolean), xmlheaderencoding (String), separateroots (boolean), roottag (String), valuetag (String), attributemarker (String), writeroottag (boolean), namespaces (String)StreamReceiverStringorg.metafacture.xml.SimpleXmlEncoder
string-list-map-to-streamListMapStreamReceiverorg.metafacture.javaintegration.StringListMapToStream
templateBuilds a String from a template and an Object. Provide template in brackets. `${o}` marks the place where the object is to be inserted. If the object is an instance of Triple `${s}`, `${p}` and `${o}` are used instead.ObjectStringexample in Playgroundorg.metafacture.formatting.ObjectTemplate
thread-object-teeIncoming objects are distributed to the added receivers, running in their own threads.ObjectObjectorg.metafacture.flowcontrol.ObjectThreader
to-jdom-documentConverts a stream into a JDom document.StreamReceiverDocumentorg.metafacture.jdom.StreamToJDomDocument
triples-to-streamConverts a triple into a record streamTripleStreamReceiverorg.metafacture.triples.TriplesToStream
validate-jsonValidate JSON against a given schema, send only valid input to the receiver. Pass the schema location to validate against. Write valid and/or invalid output to locations specified with `writeValid` and `writeInvalid`. Set the JSON key for the record ID value with `idKey` (for logging output, defaults to `id`).idkey (String), writeinvalid (String), writevalid (String)StringStringorg.metafacture.json.JsonValidator
wait-for-inputsBlocks close-stream events until a given number of close-stream events have been received.ObjectObjectexample in Playgroundorg.metafacture.flowcontrol.CloseSuppressor
writeWrites objects to stdout or a file +Arguments: [stdout, PATH]appendiffileexists (boolean), footer (String), header (String), encoding (String), compression [NONE, AUTO, BZIP2, GZIP, PACK200, XZ], separator (String)ObjectVoidorg.metafacture.io.ObjectWriter
write-filesWrites objects to one (or more) file(s)appendiffileexists (boolean), footer (String), header (String), encoding (String), compression [NONE, AUTO, BZIP2, GZIP, PACK200, XZ], separator (String)ObjectVoidorg.metafacture.io.ObjectFileWriter
write-triple-objectsWrites the object value of the triple into a file. The filename is constructed from subject and predicate. Please note: This module does not check if the filename constructed from subject and predicate stays within `baseDir`. THIS MODULE SHOULD NOT BE USED IN ENVIRONMENTS IN WHICH THE VALUES OF SUBJECT AND PREDICATE ARE PROVIDED BY AN UNTRUSTED SOURCE!encoding (String)TripleVoidorg.metafacture.triples.TripleObjectWriter
write-triplesWrites triples into a file.TripleVoidorg.metafacture.triples.TripleWriter
write-xml-filesWrites the XML into the filesystem. The filename is constructed from the XPATH given as 'property'. Variables are: `target` (determining the output directory), `property` (the element in the XML entity. Constitutes the main part of the file's name.), `startIndex` ( a subfolder will be extracted out of the filename. This marks the index' beginning ), `endIndex` ( a subfolder will be extracted out of the filename. This marks the index' end )endindex (int), startindex (int), property (String), filesuffix (String), encoding (String), compression (String), target (String)StreamReceiverVoidorg.metafacture.xml.XmlFilenameWriter
xml-teeSends an object to more than one receiver.XmlReceiverXmlReceiverexample in Playgroundorg.metafacture.plumbing.XmlTee
+
+ From c2fceb78d1127c2614ab7c595b4c7ce33465ad81 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 5 May 2025 17:25:17 +0200 Subject: [PATCH 4/4] WIP this file is just for temporary use It is plain html an can be viewed in browser without using jekyll. Good for the workshop at Dini-Kim, maybe. --- docs/flux/flux-commands-table.html | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/docs/flux/flux-commands-table.html b/docs/flux/flux-commands-table.html index 2bf1e6d..4475f73 100644 --- a/docs/flux/flux-commands-table.html +++ b/docs/flux/flux-commands-table.html @@ -1,11 +1,9 @@ - - - - - - + + - +
@@ -44,10 +41,10 @@ - + Description @@ -82,7 +79,12 @@ Example in Playground - Java class + + + @@ -152,7 +154,7 @@ calculate-metrics - Calculates values for various cooccurrence metrics. The expected inputs are triples containing as subject the var name and as object the count. Marginal counts must appear first, joint counts second. Marinal counts must be written as 1:A, Joint counts as 2:A&B + Calculates values for various cooccurrence metrics. The expected inputs are triples containing as subject the var name and as object the count. Marginal counts must appear first, joint counts second. Marinal counts must be written as 1:A, Joint counts as 2:A&B Triple Triple @@ -233,7 +235,7 @@ decode-html - Decode HTML to metadata events. The attrValsAsSubfields option can be used to override the default attribute values to be used as subfields (e.g. by default `link rel="canonical" href="http://example.org"` becomes `link.canonical`). It expects an HTTP-style query string specifying as key the attributes whose value should be used as a subfield, and as value the attribute whose value should be the subfield value, e.g. the default contains `link.rel=href`. To use the HTML element text as the value (instead of another attribute), omit the value of the query-string key-value pair, e.g. `title.lang`. To add to the defaults, instead of replacing them, start with an `&`, e.g. `&h3.class` + Decode HTML to metadata events. The attrValsAsSubfields option can be used to override the default attribute values to be used as subfields (e.g. by default `link rel="canonical" href="http://example.org"` becomes `link.canonical`). It expects an HTTP-style query string specifying as key the attributes whose value should be used as a subfield, and as value the attribute whose value should be the subfield value, e.g. the default contains `link.rel=href`. To use the HTML element text as the value (instead of another attribute), omit the value of the query-string key-value pair, e.g. 
`title.lang`. To add to the defaults, instead of replacing them, start with an `&`, e.g. `&h3.class` attrvalsassubfields (String) Reader StreamReceiver @@ -1110,6 +1112,5 @@
- - +