diff --git a/CONTRIBUTORS.yaml b/CONTRIBUTORS.yaml
index d6679e9de9f062..f73fe1e3435b07 100644
--- a/CONTRIBUTORS.yaml
+++ b/CONTRIBUTORS.yaml
@@ -677,6 +677,7 @@ Sch-Da:
- uni-freiburg
- deKCD
- elixir-europe
+ - nfdi4culture
dadrasarmin:
name: Armin Dadras
diff --git a/GRANTS.yaml b/GRANTS.yaml
index f732e3f68f6a51..987f93cdb3cf04 100644
--- a/GRANTS.yaml
+++ b/GRANTS.yaml
@@ -203,6 +203,17 @@ nfdi4bioimage:
url: https://nfdi4bioimage.de
avatar: "/training-material/shared/images/nfdi4bioimage.jpeg"
+nfdi4culture:
+ name: "NFDI4Culture – Consortium for Research Data on Material and Immaterial Cultural Heritage"
+ short_name: "NFDI4Culture"
+ joined: 2025-08
+ avatar: "https://upload.wikimedia.org/wikipedia/commons/5/5d/NFDI4Culture_Logo.png"
+ github: false
+ url: https://nfdi4culture.de/
+ funder_name: DFG
+ funding_id: "441958017"
+ funding_database: dfg
+
nfdi4plants:
short_name: DataPLANT
name: DataPLANT (NFDI4Plants)
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/data-library.yaml b/topics/digital-humanities/tutorials/open-refine-tutorial/data-library.yaml
new file mode 100644
index 00000000000000..0131513e3318e5
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/data-library.yaml
@@ -0,0 +1,23 @@
+---
+destination:
+ type: library
+ name: GTN - Material
+ description: Galaxy Training Network Material
+ synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org
+items:
+- name: Digital Humanities
+ description: Training material for all kinds of Digital Humanities analysis.
+ items:
+ - name: Training material for Galaxy tutorial "Introduction to OpenRefine for researching cultural data"
+ items:
+ - name: 'DOI: 10.5281/zenodo.17047254'
+ description: latest
+ items:
+ - url: https://zenodo.org/api/records/17047254/files/phm_collection_adapted.tsv/content
+ src: url
+ ext: tsv
+ info: https://zenodo.org/records/17047254
+ - url: https://zenodo.org/api/records/17047254/files/stopwords-en.txt/content
+ src: url
+ ext: txt
+ info: https://zenodo.org/records/17047254
\ No newline at end of file
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/faqs/index.md b/topics/digital-humanities/tutorials/open-refine-tutorial/faqs/index.md
new file mode 100644
index 00000000000000..9ce3fe4fce824b
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/faqs/index.md
@@ -0,0 +1,3 @@
+---
+layout: faq-page
+---
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/cluster.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/cluster.png
new file mode 100644
index 00000000000000..a2f2176e885888
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/cluster.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/cluster2.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/cluster2.png
new file mode 100644
index 00000000000000..7d44605ddf3811
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/cluster2.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/dataset_cleaned.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/dataset_cleaned.png
new file mode 100644
index 00000000000000..93e992ee35f838
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/dataset_cleaned.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/deduplicate.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/deduplicate.png
new file mode 100644
index 00000000000000..7fd81ddd5e6ce6
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/deduplicate.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/display_1969.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/display_1969.png
new file mode 100644
index 00000000000000..ec86476e188d14
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/display_1969.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/export_results3.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/export_results3.png
new file mode 100644
index 00000000000000..d32daf4c03824e
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/export_results3.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/extract_tasks.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/extract_tasks.png
new file mode 100644
index 00000000000000..331ba804032142
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/extract_tasks.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/extract_tasks2.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/extract_tasks2.png
new file mode 100644
index 00000000000000..9649fb9e808fbe
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/extract_tasks2.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/facet_categories_blank.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/facet_categories_blank.png
new file mode 100644
index 00000000000000..ae5768233614fa
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/facet_categories_blank.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel.png
new file mode 100644
index 00000000000000..916dac7f47f4ce
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel2.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel2.png
new file mode 100644
index 00000000000000..0713c786a98108
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel2.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel3.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel3.png
new file mode 100644
index 00000000000000..c3b0fe6f6afef5
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/filter_grel3.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/interactive_tools.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/interactive_tools.png
new file mode 100644
index 00000000000000..89dc7ac645300a
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/interactive_tools.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/join.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/join.png
new file mode 100644
index 00000000000000..f495a7ebecff08
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/join.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine.png
new file mode 100644
index 00000000000000..05d46a5117453a
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_gui.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_gui.png
new file mode 100644
index 00000000000000..a33930a8e7228a
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_gui.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_interface.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_interface.png
new file mode 100644
index 00000000000000..7249863f50674b
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_interface.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_open_project.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_open_project.png
new file mode 100644
index 00000000000000..72008769d311f5
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/openrefine_open_project.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/select_workflow.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/select_workflow.png
new file mode 100644
index 00000000000000..70c6fb8860a2bd
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/select_workflow.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort.png
new file mode 100644
index 00000000000000..d8eb32865bea33
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort2.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort2.png
new file mode 100644
index 00000000000000..20f006310c6304
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort2.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort3.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort3.png
new file mode 100644
index 00000000000000..6f2249dcb44c1e
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort3.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort4.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort4.png
new file mode 100644
index 00000000000000..125b3d15f2f9a6
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort4.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort5.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort5.png
new file mode 100644
index 00000000000000..e47a75be89d75b
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort5.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort6.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort6.png
new file mode 100644
index 00000000000000..eb91e1666cd24b
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/sort6.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/split_multi_valued_cells.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/split_multi_valued_cells.png
new file mode 100644
index 00000000000000..e09ecbc827a259
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/split_multi_valued_cells.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet.png
new file mode 100644
index 00000000000000..6582b3ae0b743e
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet2.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet2.png
new file mode 100644
index 00000000000000..09035428f98612
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet2.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet3.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet3.png
new file mode 100644
index 00000000000000..989372f83a2a7f
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/text_facet3.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow.png
new file mode 100644
index 00000000000000..806ba34a8e6617
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow_inputs.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow_inputs.png
new file mode 100644
index 00000000000000..18a0ce24f6c2e1
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow_inputs.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow_overview.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow_overview.png
new file mode 100644
index 00000000000000..b59fa4e8f96181
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflow_overview.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflowhub.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflowhub.png
new file mode 100644
index 00000000000000..f7f977cde879d7
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflowhub.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflows.png b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflows.png
new file mode 100644
index 00000000000000..3a9d1566c13e11
Binary files /dev/null and b/topics/digital-humanities/tutorials/open-refine-tutorial/images/workflows.png differ
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/tutorial.bib b/topics/digital-humanities/tutorials/open-refine-tutorial/tutorial.bib
new file mode 100644
index 00000000000000..8506bb2947cc86
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/tutorial.bib
@@ -0,0 +1,36 @@
+
+# This is the bibliography file for your tutorial.
+#
+# To add bibliography (bibtex) entries here, follow these steps:
+# 1) Find the DOI for the article you want to cite
+# 2) Go to https://doi2bib.org and fill in the DOI
+# 3) Copy the resulting bibtex entry into this file
+#
+# To cite the example below, in your tutorial.md file
+# use {% cite Batut2018 %}
+#
+# If you want to cite an online resource (website etc)
+# you can use the 'online' format (see below)
+#
+# You can remove the examples below
+
+@article{Hooland_2013,
+ title={Cleaning Data with OpenRefine},
+ ISSN={2397-2068},
+ url={http://dx.doi.org/10.46430/phen0023},
+ DOI={10.46430/phen0023},
+ number={2},
+ journal={Programming Historian},
+ publisher={University of Sussex},
+ author={Hooland, Seth van and Verborgh, Ruben and Wilde, Max De},
+ editor={Crymble, Adam},
+ year={2013},
+ month=aug
+}
+
+@online{gtn-website,
+ author = {GTN community},
+ title = {GTN Training Materials: Collection of tutorials developed and maintained by the worldwide Galaxy community},
+ url = {https://training.galaxyproject.org},
+ urldate = {2021-03-24}
+}
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/tutorial.md b/topics/digital-humanities/tutorials/open-refine-tutorial/tutorial.md
new file mode 100644
index 00000000000000..d8b188eec80190
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/tutorial.md
@@ -0,0 +1,409 @@
+---
+layout: tutorial_hands_on
+title: OpenRefine Tutorial for researching cultural data
+level: Introductory
+zenodo_link: 'https://doi.org/10.5281/zenodo.17047254'
+questions:
+- How to use OpenRefine in Galaxy to clean your data?
+- How to use a workflow in Galaxy to extract and visualise information from your data?
+objectives:
+- Start OpenRefine as an Interactive Tool in Galaxy
+- Use OpenRefine to clean your data (remove duplicates, separate multiple values from the same field, etc.)
+- Export your cleaned data from OpenRefine to Galaxy
+- Use a pre-existing workflow in Galaxy to extract specific information and visualise your findings
+time_estimation: 2H
+key_points:
+- You can use OpenRefine online through Galaxy.
+- OpenRefine allows you to work interactively with messy data.
+- Galaxy allows you to run workflows with your data.
+- With Galaxy, you can visualise your data in various ways.
+contributions:
+ authorship:
+ - dianichj
+ - dadrasarmin
+ - Sch-Da
+ funding:
+ - nfdi4culture
+requirements:
+ - type: internal
+ topic_name: digital-humanities
+ tutorials:
+ - introduction_to_dh
+answer_histories:
+ - label: "UseGalaxy.eu"
+ history: https://usegalaxy.eu/u/armin.dadras/h/visualise-amount-of-objects-in-museum-collection
+ date: 2025-09-19
+---
+This tutorial shows how to use **OpenRefine** in Galaxy to clean and visualize data from the **humanities and social sciences**. It has two parts:
+- **Introduction to OpenRefine**, based on {% cite Hooland_2013 %} and adapted for Galaxy.
+- **Introduction to running Galaxy workflows** to visualize cleaned data and extract specific information.
+
+
+## What is OpenRefine?
+
+**OpenRefine** is a free, open-source “data wrangler” built for messy, heterogeneous, evolving datasets. It imports common formats (CSV/TSV, Excel, JSON, XML) and domain-specific ones used across GLAM (Galleries, Libraries, Archives and Museums) and official statistics (MARC, RDF serializations, PC-Axis).
+
+It is **non-destructive** — OpenRefine does not alter your source files, but works on copies and saves projects locally. Facets and filters let you audit categories, surface outliers, and triage inconsistencies without code. Its **clustering** tools consolidate near-duplicates using both key-collision methods (fingerprint, n-gram, phonetic) and edit-distance/nearest-neighbour methods (Levenshtein, PPM) so you can standardize names and places at scale while keeping human oversight.
+
+For enrichment, OpenRefine speaks the **Reconciliation API** to match local values to external authorities (e.g. **Wikidata**, **ROR**) and optionally pull back richer metadata. Transformations—both point-and-click and **GREL** formulas—are recorded as a stepwise, undoable history that you can export as JSON and re-apply to other datasets, enabling reproducible cleaning and easy peer review. Finished tables export cleanly to **CSV/TSV**, ODS/XLS(X), SQL statements, templated JSON, Google Sheets, or can be exported back to Galaxy.
+
+## From Cleaning to Analysis in Galaxy
+
+Once your dataset has been cleaned with OpenRefine, you often want to analyze it further or visualize specific aspects. This is where **Galaxy Workflows** become essential: they let you build reproducible pipelines that operate on your curated data, moving from one-off cleaning to structured analysis.
+
+## What are Galaxy Workflows?
+
+**Galaxy Workflows** are structured, stepwise pipelines you build and run entirely in the browser—either extracted from a recorded analysis *history* or assembled in the visual editor. They can be annotated, shared, published, imported, and rerun, making them ideal for teaching, collaboration, and reproducible research.
+
+A captured analysis is easy to share: export the workflow as JSON (**`.ga`**: tools, parameters, and Input/Output) or export a provenance-rich run as a **[Workflow Run RO-Crate](https://www.researchobject.org/workflow-run-crate/)** bundling the definition with inputs, outputs, and invocation metadata. This lowers the barrier to entry (no local installs; web UI with pre-installed tools and substantial compute) while preserving best practices (histories track tool versions and parameters; workflows are easily re-applied to new data).
+
+For findability and credit, the community uses **[WorkflowHub](https://workflowhub.eu/)**—a curated registry that supports multiple workflow technologies (including Galaxy) and promotes **FAIR** principles; it offers Spaces/Teams, permissions, versioning, and **DOIs via DataCite**, with metadata linking to identifiers like **[ORCID](https://orcid.org/)** so contributions enter scholarly knowledge graphs and are properly acknowledged.
+
+In practice, you can iterate on a workflow in a familiar GUI, export the exact definition or a run package, and deposit it where peers can discover, reuse, review, and cite it—closing the loop between simple authoring and robust scholarly dissemination.
+
+
+>
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+# Hands on: Get the data
+
+We will work with a slightly adapted dataset from the **[Powerhouse Museum](https://powerhouse.com.au/)** (Australia’s largest museum group) containing collection metadata. The museum shared the dataset online before giving API access to its collection. We slightly adapted the dataset and put it on Zenodo for long-term reusability. The tabular file (**36.4 MB**) includes **14 columns** for **75,811** objects, released under a **[Creative Commons Attribution Share Alike (CCASA) license](http://creativecommons.org/licenses/by-nc/2.5/au/)**. We will answer two questions: *From what year does the museum have the most objects?* And *what objects does the museum have from that year?*
+
+**Why this dataset?** It is credible, openly published, and realistically messy—ideal for practising problems scholars encounter at scale. Records include a **Categories** field populated from the **Powerhouse Museum Object Names Thesaurus (PONT)**, a controlled vocabulary reflecting Australian usage. The tutorial deliberately surfaces common quality issues—blank values that are actually stray whitespace, duplicate rows, and multi-valued cells separated by the pipe character `|` (including edge cases where **double pipes** `||` inflate row counts)—so we can practice systematic inspection before any analysis. During cleaning, you will compute sanity checks (after de-duplication, the dataset drops to **XXXX** unique records; a facet reveals **XXXX** distinct categories and **XXXX** items with no category). Without careful atomization and clustering, these irregularities would bias statistics, visualizations, and downstream reconciliation.
+
+We suggest that you download the data from the Zenodo record as explained below. This helps us with the reproducibility of the results.
+
+> Upload your data
+>
+> 1. Create a new history for this tutorial and name it "Powerhouse Museum — OpenRefine"
+> 2. Import the file from [Zenodo]({{page.zenodo_link}}):
+>
+> ```
+> https://zenodo.org/records/17047254/files/phm_collection_adapted.tsv
+> ```
+>
+> {% snippet faqs/galaxy/datasets_import_via_link.md %}
+>
+> {% snippet faqs/galaxy/datasets_import_from_data_library.md %}
+>
+> 3. **Rename** {% icon galaxy-pencil %} the dataset: "**Powerhouse Museum metadata**."
+> 4. Ensure that the datatype is `tsv`. Otherwise, change the datatype.
+>
+> {% snippet faqs/galaxy/datasets_change_datatype.md datatype="tsv" %}
+>
+{: .hands_on}
+
+# Use OpenRefine to explore and clean your dataset
+
+The users will familiarize themselves with the museum's metadata. In the next step, they will clean the metadata using various steps to enhance its quality and accessibility.
+
+## Start OpenRefine
+
+> Opening the dataset with OpenRefine
+>
+> 1. Open the {% tool [OpenRefine](interactive_tool_openrefine) %}: Working with messy data
+> - *"Input file in tabular format"*: `Powerhouse Museum metadata`
+>
+> 2. Click on "Run Tool".
+>
+> 
+>
+> 3. After around 30 seconds, using the interactive tools section on the left panel, you can open OpenRefine by clicking on its name. Make sure to wait until you see the symbol with an arrow > pointing outside the box that allows you to start OpenRefine in a new tab.
+>
+> 
+>
+> 4. Here, you can see the OpenRefine GUI. Click on `Open Project`.
+>
+> 
+>
+> 5. Click on `Galaxy file`. If the file does not appear, you may have started OpenRefine before it was fully loaded. Retry steps 3 and 4, and the file should be visible.
+>
+> 
+>
+> 6. You can see the data loaded for you.
+>
+> 
+>
+{: .hands_on}
+
+>
+>
+> 1. How many rows does this table have?
+>
+> >
+> >
+> > 1. 75809
+> >
+> {: .solution}
+{: .question}
+
+Great, now that the dataset is in OpenRefine, we can start cleaning it.
+
+## Remove duplicate rows
+
+> Removing the duplicate rows
+>
+> 1. Click on the triangle on the left of `Record ID`.
+>
+> 
+>
+> 2. Click on `Sort...`.
+>
+> 3. Select `numbers` and click on `OK`.
+>
+> 
+>
+> 4. Above the table, click on `Sort` and select `Reorder rows permanently`.
+>
+> 
+>
+> 5. Click on the triangle left of the `Record ID` column. Hover over `Edit cells` and select `Blank down`.
+>
+> 
+>
+> 6. Click on the triangle left of the `Record ID` column. Hover over `Facet`, then move your mouse to `Customized facets` and select `Facet by blank (null or empty string)`.
+>
+> 
+>
+> 7. On the left, a new option appears under `Facet/Filter` with the title `Record ID`. Click on `true`.
+>
+> 
+>
+> 8. Click on the triangle to the left of the column called `All`. Hover over `Edit rows`, and select `remove matching rows`.
+>
+> 
+>
+> 9. Close the `Facet` by clicking on the cross (x) to see all rows.
+>
+{: .hands_on}
+
+>
+>
+> 1. How many rows have been removed?
+>
+> >
+> >
+> > 1. 84
+> >
+> {: .solution}
+{: .question}
+
+The dataset no longer contains duplicate rows. But we need to do more cleaning to improve the dataset.
+
+## Use GREL
+
+> Find and replace typos using GREL
+>
+> To remove the occurrence of the double pipe \|\| from the file, we can do the following:
+> 1. Click on the triangle on the left of `Categories` and select `Text filter`.
+> 2. On the left, using the `Facet/Filter` section, search for the occurrence of \| and \|\|. There are 71061 rows with \| and 9 rows with \|\|. We want to correct these 9 rows, as the double pipes are there by mistake.
+> 3. Click on the triangle on the left of `Categories`, hover over `edit cells`, and click on `Transform...`.
+> 4. In the new window, use the following text `value.replace('||', '|')` as "Expression" and click on `OK`.
+>
+> 
+>
+> We can also remove categories that are listed more than once within the same entry as follows:
+>
+> 5. Click on the triangle on the left of `Categories`, hover over `edit cells`, and click on `Transform...`.
+>
+> 
+>
+> 
+>
+> 6. In the new window, use the following text `value.split('|').uniques().join('|')` as "Expression" and click on `OK`.
+>
+{: .hands_on}
+
+>
+>
+> 1. How many cells had duplicated categories?
+>
+> >
+> >
+> > 1. 1,668
+> >
+> {: .solution}
+{: .question}
+
+## Atomization
+
+> Atomization
+>
+> Once the duplicate records have been removed, we can have a closer look at the Categories column. Different categories are separated from each other by pipe (\|). Each entry can have more
+> than one category. In order to analyze in detail the use of the keywords, the values of the Categories column need to be split up into individual cells on the basis of the pipe character.
+> 1. Click on the triangle on the left of `Categories`, hover over `edit cells`, and click on `Split multi-valued cells...`.
+>
+> 
+>
+> 2. Define the `Separator` as `\|` (pipe). Click on `OK`.
+>
+> 
+>
+{: .hands_on}
+
+Are you ready for a little challenge? Let's investigate the categories column of the museum items.
+
+>
+>
+> 1. How many rows do you have after atomizing the categories column?
+> 2. How many entries do not have any category?
+>
+> >
+> >
+> > 1. 168,476
+> > 2. Click on the triangle on the left of `Categories` and hover over `facet` and move your mouse over `Customized facets`, and click on `Facet by blank (null or empty string)`. The `true` value for blank entries is 447.
+> >
+> {: .solution}
+{: .question}
+
+Now, let's use faceting based on text.
+
+## Faceting
+
+> Faceting
+>
+> 1. Click on the triangle on the left of `Categories`, hover over `facet`, and click on `Text facet`.
+> 2. On the left panel, it mentions the total number of choices. The default value of `count limit` is low for this dataset, and we should increase it. Click on `Set choice count limit`.
+>
+> 
+>
+> 3. Enter `5000` as the new limit and click on `Ok`.
+>
+> 
+>
+> 4. Now, you see all categories. Click on `count` to see the categories sorted in descending order.
+>
+> 
+>
+{: .hands_on}
+
+>
+>
+> 1. What are the top 3 categories? How many items are associated with each of them?
+>
+> >
+> >
+> > 1. Numismatics (8011), Ceramics (7389), and Clothing and Dress (7279)
+> >
+> {: .solution}
+{: .question}
+
+
+## Clustering
+
+The clustering allows you to solve issues regarding case inconsistencies, incoherent use of either the singular or plural form, and simple spelling mistakes.
+
+> Clustering of similar categories
+>
+> 1. Click on the `Cluster` button on the left in the `Facet/Filter` tab.
+> 2. Use `Key collision` as clustering method. Change the Keying function to `n-Gram fingerprint` and change the n-Gram size to `3`.
+>
+> 
+>
+> 3. Click on the `cluster` button in the middle window.
+>
+> 
+>
+> 4. Here, you can see different suggestions from OpenRefine to cluster different categories and merge them into one. In our tutorial, we merge all of the suggestions by clicking on `select > all` and then clicking on `Merge selected and re-cluster`.
+>
+> 
+>
+> 5. Now, you can close the clustering window by clicking on `close`.
+>
+> Be careful! Some methods are too aggressive, so you might end up clustering values that do not belong together. Now that the values have been clustered individually, we can put them back together in a single cell.
+> 6. Click on the triangle on the left of `Categories`, hover over `Edit cells`, and click on `Join multi-valued cells`.
+> 7. Choose the pipe character (`\|`) as a separator and click on `OK`.
+> The rows now look like before, with a multi-valued Categories field.
+>
+{: .hands_on}
+
+When you’re happy with your analysis results, choose whether to export the dataset into your Galaxy history or download it directly onto your computer.
+
+## Exporting your data back to Galaxy
+
+> Exporting the results and history
+>
+> 1. Click on `Export` at the top of the table.
+> 2. Select `Galaxy exporter`. Wait a few seconds. On a new page, you will see the following text: "Dataset has been exported to Galaxy, please close this tab". When you see this, you can close that tab. Alternatively, you can download your cleaned dataset in various formats such as CSV, TSV, and Excel. You can also close the extra tab that contains OpenRefine and click on the orange item `OpenRefine on data [and a number]`. You do not need it for your next steps.
+>
+> 
+>
+> 3. You can find a new dataset in your Galaxy History (with a green background) that contains your cleaned dataset for further analysis.
+> 4. You can click on the eye icon ({% icon galaxy-eye %}) and investigate the table.
+>
+> 
+>
+{: .hands_on}
+
+> Exporting the OpenRefine operation history
+> Additionally, you can download the tasks you performed using OpenRefine in JSON format. This way, you can import it later and reproduce the exact same analysis. To do so:
+> 1. Click on `Undo/Redo` on the left panel.
+> 2. Click on `Extract...`.
+>
+> 
+>
+> 3. Click on the steps that you want to extract. Here, we selected everything.
+> 4. Click on `Export`. Give your file a name to save it on your computer.
+>
+> 
+>
+{: .hands_on}
+
+# Run a Galaxy Workflow on your cleaned data
+
+Congratulations, you have successfully cleaned your data and improved its quality!
+But what can you do with it now?
+This depends on your aims as a researcher. For us, it is interesting to extract further information from the data.
+To make it easy for you, we created a so-called workflow, which links all the tools needed to do this analysis.
+We wanted to know from which year the museum had the most objects and what they were.
+You can follow along and answer those questions with us, or explore the Galaxy tools on your own, to adapt the analysis to your needs.
+In this case, be sure to check out our other tutorials, particularly the introductory ones.
+
+## How to find and run existing workflows
+
+> Run a Galaxy workflow on your dataset
+>
+> There are different ways to import a workflow into Galaxy or to create one. For example, you can import a workflow from the registered workflows on [WorkflowHub](https://workflowhub.eu/), which is a registry for describing, sharing, and publishing scientific computational workflows. To do that, navigate to [WorkflowHub](https://workflowhub.eu/) and find the workflow of interest. In this tutorial, we are working [with this workflow](https://workflowhub.eu/workflows/1884?version=1). When you open the link to this workflow on WorkflowHub, you see the following page:
+>
+> 
+>
+> Please click on the `Run on Galaxy` button at the top right. After doing this, you will be redirected to your Galaxy account and see the workflow automatically in your middle panel as follows:
+>
+> 
+>
+> Let's assume that you have imported a workflow into your Galaxy account.
+>
+> 1. You can find all workflows available to you by clicking on the Workflows Icon ({% icon galaxy-workflows-activity %}) on the left panel.
+>
+> 
+>
+> 2. Then, you can select and run different workflows (if you have any workflows in your account). Here, let's click on the Run button ({% icon workflow-run %}) of the workflow we provided to you in this tutorial.
+>
+> 
+>
+> 3. Determine the inputs as follows:
+>    - Input: `openrefine-Galaxy file.tsv`
+>    - stop_words_english: `stop_words_english.txt`, which is the file we provided to you in this tutorial.
+>
+> 
+>
+> 4. Click on the `Run Workflow` button at the top.
+> 5. You can follow the stages of different jobs (computational tasks). They will be created, scheduled, executed, and completed. When everything is green, your workflow has run fully and the results are ready.
+>
+> 
+>
+{: .hands_on}
+
+What can you see here? To follow along, we made all substeps of the task available as outputs. To answer our question of which year most objects in the museum derive from, we first cut the production date column from the table and keep only the dates that refer to specific years, not year ranges. Regular expressions help clean the remaining inconsistencies in the dataset. Sorting the production dates in descending order reveals that the table contains one faulty entry that was supposedly created in 2041. We remove it. Datamash then counts how many objects there are for each year. We visualise these counts, sorted by year in ascending order, in a bar chart. To find out from which year most objects derive, we sort again, this time by the number of objects in descending order. We then parse that year as a parameter to search for the descriptions of the objects from that year. In our case, this is 1969. From all object descriptions from 1969, we create a word cloud using the provided stop word list.
+As a result, we find that most objects from the museum are negatives by David Mist, which he created in that year and gave to the museum.
+
+
+
+# Conclusion
+
+Congratulations! You used OpenRefine to clean your data and ran a workflow from Galaxy with your results! You now know how to do basic steps in Galaxy, run OpenRefine as an interactive tool and get your data from Galaxy to OpenRefine and back. On the way, you have learned basic data cleaning techniques, like faceting, to enhance the quality of your data. To extract further information from the cleaned data, running a pre-designed workflow gave you a glimpse into Galaxy. Of course, you can always do your own analysis with the tools most useful for you.
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/Visualise-amount-of-objects-in-Museum-Collection-tests.yml b/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/Visualise-amount-of-objects-in-Museum-Collection-tests.yml
new file mode 100644
index 00000000000000..531a601abc7618
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/Visualise-amount-of-objects-in-Museum-Collection-tests.yml
@@ -0,0 +1,21 @@
+- doc: Test outline for Visualise-amount-of-objects-in-Museum-Collection
+ job:
+ Input:
+ class: File
+ path: https://zenodo.org/records/17047254/files/phm_collection_adapted.tsv
+ filetype: tsv
+ stop_words_english:
+ class: File
+ path: https://zenodo.org/records/17047254/files/stopwords-en.txt?download=1
+ filetype: txt
+ outputs:
+ out_file1:
+ asserts:
+ has_size:
+ value: 5920
+ delta: 3000
+ output:
+ asserts:
+ has_size:
+ value: 29354
+ delta: 5000
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/Visualise-amount-of-objects-in-Museum-Collection.ga b/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/Visualise-amount-of-objects-in-Museum-Collection.ga
new file mode 100644
index 00000000000000..94d4fb3e336edc
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/Visualise-amount-of-objects-in-Museum-Collection.ga
@@ -0,0 +1,712 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "Text mining a museum collection in tabular format to extract from which year most objects derive and what they are.",
+ "comments": [],
+ "creator": [
+ {
+ "class": "Person",
+ "identifier": "0000-0001-9536-5587",
+ "name": "Daniela Schneider"
+ }
+ ],
+ "format-version": "0.1",
+ "license": "CC-BY-4.0",
+ "name": "Visualise amount of objects in Museum Collection",
+ "report": {
+ "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"
+ },
+ "steps": {
+ "0": {
+ "annotation": "Upload a tsv file with multiple rows and columns to compute on.",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "Upload a tsv file with multiple rows and columns to compute on.",
+ "name": "Input"
+ }
+ ],
+ "label": "Input",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 0,
+ "top": 108.34040475940378
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false, \"format\": [\"tsv\"], \"tag\": \"\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "a2709002-3e21-4d31-bd7d-5fc29a3d7dde",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "1": {
+ "annotation": "Upload a list of English stop words in a .txt-format.",
+ "content_id": null,
+ "errors": null,
+ "id": 1,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "Upload a list of English stop words in a .txt-format.",
+ "name": "stop_words_english"
+ }
+ ],
+ "label": "stop_words_english",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 1242.0063636474608,
+ "top": 873.9931030273438
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false, \"format\": [\"txt\"], \"tag\": \"\"}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "bd9773b3-9bb7-4172-9a5a-5c7f3c4d3442",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": "Cut1",
+ "errors": null,
+ "id": 2,
+ "input_connections": {
+ "input": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Cut",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 337.97291638183583,
+ "top": 0
+ },
+ "post_job_actions": {},
+ "tool_id": "Cut1",
+ "tool_state": "{\"__input_ext\": \"tabular\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c6\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.2",
+ "type": "tool",
+ "uuid": "52a9d785-ef3c-446c-a8aa-cf8145ad30c4",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/filter_tabular/filter_tabular/3.3.1",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "input": {
+ "id": 2,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Filter Tabular",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 338.9666748046875,
+ "top": 125
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/filter_tabular/filter_tabular/3.3.1",
+ "tool_shed_repository": {
+ "changeset_revision": "90f657745fea",
+ "name": "filter_tabular",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"tabular\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"comment_char\": false, \"input\": {\"__class__\": \"ConnectedValue\"}, \"linefilters\": [{\"__index__\": 0, \"filter\": {\"filter_type\": \"regex\", \"__current_case__\": 8, \"regex_pattern\": \"-\", \"regex_action\": \"exclude_find\"}}, {\"__index__\": 1, \"filter\": {\"filter_type\": \"regex\", \"__current_case__\": 8, \"regex_pattern\": \"\\\\d\", \"regex_action\": \"include_find\"}}, {\"__index__\": 2, \"filter\": {\"filter_type\": \"regex\", \"__current_case__\": 8, \"regex_pattern\": \"BC\", \"regex_action\": \"exclude_find\"}}], \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "3.3.1",
+ "type": "tool",
+ "uuid": "56d46f45-3d55-4408-80f4-9e5358a9c0d2",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "input": {
+ "id": 3,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Column Regex Find And Replace",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 339.9666748046875,
+ "top": 257
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3",
+ "tool_shed_repository": {
+ "changeset_revision": "503bcd6ebe4b",
+ "name": "regex_find_replace",
+ "owner": "galaxyp",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"checks\": [{\"__index__\": 0, \"pattern\": \"AD\", \"replacement\": \"\"}, {\"__index__\": 1, \"pattern\": \"\\\\d\\\\d/\\\\d\\\\d/\", \"replacement\": \"\"}], \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"field\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.3",
+ "type": "tool",
+ "uuid": "c3186d08-94b6-4aef-b340-99bac5e803c9",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/9.5+galaxy2",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "infile": {
+ "id": 4,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Sort",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 335.966796875,
+ "top": 384
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/9.5+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "c41d78ae5fee",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"header\": \"0\", \"ignore_case\": false, \"infile\": {\"__class__\": \"ConnectedValue\"}, \"sortkeys\": [{\"__index__\": 0, \"column\": \"1\", \"order\": \"r\", \"style\": \"n\"}], \"unique\": false, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "9.5+galaxy2",
+ "type": "tool",
+ "uuid": "4439b057-4b48-49d6-a8cf-f2abf77188ae",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "Remove beginning1",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "input": {
+ "id": 5,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Remove beginning",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 338.966796875,
+ "top": 500
+ },
+ "post_job_actions": {},
+ "tool_id": "Remove beginning1",
+ "tool_state": "{\"__input_ext\": \"tabular\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"num_lines\": \"1\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "b89c73d5-f7b9-4803-b82d-e78def5b9c99",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.9+galaxy0",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "in_file": {
+ "id": 6,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Datamash",
+ "outputs": [
+ {
+ "name": "out_file",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 339.9666748046875,
+ "top": 634
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.9+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "61e2aa6bb55d",
+ "name": "datamash_ops",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"grouping\": \"1\", \"header_in\": false, \"header_out\": false, \"ignore_case\": false, \"in_file\": {\"__class__\": \"ConnectedValue\"}, \"narm\": false, \"need_sort\": true, \"operations\": [{\"__index__\": 0, \"op_name\": \"count\", \"op_column\": \"1\"}], \"print_full_line\": false, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.9+galaxy0",
+ "type": "tool",
+ "uuid": "d9ac1c7f-a98a-4063-ac9c-990ce333695d",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/9.5+galaxy2",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "infile": {
+ "id": 7,
+ "output_name": "out_file"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Sort",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 343.9666748046875,
+ "top": 753
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/9.5+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "c41d78ae5fee",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"header\": \"0\", \"ignore_case\": false, \"infile\": {\"__class__\": \"ConnectedValue\"}, \"sortkeys\": [{\"__index__\": 0, \"column\": \"1\", \"order\": \"\", \"style\": \"n\"}], \"unique\": false, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "9.5+galaxy2",
+ "type": "tool",
+ "uuid": "1509daf6-c5d0-489f-af5a-cc3c362075c0",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "barchart_gnuplot",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "input": {
+ "id": 8,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Bar chart",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 597.9113977058988,
+ "top": 703.3195725430694
+ },
+ "post_job_actions": {},
+ "tool_id": "barchart_gnuplot",
+ "tool_state": "{\"__input_ext\": \"tabular\", \"__workflow_invocation_uuid__\": \"60dca66c790311f0bed3b42e99728f38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"colList\": [\"2\"], \"input\": {\"__class__\": \"ConnectedValue\"}, \"pdf_size\": \"800,600\", \"title\": \"Amount of objects \", \"xtic\": {\"userSpecified\": \"Yes\", \"__current_case__\": 0, \"xticColumn\": \"1\"}, \"ylabel\": \"V1\", \"ymax\": \"0\", \"ymin\": \"0\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.0",
+ "type": "tool",
+ "uuid": "36e40ec4-3ace-4c00-853d-de94cf27e30c",
+ "when": null,
+ "workflow_outputs": [
+ {
+ "label": "out_file1",
+ "output_name": "out_file1",
+ "uuid": "204d5ba9-e4d0-4bfd-a421-43b71f361bc9"
+ }
+ ]
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/9.5+galaxy2",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "infile": {
+ "id": 8,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Sort",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 598.2704577949555,
+ "top": 840.067017546533
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sort_header_tool/9.5+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "c41d78ae5fee",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"header\": \"0\", \"ignore_case\": false, \"infile\": {\"__class__\": \"ConnectedValue\"}, \"sortkeys\": [{\"__index__\": 0, \"column\": \"2\", \"order\": \"r\", \"style\": \"n\"}], \"unique\": false, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "9.5+galaxy2",
+ "type": "tool",
+ "uuid": "d565d42c-07fe-457f-92a2-d5bbe69ba8cb",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_head_tool/9.5+galaxy2",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "infile": {
+ "id": 10,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Select first",
+ "name": "infile"
+ }
+ ],
+ "label": null,
+ "name": "Select first",
+ "outputs": [
+ {
+ "name": "outfile",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 599.0110140921608,
+ "top": 949.1107476960407
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_head_tool/9.5+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "c41d78ae5fee",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"complement\": \"\", \"count\": \"1\", \"infile\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "9.5+galaxy2",
+ "type": "tool",
+ "uuid": "25fe3162-c931-47c8-8136-11f3138feae0",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "Cut1",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "input": {
+ "id": 11,
+ "output_name": "outfile"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Cut",
+ "name": "input"
+ }
+ ],
+ "label": null,
+ "name": "Cut",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 597.6645182123016,
+ "top": 1079.69832973971
+ },
+ "post_job_actions": {},
+ "tool_id": "Cut1",
+ "tool_state": "{\"columnList\": \"c1\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.2",
+ "type": "tool",
+ "uuid": "5f1c21b5-1322-4f4f-bcb0-1e0b3930b09b",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "param_value_from_file",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "input1": {
+ "id": 12,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Parse parameter value",
+ "name": "input1"
+ }
+ ],
+ "label": null,
+ "name": "Parse parameter value",
+ "outputs": [
+ {
+ "name": "text_param",
+ "type": "expression.json"
+ }
+ ],
+ "position": {
+ "left": 592.2785346928646,
+ "top": 1184.8821433859616
+ },
+ "post_job_actions": {},
+ "tool_id": "param_value_from_file",
+ "tool_state": "{\"input1\": {\"__class__\": \"RuntimeValue\"}, \"param_type\": \"text\", \"remove_newlines\": true, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "0.1.0",
+ "type": "tool",
+ "uuid": "49d07ed7-9e98-47c5-9977-76c3a07e774a",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "components_1|param_type|component_value": {
+ "id": 13,
+ "output_name": "text_param"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Compose text parameter value",
+ "outputs": [
+ {
+ "name": "out1",
+ "type": "expression.json"
+ }
+ ],
+ "position": {
+ "left": 594.9715264525831,
+ "top": 1354.4975363800722
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1",
+ "tool_shed_repository": {
+ "changeset_revision": "e188c9826e0f",
+ "name": "compose_text_param",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"components\": [{\"__index__\": 0, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"\\\\t\"}}, {\"__index__\": 1, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": {\"__class__\": \"ConnectedValue\"}}}, {\"__index__\": 2, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"\\\\t\"}}], \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "0.1.1",
+ "type": "tool",
+ "uuid": "1b7bab4e-abed-4ab2-b5ef-9568646559c7",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.5+galaxy2",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "infile": {
+ "id": 0,
+ "output_name": "output"
+ },
+ "url_paste": {
+ "id": 14,
+ "output_name": "out1"
+ }
+ },
+ "inputs": [
+ {
+ "description": "runtime parameter for tool Search in textfiles",
+ "name": "infile"
+ }
+ ],
+ "label": null,
+ "name": "Search in textfiles",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "input"
+ }
+ ],
+ "position": {
+ "left": 932.9419922972561,
+ "top": 911.5003919063773
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.5+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "c41d78ae5fee",
+ "name": "text_processing",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"case_sensitive\": \"-i\", \"color\": \"NOCOLOR\", \"infile\": {\"__class__\": \"RuntimeValue\"}, \"invert\": \"\", \"lines_after\": \"0\", \"lines_before\": \"0\", \"regex_type\": \"-P\", \"url_paste\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "9.5+galaxy2",
+ "type": "tool",
+ "uuid": "84648a64-f0ce-4b99-9fdc-19a7c778c667",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "16": {
+ "annotation": "",
+ "content_id": "Cut1",
+ "errors": null,
+ "id": 16,
+ "input_connections": {
+ "input": {
+ "id": 15,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Cut",
+ "outputs": [
+ {
+ "name": "out_file1",
+ "type": "tabular"
+ }
+ ],
+ "position": {
+ "left": 1242.0063636474608,
+ "top": 723.9931030273438
+ },
+ "post_job_actions": {},
+ "tool_id": "Cut1",
+ "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c2\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.0.2",
+ "type": "tool",
+ "uuid": "83af80e6-e4a8-4654-8bc6-e169cb4f409b",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "17": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/wordcloud/wordcloud/1.9.4+galaxy2",
+ "errors": null,
+ "id": 17,
+ "input_connections": {
+ "stopwords": {
+ "id": 1,
+ "output_name": "output"
+ },
+ "text": {
+ "id": 16,
+ "output_name": "out_file1"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Generate a word cloud",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 1542.0063636474608,
+ "top": 763.9931030273438
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/wordcloud/wordcloud/1.9.4+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "57e3de4b3b0d",
+ "name": "wordcloud",
+ "owner": "bgruening",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"background\": \"#000000\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"color_choice\": {\"color_option\": \"color\", \"__current_case__\": 0, \"color\": \"#00ff00\"}, \"colormap\": null, \"contour_color\": \"#000000\", \"contour_width\": \"0.0\", \"font_step\": \"1\", \"fonts\": {\"fonts_selector\": \"none\", \"__current_case__\": 0}, \"height\": \"200\", \"include_numbers\": false, \"margin\": \"2\", \"mask\": null, \"max_font_size\": null, \"max_words\": \"200\", \"min_font_size\": \"4\", \"min_word_length\": \"0\", \"mode\": null, \"no_collocations\": true, \"no_normalize_plurals\": true, \"prefer_horizontal\": \"0.3\", \"random_state\": \"10\", \"relative_scaling\": \"0.8\", \"repeat\": false, \"scale\": \"1.0\", \"stopwords\": {\"__class__\": \"ConnectedValue\"}, \"text\": {\"__class__\": \"ConnectedValue\"}, \"width\": \"400\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.9.4+galaxy2",
+ "type": "tool",
+ "uuid": "98ed645e-d419-472d-8b61-25261cc3feb8",
+ "when": null,
+ "workflow_outputs": [
+ {
+ "label": "output",
+ "output_name": "output",
+ "uuid": "db4e9713-11f9-4f7b-b2a6-f0661e1f46f8"
+ }
+ ]
+ }
+ },
+ "tags": [
+ "openrefine",
+ "culturedata",
+ "4culture",
+ "visualisation",
+ "tabulardata",
+ "textcleaning"
+ ],
+ "uuid": "a27aad7b-c301-44c9-8098-9084df0e6950",
+ "version": 33
+}
\ No newline at end of file
diff --git a/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/index.md b/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/index.md
new file mode 100644
index 00000000000000..e092e0ae66ddd4
--- /dev/null
+++ b/topics/digital-humanities/tutorials/open-refine-tutorial/workflows/index.md
@@ -0,0 +1,3 @@
+---
+layout: workflow-list
+---