diff --git a/frontend/docs/docs/user-guide/workflow-setup.md b/frontend/docs/docs/user-guide/workflow-setup.md index d81e5bc61d..c2ea445d34 100644 --- a/frontend/docs/docs/user-guide/workflow-setup.md +++ b/frontend/docs/docs/user-guide/workflow-setup.md @@ -78,7 +78,9 @@ Crawl scopes are categorized as a **Page Crawl** or **Site Crawl**: ### Page URL(s) -One or more URLs of the page to crawl. URLs must follow [valid URL syntax](https://www.w3.org/Addressing/URL/url-spec.html). For example, if you're crawling a page that can be accessed on the public internet, your URL should start with `http://` or `https://`. +One or more URLs of the pages to crawl, visible when using a crawl scope of _Single Page_ or _List of Pages_. URLs will be crawled in the order that they are specified. + +URLs must follow [valid URL syntax](https://www.w3.org/Addressing/URL/url-spec.html). For example, if you're crawling a page that can be accessed on the public internet, your URL should start with `http://` or `https://`. See [List Of Pages](#list-of-pages) for additional info when providing a list of URLs. @@ -90,7 +92,7 @@ See [List Of Pages](#list-of-pages) for additional info when providing a list of ### Crawl Start URL -This is the first page that the crawler will visit. _Site Crawl_ scopes are based on this URL. +This is the first page that the crawler will visit. When using a crawl scope of _In-Page Links_, _Pages in Same Directory_, _Pages on Same Domain_, or _Pages on Same Domain + Subdomains_, this URL is the basis for determining whether a linked URL is within scope and should be crawled. ### Include Any Linked Page @@ -349,7 +351,9 @@ Describe and organize your crawl workflow and the resulting archived items. ### Name -Allows a custom name to be set for the workflow. If no name is set, the workflow's name will be set to the _Crawl Start URL_. For Page List crawls, the workflow's name will be set to the first URL present in the _Crawl URL(s)_ field, with an added `(+x)` where `x` represents the total number of URLs in the list. +Allows a custom name to be set for the workflow. + +If no name is set, the workflow's name will be set to the first page URL specified in _Scope_ (also referred to as the crawl start URL.) For _Single Page_ and _List of Pages_ crawls, the workflow's name will be suffixed by `+ N` where `N` represents the number of page URLs in addition to the crawl start URL. ### Description diff --git a/frontend/src/features/collections/collection-items-dialog.ts b/frontend/src/features/collections/collection-items-dialog.ts index b6f68842a0..676aa98497 100644 --- a/frontend/src/features/collections/collection-items-dialog.ts +++ b/frontend/src/features/collections/collection-items-dialog.ts @@ -50,7 +50,7 @@ const crawlSortOptions: SortOptions = [ }, { field: "firstSeed", - label: msg("Crawl Start URL"), + label: msg("First Page URL"), defaultDirection: 1, }, ]; diff --git a/frontend/src/features/crawl-workflows/workflow-action-menu/workflow-action-menu.ts b/frontend/src/features/crawl-workflows/workflow-action-menu/workflow-action-menu.ts index 87093a59de..b5565348f7 100644 --- a/frontend/src/features/crawl-workflows/workflow-action-menu/workflow-action-menu.ts +++ b/frontend/src/features/crawl-workflows/workflow-action-menu/workflow-action-menu.ts @@ -177,6 +177,13 @@ export class WorkflowActionMenu extends BtrixElement { `, )} + ClipboardController.copyToClipboard(workflow.firstSeed)} + > + + ${msg("Copy First Page URL")} + + ClipboardController.copyToClipboard(workflow.tags.join(", "))} diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index 2ec76e8a98..b823ee8f67 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -2165,6 +2165,20 @@ https://archiveweb.page/images/${"logo.svg"}`} }; private renderJobMetadata() { + const link_to_scope = html``; + return html` ${inputCol(html` `)} ${this.renderHelpTextCol( - msg(`Customize this Workflow's name. Workflows are named after - the first Crawl URL by default.`), + html`${msg(`Customize the name of this workflow.`)} + ${msg( + html`If omitted, the workflow will be named after the first page URL + specified in ${link_to_scope}.`, + )} `, )} ${inputCol(html` = { name: msg("Name"), - firstSeed: msg("Crawl Start URL"), + firstSeed: msg("First Page URL"), }; @property({ type: Boolean }) @@ -540,8 +540,8 @@ export class CrawlsList extends BtrixElement { placeholder=${this.itemType === "upload" ? msg("Search all uploads by name") : this.itemType === "crawl" - ? msg("Search all crawls by name or Crawl Start URL") - : msg("Search all items by name or Crawl Start URL")} + ? msg("Search all crawls by name or first page URL") + : msg("Search all items by name or first page URL")} @btrix-select=${(e: CustomEvent) => { const { key, value } = e.detail; this.filterBy = { diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index 857fd69a7c..229370a30a 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -21,7 +21,6 @@ import type { } from "@/components/ui/filter-chip"; import { parsePage, type PageChangeEvent } from "@/components/ui/pagination"; import { type SelectEvent } from "@/components/ui/search-combobox"; -import { ClipboardController } from "@/controllers/clipboard"; import { SearchParamsController } from "@/controllers/searchParams"; import type { SelectJobTypeEvent } from "@/features/crawl-workflows/new-workflow-dialog"; import { @@ -42,7 +41,6 @@ import { } from "@/types/workflow"; import { isApiError } from "@/utils/api"; import { settingsForDuplicate } from "@/utils/crawl-workflows/settingsForDuplicate"; -import { isArchivingDisabled } from "@/utils/orgs"; import { tw } from "@/utils/tailwind"; type SearchFields = "name" | "firstSeed"; @@ -77,7 +75,7 @@ const sortableFields: Record< defaultDirection: "asc", }, firstSeed: { - label: msg("Crawl Start URL"), + label: msg("First Page URL"), defaultDirection: "asc", }, created: { @@ -107,7 +105,7 @@ const USED_FILTERS = [ export class WorkflowsList extends BtrixElement { static FieldLabels: Record = { name: msg("Name"), - firstSeed: msg("Crawl Start URL"), + firstSeed: msg("First Page URL"), }; @state() @@ -744,7 +742,7 @@ export class WorkflowsList extends BtrixElement { .searchOptions=${this.searchOptions} .keyLabels=${WorkflowsList.FieldLabels} selectedKey=${ifDefined(this.selectedSearchFilterKey)} - placeholder=${msg("Search all Workflows by name or Crawl Start URL")} + placeholder=${msg("Search all workflows by name or first page URL")} @btrix-select=${(e: SelectEvent) => { const { key, value } = e.detail; if (key == null) return; @@ -850,138 +848,6 @@ export class WorkflowsList extends BtrixElement { `; - private renderMenu(workflow: ListWorkflow) { - return html` - ${when( - workflow.isCrawlRunning && this.appState.isCrawler, - // HACK shoelace doesn't current have a way to override non-hover - // color without resetting the --sl-color-neutral-700 variable - () => html` - void this.stop(workflow.lastCrawlId)} - ?disabled=${workflow.lastCrawlStopping} - > - - ${msg("Stop Crawl")} - - void this.cancel(workflow.lastCrawlId)} - > - - ${msg(html`Cancel & Discard Crawl`)} - - `, - )} - ${when( - this.appState.isCrawler && !workflow.isCrawlRunning, - () => html` - void this.runNow(workflow)} - > - - ${msg("Run Crawl")} - - `, - )} - ${when( - this.appState.isCrawler && - workflow.isCrawlRunning && - !workflow.lastCrawlStopping, - // HACK shoelace doesn't current have a way to override non-hover - // color without resetting the --sl-color-neutral-700 variable - () => html` - - - this.navigate.to( - `${this.navigate.orgBasePath}/workflows/${workflow.id}/${WorkflowTab.LatestCrawl}`, - { - dialog: "scale", - }, - )} - > - - ${msg("Edit Browser Windows")} - - - this.navigate.to( - `${this.navigate.orgBasePath}/workflows/${workflow.id}/${WorkflowTab.LatestCrawl}`, - { - dialog: "exclusions", - }, - )} - > - - ${msg("Edit Exclusions")} - - - `, - )} - ${when( - this.appState.isCrawler, - () => - html` - this.navigate.to( - `${this.navigate.orgBasePath}/workflows/${workflow.id}?edit`, - )} - > - - ${msg("Edit Workflow Settings")} - `, - )} - - ClipboardController.copyToClipboard(workflow.tags.join(", "))} - ?disabled=${!workflow.tags.length} - > - - ${msg("Copy Tags")} - - ${when( - this.appState.isCrawler, - () => html` - void this.duplicateConfig(workflow)} - > - - ${msg("Duplicate Workflow")} - - - ClipboardController.copyToClipboard(workflow.id)} - > - - ${msg("Copy Workflow ID")} - - ${when( - !workflow.crawlCount, - () => html` - - { - this.workflowToDelete = workflow; - await this.updateComplete; - void this.deleteDialog?.show(); - }} - > - - ${msg("Delete Workflow")} - - `, - )} - `, - )} - `; - } - private renderName(crawlConfig: ListWorkflow) { if (crawlConfig.name) return crawlConfig.name; const { firstSeed, seedCount } = crawlConfig;