iterative
diff --git a/src/datachain/client/fsspec.py b/src/datachain/client/fsspec.py
Lines changed: 11 additions & 0 deletions
diff --git a/src/datachain/lib/dc/storage.py b/src/datachain/lib/dc/storage.py
Lines changed: 67 additions & 25 deletions
@@ -44,6 +44,7 @@
 DELIMITER = "/"  # Path delimiter.
 
 DATA_SOURCE_URI_PATTERN = re.compile(r"^[\w]+:\/\/.*$")
+CLOUD_STORAGE_PROTOCOLS = {"s3", "gs", "az", "hf"}
 
 ResultQueue = asyncio.Queue[Optional[Sequence["File"]]]
 
@@ -62,6 +63,16 @@ def _is_win_local_path(uri: str) -> bool:
     return False
 
 
+def is_cloud_uri(uri: str) -> bool:
+    """Return True if the URI scheme is one of CLOUD_STORAGE_PROTOCOLS."""
+    return urlparse(uri).scheme in CLOUD_STORAGE_PROTOCOLS
+
+
+def get_cloud_schemes() -> list[str]:
+    """Get cloud storage scheme prefixes (e.g. "s3://") in sorted order."""
+    return [f"{p}://" for p in sorted(CLOUD_STORAGE_PROTOCOLS)]
+
+
 class Bucket(NamedTuple):
     name: str
     uri: "StorageURI"
 
@@ -3,6 +3,13 @@
 from functools import reduce
 from typing import TYPE_CHECKING, Optional, Union
 
+from datachain.lib.dc.storage_pattern import (
+    apply_glob_filter,
+    expand_brace_pattern,
+    should_use_recursion,
+    split_uri_pattern,
+    validate_cloud_bucket_name,
+)
 from datachain.lib.file import FileType, get_file_type
 from datachain.lib.listing import get_file_info, get_listing, list_bucket, ls
 from datachain.query import Session
@@ -38,14 +45,18 @@ def read_storage(
     It returns the chain itself as usual.
 
     Parameters:
-        uri: storage URI with directory or list of URIs.
-            URIs must start with storage prefix such
-            as `s3://`, `gs://`, `az://` or "file:///"
+        uri: Storage path(s) or URI(s). Can be a local path or start with a
+            storage prefix like `s3://`, `gs://`, `az://`, `hf://` or "file:///".
+            Supports glob patterns:
+              - `*` : wildcard
+              - `**` : recursive wildcard
+              - `?` : single character
+              - `{a,b}` : brace expansion
         type: read file as "binary", "text", or "image" data. Default is "binary".
         recursive: search recursively for the given path.
-        column: Created column name.
+        column: Column name that will contain File objects. Default is "file".
         update: force storage reindexing. Default is False.
-        anon: If True, we will treat cloud bucket as public one
+        anon: If True, we will treat cloud bucket as public one.
         client_config: Optional client configuration for the storage client.
         delta: If True, only process new or changed files instead of reprocessing
             everything. This saves time by skipping files that were already processed in
@@ -80,12 +91,19 @@ def read_storage(
         chain = dc.read_storage("s3://my-bucket/my-dir")
         ```
 
+        Match all .json files recursively using glob pattern
+        ```py
+        chain = dc.read_storage("gs://bucket/meta/**/*.json")
+        ```
+
+        Match image file extensions for directories with pattern
+        ```py
+        chain = dc.read_storage("s3://bucket/202?/**/*.{jpg,jpeg,png}")
+        ```
+
         Multiple URIs:
         ```python
-        chain = dc.read_storage([
-            "s3://bucket1/dir1",
-            "s3://bucket2/dir2"
-        ])
+        chain = dc.read_storage(["s3://my-bkt/dir1", "s3://bucket2/dir2/dir3"])
         ```
 
         With AWS S3-compatible storage:
@@ -95,19 +113,6 @@ def read_storage(
             client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
         )
         ```
-
-        Pass existing session
-        ```py
-        session = Session.get()
-        chain = dc.read_storage([
-            "path/to/dir1",
-            "path/to/dir2"
-        ], session=session, recursive=True)
-        ```
-
-    Note:
-        When using multiple URIs with `update=True`, the function optimizes by
-        avoiding redundant updates for URIs pointing to the same storage location.
     """
     from .datachain import DataChain
     from .datasets import read_dataset
@@ -130,13 +135,36 @@ def read_storage(
     if not uris:
         raise ValueError("No URIs provided")
 
+    # Then expand all URIs that contain brace patterns
+    expanded_uris = []
+    for single_uri in uris:
+        uri_str = str(single_uri)
+        validate_cloud_bucket_name(uri_str)
+        expanded_uris.extend(expand_brace_pattern(uri_str))
+
+    # Now process each expanded URI
     chains = []
     listed_ds_name = set()
     file_values = []
 
-    for single_uri in uris:
+    updated_uris = set()
+
+    for single_uri in expanded_uris:
+        # Check if URI contains glob patterns and split them
+        base_uri, glob_pattern = split_uri_pattern(single_uri)
+
+        # If a pattern is found, use the base_uri for listing
+        # The pattern will be used for filtering later
+        list_uri_to_use = base_uri if glob_pattern else single_uri
+
+        # Avoid double updates for the same URI
+        update_single_uri = False
+        if update and (list_uri_to_use not in updated_uris):
+            updated_uris.add(list_uri_to_use)
+            update_single_uri = True
+
         list_ds_name, list_uri, list_path, list_ds_exists = get_listing(
-            single_uri, session, update=update
+            list_uri_to_use, session, update=update_single_uri
         )
 
         # list_ds_name is None if object is a file, we don't want to use cache
@@ -185,7 +213,21 @@ def lst_fn(ds_name, lst_uri):
                 lambda ds_name=list_ds_name, lst_uri=list_uri: lst_fn(ds_name, lst_uri)
             )
 
-        chains.append(ls(dc, list_path, recursive=recursive, column=column))
+        # If a glob pattern was detected, use it for filtering
+        # Otherwise, use the original list_path from get_listing
+        if glob_pattern:
+            # Determine if we should use recursive listing based on the pattern
+            use_recursive = should_use_recursion(glob_pattern, recursive or False)
+
+            # Apply glob filter - no need for brace expansion here as it's done above
+            chain = apply_glob_filter(
+                dc, glob_pattern, list_path, use_recursive, column
+            )
+            chains.append(chain)
+        else:
+            # No glob pattern detected, use normal ls behavior
+            chains.append(ls(dc, list_path, recursive=recursive, column=column))
+
         listed_ds_name.add(list_ds_name)
 
     storage_chain = None if not chains else reduce(lambda x, y: x.union(y), chains)