From 7be1b5021ed61ab25061c108abfdd6ea456f4e18 Mon Sep 17 00:00:00 2001 From: Contributor Date: Fri, 6 Mar 2026 04:17:17 +0000 Subject: [PATCH 1/3] docs: fix typos and grammar in base converter documentation - Fix 'based' -> 'based on' for proper grammar - Fix 'steam_info.url' -> 'stream_info.url' (typo) - Fix 'used to in cases' -> 'used in cases' (grammar) - Fix duplicate 'MUST be reset' text - Fix 'advances' -> 'advance' (subject-verb agreement) - Fix 'case' -> 'cases' (pluralization) - Fix 'e.g,' -> 'e.g.,' in the --charset CLI help text --- packages/markitdown/src/markitdown/__main__.py | 2 +- packages/markitdown/src/markitdown/_base_converter.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py index 6085ad6bb..0adf93d72 100644 --- a/packages/markitdown/src/markitdown/__main__.py +++ b/packages/markitdown/src/markitdown/__main__.py @@ -74,7 +74,7 @@ def main(): parser.add_argument( "-c", "--charset", - help="Provide a hint about the file's charset (e.g, UTF-8).", + help="Provide a hint about the file's charset (e.g., UTF-8).", ) parser.add_argument( diff --git a/packages/markitdown/src/markitdown/_base_converter.py b/packages/markitdown/src/markitdown/_base_converter.py index fa2b11145..4751ca9f1 100644 --- a/packages/markitdown/src/markitdown/_base_converter.py +++ b/packages/markitdown/src/markitdown/_base_converter.py @@ -50,18 +50,18 @@ def accepts( ) -> bool: """ Return a quick determination on if the converter should attempt converting the document. - This is primarily based `stream_info` (typically, `stream_info.mimetype`, `stream_info.extension`). - In cases where the data is retrieved via HTTP, the `steam_info.url` might also be referenced to + This is primarily based on `stream_info` (typically, `stream_info.mimetype`, `stream_info.extension`). 
+ In cases where the data is retrieved via HTTP, the `stream_info.url` might also be referenced to make a determination (e.g., special converters for Wikipedia, YouTube etc). - Finally, it is conceivable that the `stream_info.filename` might be used to in cases + Finally, it is conceivable that the `stream_info.filename` might be used in cases where the filename is well-known (e.g., `Dockerfile`, `Makefile`, etc) NOTE: The method signature is designed to match that of the convert() method. This provides some assurance that, if accepts() returns True, the convert() method will also be able to handle the document. IMPORTANT: In rare cases, (e.g., OutlookMsgConverter) we need to read more from the stream to make a final - determination. Read operations inevitably advances the position in file_stream. In these case, the position - MUST be reset it MUST be reset before returning. This is because the convert() method may be called immediately + determination. Read operations inevitably advance the position in file_stream. In these cases, the position + MUST be reset before returning. This is because the convert() method may be called immediately after accepts(), and will expect the file_stream to be at the original position. E.g., From d0d5c14c1b22346fbcac4b75bfe4e26a86f90091 Mon Sep 17 00:00:00 2001 From: Contributor Date: Fri, 6 Mar 2026 04:19:08 +0000 Subject: [PATCH 2/3] fix: remove unused mammoth import from PlainTextConverter The PlainTextConverter had an unused mammoth import with exception handling that appears to be a copy-paste error from the docx converter. mammoth is not used by this converter. 
--- .../src/markitdown/converters/_plain_text_converter.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py index 6f1306fe8..ff6d75e62 100644 --- a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py +++ b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py @@ -5,15 +5,6 @@ from .._base_converter import DocumentConverter, DocumentConverterResult from .._stream_info import StreamInfo -# Try loading optional (but in this case, required) dependencies -# Save reporting of any exceptions for later -_dependency_exc_info = None -try: - import mammoth # noqa: F401 -except ImportError: - # Preserve the error and stack trace for later - _dependency_exc_info = sys.exc_info() - ACCEPTED_MIME_TYPE_PREFIXES = [ "text/", "application/json", From ae828d8ca0e2802cc20a9b9ef63311e85edbe62a Mon Sep 17 00:00:00 2001 From: Contributor Date: Fri, 6 Mar 2026 04:54:50 +0000 Subject: [PATCH 3/3] docs: quote extras install examples in package README --- packages/markitdown/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/markitdown/README.md b/packages/markitdown/README.md index edd270166..e52b95cd3 100644 --- a/packages/markitdown/README.md +++ b/packages/markitdown/README.md @@ -10,7 +10,7 @@ From PyPI: ```bash -pip install markitdown[all] +pip install 'markitdown[all]' ``` From source: @@ -18,7 +18,7 @@ From source: ```bash git clone git@github.com:microsoft/markitdown.git cd markitdown -pip install -e packages/markitdown[all] +pip install -e 'packages/markitdown[all]' ``` ## Usage