diff --git a/packages/markitdown/README.md b/packages/markitdown/README.md index edd270166..e52b95cd3 100644 --- a/packages/markitdown/README.md +++ b/packages/markitdown/README.md @@ -10,7 +10,7 @@ From PyPI: ```bash -pip install markitdown[all] +pip install 'markitdown[all]' ``` From source: @@ -18,7 +18,7 @@ From source: ```bash git clone git@github.com:microsoft/markitdown.git cd markitdown -pip install -e packages/markitdown[all] +pip install -e 'packages/markitdown[all]' ``` ## Usage diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py index 6085ad6bb..0adf93d72 100644 --- a/packages/markitdown/src/markitdown/__main__.py +++ b/packages/markitdown/src/markitdown/__main__.py @@ -74,7 +74,7 @@ def main(): parser.add_argument( "-c", "--charset", - help="Provide a hint about the file's charset (e.g, UTF-8).", + help="Provide a hint about the file's charset (e.g., UTF-8).", ) parser.add_argument( diff --git a/packages/markitdown/src/markitdown/_base_converter.py b/packages/markitdown/src/markitdown/_base_converter.py index fa2b11145..4751ca9f1 100644 --- a/packages/markitdown/src/markitdown/_base_converter.py +++ b/packages/markitdown/src/markitdown/_base_converter.py @@ -50,18 +50,18 @@ def accepts( ) -> bool: """ Return a quick determination on if the converter should attempt converting the document. - This is primarily based `stream_info` (typically, `stream_info.mimetype`, `stream_info.extension`). - In cases where the data is retrieved via HTTP, the `steam_info.url` might also be referenced to + This is primarily based on `stream_info` (typically, `stream_info.mimetype`, `stream_info.extension`). + In cases where the data is retrieved via HTTP, the `stream_info.url` might also be referenced to make a determination (e.g., special converters for Wikipedia, YouTube etc). - Finally, it is conceivable that the `stream_info.filename` might be used to in cases + Finally, it is conceivable that the `stream_info.filename` might be used in cases where the filename is well-known (e.g., `Dockerfile`, `Makefile`, etc) NOTE: The method signature is designed to match that of the convert() method. This provides some assurance that, if accepts() returns True, the convert() method will also be able to handle the document. IMPORTANT: In rare cases, (e.g., OutlookMsgConverter) we need to read more from the stream to make a final - determination. Read operations inevitably advances the position in file_stream. In these case, the position - MUST be reset it MUST be reset before returning. This is because the convert() method may be called immediately + determination. Read operations inevitably advance the position in file_stream. In these cases, the position + MUST be reset before returning. This is because the convert() method may be called immediately after accepts(), and will expect the file_stream to be at the original position. E.g., diff --git a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py index 6f1306fe8..ff6d75e62 100644 --- a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py +++ b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py @@ -5,15 +5,6 @@ from .._base_converter import DocumentConverter, DocumentConverterResult from .._stream_info import StreamInfo -# Try loading optional (but in this case, required) dependencies -# Save reporting of any exceptions for later -_dependency_exc_info = None -try: - import mammoth # noqa: F401 -except ImportError: - # Preserve the error and stack trace for later - _dependency_exc_info = sys.exc_info() - ACCEPTED_MIME_TYPE_PREFIXES = [ "text/", "application/json",