Skip to content

Commit 641d879

Browse files
authored
1.x dev (#12)
* Updated * OCR Fixing work 1. OCR error fixing work 2. Laravel 10 support * Laravel 10 support & Bug fixing - Laravel 10 support added - Bug fixing on OCR - EPUB support removed due to security issue. We will try to add it again later. * PPT Support added - PPT support now added (.ppt and .pptx) type files are now supported. * Updated - Updated and fixing work for PPTX. * Update README.md
1 parent 90d6f5a commit 641d879

File tree

6 files changed

+253
-2
lines changed

6 files changed

+253
-2
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ check file content MIME type before execute.
2525
- **ODT**
2626
- **ODS**
2727
- **RTF**
28+
- **PPTX** (NEW)
2829

2930
**We are working hard to make this Laravel plugin useful. If you find any issues, please report them in a discussion post.**
3031

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939
"phpoffice/phpword": "^0.18",
4040
"laravel/framework": "^10.0",
4141
"thiagoalessio/tesseract_ocr": "^2.12",
42-
"html2text/html2text": "^4.3"
42+
"html2text/html2text": "^4.3",
43+
"phpoffice/phppresentation": "^1.0"
4344
},
4445
"require-dev": {
4546
"phpunit/phpunit": "^9.5"

composer.lock

Lines changed: 167 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
namespace Nilgems\PhpTextract\ExtractorService\ExtractorCommonProcessors;
4+
5+
use Nilgems\PhpTextract\Exceptions\TextractException;
6+
use Nilgems\PhpTextract\ExtractorService\Contracts\AbstractTextExtractor;
7+
use PhpOffice\PhpPresentation\IOFactory;
8+
use PhpOffice\PhpPresentation\Shape;
9+
10+
/**
 * Text extractor for PowerPoint presentations, built on PhpOffice/PhpPresentation.
 *
 * Reader documentation: https://phpoffice.github.io/PHPPresentation/usage/readers.html
 */
class PhpPowerPointProcessor extends AbstractTextExtractor
{
    /**
     * Reader name handed to IOFactory::createReader().
     */
    protected string $readerType = "PowerPoint2007";

    /**
     * Load the presentation found at the path supplied by the utils service.
     *
     * A failure while loading is reported through report() and rethrown as a
     * TextractException carrying the translated "unable to read" message, so
     * callers only have to deal with the package's own exception type.
     * Exceptions raised while creating the reader itself are not caught here.
     *
     * @return \PhpOffice\PhpPresentation\PhpPresentation
     *
     * @throws TextractException when the file cannot be loaded by the reader
     */
    private function loadPresentation(): \PhpOffice\PhpPresentation\PhpPresentation
    {
        $file_path = $this->utilsService->getFilePath();
        $reader = IOFactory::createReader($this->readerType);

        try {
            return $reader->load($file_path);
        } catch (\Exception $exception) {
            report($exception);

            throw new TextractException(trans('textract::processor.error_unable_to_read', [
                'path' => $file_path
            ]));
        }
    }

    /**
     * Collect the text of every rich-text shape on every slide.
     *
     * Only shapes that are instances of Shape\RichText are read; any other
     * shape type is skipped. Each text element contributes one entry and the
     * entries are joined with a newline. A presentation without slides (or
     * without rich-text shapes) yields an empty string.
     *
     * @throws TextractException when the file cannot be loaded by the reader
     */
    protected function getExtractedText(): string
    {
        // Load once: parsing the file is the expensive step.
        $presentation = $this->loadPresentation();

        $data_iterable = [];
        foreach ($presentation->getAllSlides() as $slide) {
            foreach ($slide->getShapeCollection() as $shape) {
                if (!$shape instanceof Shape\RichText) {
                    continue;
                }
                foreach ($shape->getParagraphs() as $paragraph) {
                    foreach ($paragraph->getRichTextElements() as $text_element) {
                        $data_iterable[] = $text_element->getText();
                    }
                }
            }
        }

        return implode("\n", $data_iterable);
    }
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?php
2+
3+
namespace Nilgems\PhpTextract\ExtractorService\Extractors;
4+
5+
use Nilgems\PhpTextract\ExtractorService\ExtractorCommonProcessors\PhpPowerPointProcessor;
6+
7+
/**
 * Extractor registration for .pptx presentations.
 *
 * All extraction logic is inherited from PhpPowerPointProcessor; this class
 * only declares which files it applies to and which reader to use.
 */
class MsOfficePptxExtractor extends PhpPowerPointProcessor
{
    /**
     * File extensions this extractor declares support for.
     */
    public array $supported_extension = [
        'pptx',
    ];

    /**
     * MIME types this extractor declares support for.
     */
    protected array $supported_mime_types = [
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    ];

    /**
     * Reader name passed to the PhpPresentation IOFactory by the parent class.
     */
    protected string $readerType = 'PowerPoint2007';

    /**
     * Name of this reader.
     * NOTE(review): presumably consumed by the base extractor — confirm where it is read.
     */
    protected string $reader_name = 'MsPresentation';
}

src/Providers/ServiceProvider.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use Nilgems\PhpTextract\ExtractorService\Extractors\ImageExtractor;
88
use Nilgems\PhpTextract\ExtractorService\Extractors\MsOfficeDocExtractor;
99
use Nilgems\PhpTextract\ExtractorService\Extractors\MsOfficeDocxExtractor;
10+
use Nilgems\PhpTextract\ExtractorService\Extractors\MsOfficePptxExtractor;
1011
use Nilgems\PhpTextract\ExtractorService\Extractors\OpenOfficeDocument;
1112
use Nilgems\PhpTextract\ExtractorService\Extractors\OpenOfficeSpreadSheet;
1213
use Nilgems\PhpTextract\ExtractorService\Extractors\PdfExtractor;
@@ -52,6 +53,7 @@ protected function registerExtractors(): void
5253
ImageExtractor::class,
5354
MsOfficeDocExtractor::class,
5455
MsOfficeDocxExtractor::class,
56+
MsOfficePptxExtractor::class,
5557
OpenOfficeDocument::class,
5658
OpenOfficeSpreadSheet::class,
5759
PdfExtractor::class,

0 commit comments

Comments
 (0)