-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpptx_notes_txt.py
More file actions
163 lines (130 loc) · 5.76 KB
/
pptx_notes_txt.py
File metadata and controls
163 lines (130 loc) · 5.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#######################################
# Author: TheWilley #
# Copyright: 2023 #
# License: MIT #
# Version: 1.0.0 #
#######################################
import click
import zipfile
import xml.etree.ElementTree as ET
import os
def _abort(message):
    """Print *message* and stop the script with a non-zero exit status."""
    click.echo(message)
    # SystemExit is raised directly instead of calling the site-provided
    # ``exit`` helper, and with status 1 so callers can detect the failure.
    raise SystemExit(1)


def _extract_note_text(note_xml):
    """Return the text of one notes-slide XML document, joined by spaces.

    Every element whose local tag name is ``t`` (in any namespace) is
    collected in document order. Tags without text are skipped so the join
    cannot fail on ``None``.
    """
    text_tags = ET.fromstring(note_xml).findall('.//{*}t')
    # The final text tag is dropped on purpose, exactly as before.
    # NOTE(review): presumably it holds the slide-number field - confirm.
    return ' '.join(tag.text for tag in text_tags[:-1] if tag.text is not None)


def _render_pages(pages, prettyprint, markdown, template):
    """Build the complete output text for *pages* in the selected format.

    Pages are numbered by position (starting at 1), so pages with identical
    text are still numbered and emitted correctly. *template* is the content
    of the custom format file, or ``None`` when custom mode is not active.
    """
    numbered = list(enumerate(pages, start=1))
    if prettyprint:
        sections = []
        for number, page in numbered:
            title = 'Page ' + str(number)
            sections.append(title + '\n' + '-' * len(title) + '\n\n' + page)
        return '\n\n'.join(sections)
    if markdown:
        return '\n\n'.join(
            '# Page ' + str(number) + '\n' + page for number, page in numbered
        )
    if template is not None:
        # Substitute {slide} first so a literal "{slide}" inside the note
        # text is written verbatim instead of being replaced as well.
        return ''.join(
            template.replace('{slide}', str(number)).replace('{notes}', page)
            for number, page in numbered
        )
    return '\n\n'.join(pages)


# Command-line entry point.
#
# Parameters (supplied by click from the options below):
#   input       -- path to the .pptx file to read (required).
#   output      -- path of the text file to write (required).
#   prettyprint -- write "Page N" headings underlined with dashes.
#   markdown    -- write "# Page N" headings.
#   custom      -- path to a '.custom' template; "{notes}" and "{slide}" in
#                  it are replaced for every page.
# At most one of prettyprint / markdown / custom may be given. Every error
# is reported with click.echo and ends the process with exit status 1.
# The docstring is kept to a single line because click shows it as the
# --help text.
@click.command()
@click.option('-i', '--input', default=None, help='The path to the pptx file.', required=True, type=str)
@click.option('-o', '--output', help='The path to the output file.', required=True, type=str)
@click.option('-p', '--prettyprint', is_flag=True, help='Write the text in a pretty format.')
@click.option('-md', '--markdown', is_flag=True, help='Write the text in a markdown format.')
@click.option('-c', '--custom', help='The path to a custom format file.', type=str)
def main(input, output, prettyprint, markdown, custom):
    """Script to extract the notes from a pptx file and write them to a text file."""
    # click already enforces the option; the guard covers any other caller.
    if input is None:
        _abort('No input file provided.')

    # Probe the input by opening it, so unreadable paths are rejected too.
    try:
        with open(input):
            pass
    except OSError:
        _abort('Error: Input file does not exist.')

    if not input.endswith('.pptx'):
        _abort('Error: Input file is not a valid pptx file.')

    # Validate the formatting options BEFORE the output file is opened, so a
    # usage error can no longer truncate an existing output file.
    if sum(bool(mode) for mode in (prettyprint, markdown, custom)) > 1:
        _abort('Error: You can only use one of the flags [-p, --prettyprint], [-md, --markdown] or [-c, --custom].')

    template = None
    if custom:
        if not custom.endswith('.custom'):
            _abort('Error: Not a valid \'.custom\' file.')
        try:
            with open(custom, encoding='utf-8') as template_file:
                template = template_file.read()
        except OSError:
            _abort('Error: \'.custom\' file does not exist.')

    # Collect one page of text per notes slide, in archive order.
    pages = []
    try:
        with zipfile.ZipFile(input, 'r') as pptx:
            for name in pptx.namelist():
                if name.startswith('ppt/notesSlides/notesSlide'):
                    pages.append(_extract_note_text(pptx.read(name)))
    except (zipfile.BadZipFile, ET.ParseError):
        # A renamed non-zip file or a damaged notes part ends up here.
        _abort('Error: Input file is not a valid pptx file.')

    # Write everything in one go; UTF-8 keeps non-ASCII notes intact
    # regardless of the platform's default encoding.
    try:
        with open(output, 'w', encoding='utf-8') as out_file:
            out_file.write(_render_pages(pages, prettyprint, markdown, template))
    except OSError as e:
        _abort("Error: {}".format(e))

    # Print success message
    click.echo('Successfully wrote the notes to \'' + os.path.abspath(output) + '\'')
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()