88# ' Preceding spaces are ignored.
99# ' 3. Extracts all the text found for the label given by `inp_str`, and
1010# ' returns the description found for it (the actual alternative text)
11+ # ' 4. If no name is found, the code will figure out what the figure number
12+ # ' is (including preceding appendix letters) and set that as the alternative
13+ # ' text
1114# '
1215# ' Originally implemented in the `hake` package but copied here for simplicity
1316# ' so that the hake package did not need to be imported as that is a different
@@ -54,6 +57,7 @@ extract_alt_text <- function(inp_str,
5457 k <- map(fns , ~ {
5558 rmd <- readLines(.x )
5659 x <- grep(alt_str_ref , rmd )
60+
5761 if (length(x )){
5862 if (length(x ) > 1 ){
5963 bail(" Alt. text label `" , alt_str , " ` defined more than once in " ,
@@ -90,6 +94,7 @@ extract_alt_text <- function(inp_str,
9094 }else {
9195 end_ind <- end_ind - 1
9296 }
97+
9398 # Glue all the text lines together
9499 alt_text <- paste(rmd [start_ind : end_ind ], collapse = " " )
95100
@@ -101,11 +106,19 @@ extract_alt_text <- function(inp_str,
101106 # cannot have percent signs in un-escaped and the escape character shows
102107 # up in the output text and will be read by a reader so we remove percent
103108 # signs (if any) and all preceding backslashes and spaces (if any)
104- alt_text <- gsub(" [\\ ]* *%" ,
109+ #
110+ # First, remove all slashes before percent signs
111+ alt_text <- gsub(" ([\\ ]+%)" , " %" , alt_text )
112+ # If there is a format argument for a printf-type R statement inside
113+ # R chunk(s), the percent sign will be followed by a letter or number,
114+ # if so leave the percent sign intact be so that the R code will execute
115+ # properly
116+ alt_text <- gsub(" %(?![a-zA-Z0-9]+)" ,
105117 ifelse(fr(),
106118 " pour cent" ,
107119 " percent" ),
108- alt_text )
120+ alt_text ,
121+ perl = TRUE )
109122
110123 # Return a vector of the label and it's text
111124 alt_text
@@ -115,107 +128,190 @@ extract_alt_text <- function(inp_str,
115128 })
116129
117130 # Remove all NULLs from the list
118- if (length(k )){
119- k [sapply(k , is.null )] <- NULL
120- }
121- if (! length(k )){
122- bail(" Error retrieving an alternative text label. " ,
123- " You must add a (ref:" , alt_str , " ) with alternative text to " ,
124- " the document code." )
125- }
126- if (length(k ) > 1 ){
127- bail(" Error retrieving your alternative text label (ref:" , alt_str , " ). " ,
128- " There was more than one found in the code." )
129- }
131+ k [sapply(k , is.null )] <- NULL
130132
131- k <- unlist( k )
133+ if (length( k ) == 1 ){
132134
133- # Replace any inline r code with actual text (mini-knitr parser)
134- # Break up the string into chunks before and after the inline r code chunks
135- # TEST
136- # sp <- "hake"
137- # common_name <- "this is the common name"
138- # k <- "`r sp` `r sp` - Trying to match `r sp` with another `r 10 + 29 * 30` chunk `r common_name`."
135+ k <- unlist(k )
139136
140- backtick_inds <- unlist(gregexpr(" `" , k ))
141- if (backtick_inds [1 ] == - 1 ){
142- backtick_inds <- NULL
143- }
137+ # Replace any inline r code with actual text (mini-knitr parser)
138+ # Break up the string into chunks before and after the inline r code chunks
139+ # TEST
140+ # sp <- "hake"
141+ # common_name <- "this is the common name"
142+ # k <- "`r sp` `r sp` - Trying to match `r sp` with another `r 10 + 29 * 30` chunk `r common_name`."
143+
144+ backtick_inds <- unlist(gregexpr(" `" , k ))
145+ if (backtick_inds [1 ] == - 1 ){
146+ backtick_inds <- NULL
147+ }
148+
149+ # Remove inline R code chunks prior to checking for commas in alt text
150+ # so that commas in the function calls do not match
151+ tmp <- gsub(" `r.*`" , " " , k )
152+ # Check for commas that haven't been escaped. They will cause tagpdf errors
153+ # if not fixed. (?<!\\\\) is negative lookbehind which means that if a comma
154+ # is not preceded by a double backslash, then replace the comma with \\,
155+ # Note that for lookbehinds/lookaheads to work we must use perl = TRUE
156+ comma_pat <- " (?<!\\\\ )(\\ ,)"
157+
158+ grep_length <- length(grep(comma_pat , tmp , perl = TRUE ))
159+ if (grep_length ){
160+ if (grep_length == 1 ){
161+ message <- " There was a comma "
162+ }else {
163+ message <- paste0(" There were " , grep_length , " commas " )
164+ }
165+ alert(message , " found in the alt text entry for label `" ,
166+ alt_str , " ` You must re-write the alternative text without commas. " ,
167+ " This is a limitation of the tagpdf LaTeX package.\n For nowe, the " ,
168+ " comma was removed so the compilation could continue.\n The text in " ,
169+ " question is:\n\n " ,
170+ k , " \n " )
171+ # Remove the commas - was used when this was a warning (alert) instead, but
172+ # the warning does not print to the screen so it is useless.
173+ k <- gsub(comma_pat , " " , k , perl = TRUE )
174+ # Replace with double-backslash. Note this leaves a backslash in the text
175+ # and there's no way around it. I tried more or less backslashes and no dice
176+ # k <- gsub(comma_pat, "\\\\\\1", k, perl = TRUE)
177+ }
178+
179+ if (! length(backtick_inds )){
180+ return (k )
181+ }
182+ if (length(backtick_inds ) %% 2 != 0 ){
183+ bail(" There is an odd number of backticks in the text referred " ,
184+ " to by label " , inp_str , " . The text is:\n " , k )
185+ }
186+
187+ chunks_non_r <- str_split(k , " `r .*?`" )[[1 ]]
188+ chunks_non_r <- chunks_non_r [chunks_non_r != " " ]
144189
145- # Remove inline R code chunks prior to checking for commas in alt text
146- # so that commas in the function calls do not match
147- tmp <- gsub(" `r.*`" , " " , k )
148- # Check for commas that haven't been escaped. They will cause tagpdf errors
149- # if not fixed. (?<!\\\\) is negative lookbehind which means that if a comma
150- # is not preceded by a double backslash, then replace the comma with \\,
151- # Note that for lookbehinds/lookaheads to work we must use perl = TRUE
152- comma_pat <- " (?<!\\\\ )(\\ ,)"
153-
154- grep_length <- length(grep(comma_pat , tmp , perl = TRUE ))
155- if (grep_length ){
156- if (grep_length == 1 ){
157- message <- " There was a comma "
190+ # Number of backticks are even as they must be, so break them into chunks
191+ start_inds <- backtick_inds [seq(1 , length(backtick_inds ), 2 )]
192+ # Check to make sure the starting backticks have an 'r' immediately after
193+ walk(start_inds , ~ {
194+ if (substr(k , .x + 1 , .x + 1 ) != " r" && substr(k , .x + 2 , .x + 2 ) != " " ){
195+ stop(" Non-r code chunk found. R code chunks must be of the format " ,
196+ " `r code_here`" )
197+ }
198+ })
199+ end_inds <- backtick_inds [seq(2 , length(backtick_inds ), 2 )]
200+ chunks <- str_sub(k , start_inds , end_inds )
201+
202+ # Evaluate the R chunks. This is needed so that inline R chunks found in
203+ # alt text paragraphs are evaluated properly
204+ chunks <- map_chr(chunks , ~ {
205+ # Remove `r and ` from the code
206+ x <- gsub(" ^`r" , " " , .x )
207+ x <- gsub(" `$" , " " , x )
208+ x <- gsub(" +" , " " , x )
209+ eval(parse(text = x ))
210+ })
211+ # Here we have chunks and chunks_non_r. We need to find out which comes fist,
212+ chunk_len <- max(length(chunks ), length(chunks_non_r ))
213+ length(chunks ) <- chunk_len
214+ length(chunks_non_r ) <- chunk_len
215+
216+ if (start_inds [1 ] == 1 ){
217+ out_str <- c(rbind(chunks , chunks_non_r ))
158218 }else {
159- message <- paste0( " There were " , grep_length , " commas " )
219+ out_str <- c(rbind( chunks_non_r , chunks ) )
160220 }
161- alert(message , " found in the alt text entry for label `" ,
162- alt_str , " ` You must re-write the alternative text without commas. " ,
163- " This is a limitation of the tagpdf LaTeX package.\n For nowe, the " ,
164- " comma was removed so the compilation could continue.\n The text in " ,
165- " question is:\n\n " ,
166- k , " \n " )
167- # Remove the commas - was used when this was a warning (alert) instead, but
168- # the warning does not print to the screen so it is useless.
169- k <- gsub(comma_pat , " " , k , perl = TRUE )
170- # Replace with double-backslash. Note this leaves a backslash in the text
171- # and there's no way around it. I tried more or less backslashes and no dice
172- # k <- gsub(comma_pat, "\\\\\\1", k, perl = TRUE)
173- }
221+ out_str <- out_str [! is.na(out_str )]
222+ out_str <- paste(out_str , collapse = " " )
174223
175- if (! length(backtick_inds )){
176- return (k )
177- }
178- if (length(backtick_inds ) %% 2 != 0 ){
179- bail(" There is an odd number of backticks in the text referred " ,
180- " to by label " , inp_str , " . The text is:\n " , k )
181- }
224+ }else if (! length(k )){
225+ # Create a Figure XX alt text label. Need to check which appendix
226+ # it is in or if it is in the main document figures
227+ rmd <- map(fns , ~ {
228+ readLines(.x )
229+ }) | >
230+ unlist()
231+ fig_chunk_inds <- grep(" fig.cap *=" , rmd )
232+ fig_chunk_code <- rmd [fig_chunk_inds ]
233+ pat <- paste0(inp_str , " *," )
234+ the_fig_ind <- grep(pat , fig_chunk_code )
235+ if (! length(the_fig_ind )){
236+ bail(" There was a problem matching the name of the figure chunk '" ,
237+ inp_str , " ' using the regular expression '" , pat , " '.\n There " ,
238+ " was no match. This could occur if the name is not " ,
239+ " followed by zero or more spaces and then a comma." )
240+ }
241+ if (length(the_fig_ind ) > 1 ){
242+ bail(" There was a problem matching the name of the figure chunk '" ,
243+ inp_str , " ' using the regular expression '" , pat , " '.\n There " ,
244+ " was more than one match. This could occur if the name is not " ,
245+ " followed by zero or more spaces and then a comma." )
246+ }
247+ # Check if in an appendix and if so, which one
248+ is_appendix_fig <- FALSE
249+ appendix_ind <- grep(" Appendix \\ {-\\ }" , rmd )
250+ if (length(appendix_ind ) == 1 ){
251+ is_appendix_fig <- fig_chunk_inds [the_fig_ind ] > appendix_ind
252+ # Which appendix is it in?
253+ if (is_appendix_fig ){
254+ # Search for all heading 1 lines following the appendix declaration
255+ appendix_lines <- rmd [appendix_ind : length(rmd )]
256+ # pattern <- "(?<![#])# +(?![#])"
257+ # Assumes that the author has put {#app:<letter>} into the source code
258+ pattern <- " .*(\\ {#app:[a-z]\\ }) *$"
259+ # This will match both English and French appendix header lines.
260+ # We use the French ones as guides
261+ app_headers <- grep(pattern , appendix_lines , perl = TRUE , value = TRUE )
262+ if (! length(app_headers )){
263+ bail(" Could not find any tags on the appendix headers of the " ,
264+ " format: {#app:<letter>} where <letter> is a lower-case letter " ,
265+ " from a to z. This is required for automated alternative text " ,
266+ " to work. Here is an example line for an Appendix header:\n " ,
267+ " # Ecosystem considerations {#app:d}" )
268+ }
269+ # Remove all but the last occurrence of app:<letter> if there are more
270+ # than one
271+ doubles <- gsub(pattern , " \\ 1" , app_headers )
272+ app_headers <- app_headers [duplicated(doubles )]
273+ app_header_patterns <- gsub(" \\ {" , " \\\\ {" , app_headers )
274+ app_header_patterns <- gsub(" \\ }" , " \\\\ }" , app_header_patterns )
275+ app_header_patterns <- gsub(" \\ (" , " \\\\ (" , app_header_patterns )
276+ app_header_patterns <- gsub(" \\ )" , " \\\\ )" , app_header_patterns )
277+ app_header_inds <- map_dbl(app_header_patterns , ~ {grep(.x , rmd )})
182278
183- chunks_non_r <- str_split(k , " `r .*?`" )[[1 ]]
184- chunks_non_r <- chunks_non_r [chunks_non_r != " " ]
279+ # Find out which appendix the figure is in
280+ which_app <- max(which(fig_chunk_inds [the_fig_ind ] > app_header_inds ))
281+ which_app_letter <- LETTERS [which_app ]
282+ # Now find which figure number in the appendix it is 1-100 ?!
283+ # There is more than one appendix, so we have to look in the one we
284+ # are in and count up the figures in it
285+ last_app <- length(app_header_inds )
286+ if (which_app == last_app ){
287+ # It is the last appendix, so we search to the end of the document
288+ app_lines <- rmd [app_header_inds [which_app ]: length(rmd )]
289+ }else {
290+ # There must be at least one appendix following this one
291+ app_lines <- rmd [app_header_inds [which_app ]: app_header_inds [which_app + 1 ]]
292+ }
293+ fig_chunk_inds <- grep(" fig.cap *=" , app_lines )
294+ fig_chunk_code <- app_lines [fig_chunk_inds ]
295+ pat <- paste0(inp_str , " *," )
296+ the_fig_ind <- grep(pat , fig_chunk_code )
297+ }
185298
186- # Number of backticks are even as they must be, so break them into chunks
187- start_inds <- backtick_inds [seq(1 , length(backtick_inds ), 2 )]
188- # Check to make sure the starting backticks have an 'r' immediately after
189- walk(start_inds , ~ {
190- if (substr(k , .x + 1 , .x + 1 ) != " r" && substr(k , .x + 2 , .x + 2 ) != " " ){
191- stop(" Non-r code chunk found. R code chunks must be of the format " ,
192- " `r code_here`" )
299+ }else if (length(appendix_ind ) > 1 ){
300+ bail(" More than one 'Appendix {-}' was found while trying to objectify " ,
301+ " alternative text figure numbers. Check Rmd code" )
302+ }
303+
304+ if (is_appendix_fig ){
305+ out_str <- paste0(" Figure " , which_app_letter , " ." , the_fig_ind )
306+ }else {
307+ out_str <- paste0(" Figure " , the_fig_ind )
193308 }
194- })
195- end_inds <- backtick_inds [seq(2 , length(backtick_inds ), 2 )]
196- chunks <- str_sub(k , start_inds , end_inds )
197-
198- # Evaluate the R chunks. This is needed so that inline R chunks found in
199- # alt text paragraphs are evaluated properly
200- chunks <- map_chr(chunks , ~ {
201- # Remove `r and ` from the code
202- x <- gsub(" ^`r" , " " , .x )
203- x <- gsub(" `$" , " " , x )
204- x <- gsub(" +" , " " , x )
205- eval(parse(text = x ))
206- })
207- # Here we have chunks and chunks_non_r. We need to find out which comes fist,
208- chunk_len <- max(length(chunks ), length(chunks_non_r ))
209- length(chunks ) <- chunk_len
210- length(chunks_non_r ) <- chunk_len
211309
212- if (start_inds [1 ] == 1 ){
213- out_str <- c(rbind(chunks , chunks_non_r ))
214310 }else {
215- out_str <- c(rbind(chunks_non_r , chunks ))
311+ bail(" Error retrieving your alternative text label (ref:" , alt_str , " ). " ,
312+ " There was more than one found in the code." )
216313 }
217- out_str <- out_str [! is.na(out_str )]
218- out_str <- paste(out_str , collapse = " " )
314+
219315
220316 out_str
221317}
0 commit comments