Skip to content

Commit b6c1ac3

Browse files
committed
Set alt text defaults to be the figure number, including appendix
letter where necessary. See issue #297
1 parent 7aa092c commit b6c1ac3

File tree

2 files changed

+189
-90
lines changed

2 files changed

+189
-90
lines changed

R/extract-alt-text.R

Lines changed: 186 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#' Preceding spaces are ignored.
99
#' 3. Extracts all the text found for the label given by `inp_str`, and
1010
#' returns the description found for it (the actual alternative text)
11+
#' 4. If no name is found, the code will figure out what the figure number
12+
#' is (including preceding appendix letters) and set that as the alternative
13+
#' text
1114
#'
1215
#' Originally implemented in the `hake` package but copied here for simplicity
1316
#' so that the hake package did not need to be imported as that is a different
@@ -54,6 +57,7 @@ extract_alt_text <- function(inp_str,
5457
k <- map(fns, ~{
5558
rmd <- readLines(.x)
5659
x <- grep(alt_str_ref, rmd)
60+
5761
if(length(x)){
5862
if(length(x) > 1){
5963
bail("Alt. text label `", alt_str, "` defined more than once in ",
@@ -90,6 +94,7 @@ extract_alt_text <- function(inp_str,
9094
}else{
9195
end_ind <- end_ind - 1
9296
}
97+
9398
# Glue all the text lines together
9499
alt_text <- paste(rmd[start_ind:end_ind], collapse = " ")
95100

@@ -101,11 +106,19 @@ extract_alt_text <- function(inp_str,
101106
# cannot have percent signs in un-escaped and the escape character shows
102107
# up in the output text and will be read by a reader so we remove percent
103108
# signs (if any) and all preceding backslashes and spaces (if any)
104-
alt_text <- gsub("[\\]* *%",
109+
#
110+
# First, remove all slashes before percent signs
111+
alt_text <- gsub("([\\]+%)", "%", alt_text)
112+
# If there is a format argument for a printf-type R statement inside
113+
# R chunk(s), the percent sign will be followed by a letter or number,
114+
# if so leave the percent sign intact be so that the R code will execute
115+
# properly
116+
alt_text <- gsub("%(?![a-zA-Z0-9]+)",
105117
ifelse(fr(),
106118
" pour cent",
107119
" percent"),
108-
alt_text)
120+
alt_text,
121+
perl = TRUE)
109122

110123
# Return a vector of the label and it's text
111124
alt_text
@@ -115,107 +128,190 @@ extract_alt_text <- function(inp_str,
115128
})
116129

117130
# Remove all NULLs from the list
118-
if(length(k)){
119-
k[sapply(k, is.null)] <- NULL
120-
}
121-
if(!length(k)){
122-
bail("Error retrieving an alternative text label. ",
123-
"You must add a (ref:", alt_str, ") with alternative text to ",
124-
"the document code.")
125-
}
126-
if(length(k) > 1){
127-
bail("Error retrieving your alternative text label (ref:", alt_str, "). ",
128-
"There was more than one found in the code.")
129-
}
131+
k[sapply(k, is.null)] <- NULL
130132

131-
k <- unlist(k)
133+
if(length(k) == 1){
132134

133-
# Replace any inline r code with actual text (mini-knitr parser)
134-
# Break up the string into chunks before and after the inline r code chunks
135-
# TEST
136-
# sp <- "hake"
137-
# common_name <- "this is the common name"
138-
# k <- "`r sp` `r sp` - Trying to match `r sp` with another `r 10 + 29 * 30` chunk `r common_name`."
135+
k <- unlist(k)
139136

140-
backtick_inds <- unlist(gregexpr("`", k))
141-
if(backtick_inds[1] == -1){
142-
backtick_inds <- NULL
143-
}
137+
# Replace any inline r code with actual text (mini-knitr parser)
138+
# Break up the string into chunks before and after the inline r code chunks
139+
# TEST
140+
# sp <- "hake"
141+
# common_name <- "this is the common name"
142+
# k <- "`r sp` `r sp` - Trying to match `r sp` with another `r 10 + 29 * 30` chunk `r common_name`."
143+
144+
backtick_inds <- unlist(gregexpr("`", k))
145+
if(backtick_inds[1] == -1){
146+
backtick_inds <- NULL
147+
}
148+
149+
# Remove inline R code chunks prior to checking for commas in alt text
150+
# so that commas in the function calls do not match
151+
tmp <- gsub("`r.*`", "", k)
152+
# Check for commas that haven't been escaped. They will cause tagpdf errors
153+
# if not fixed. (?<!\\\\) is negative lookbehind which means that if a comma
154+
# is not preceded by a double backslash, then replace the comma with \\,
155+
# Note that for lookbehinds/lookaheads to work we must use perl = TRUE
156+
comma_pat <- "(?<!\\\\)(\\,)"
157+
158+
grep_length <- length(grep(comma_pat, tmp, perl = TRUE))
159+
if(grep_length){
160+
if(grep_length == 1){
161+
message <- "There was a comma "
162+
}else{
163+
message <- paste0("There were ", grep_length, " commas ")
164+
}
165+
alert(message, "found in the alt text entry for label `",
166+
alt_str, "` You must re-write the alternative text without commas. ",
167+
"This is a limitation of the tagpdf LaTeX package.\nFor nowe, the ",
168+
"comma was removed so the compilation could continue.\n The text in ",
169+
"question is:\n\n",
170+
k, "\n")
171+
# Remove the commas - was used when this was a warning (alert) instead, but
172+
# the warning does not print to the screen so it is useless.
173+
k <- gsub(comma_pat, "", k, perl = TRUE)
174+
# Replace with double-backslash. Note this leaves a backslash in the text
175+
# and there's no way around it. I tried more or less backslashes and no dice
176+
#k <- gsub(comma_pat, "\\\\\\1", k, perl = TRUE)
177+
}
178+
179+
if(!length(backtick_inds)){
180+
return(k)
181+
}
182+
if(length(backtick_inds) %% 2 != 0){
183+
bail("There is an odd number of backticks in the text referred ",
184+
"to by label ", inp_str, ". The text is:\n", k)
185+
}
186+
187+
chunks_non_r <- str_split(k, "`r .*?`")[[1]]
188+
chunks_non_r <- chunks_non_r[chunks_non_r != ""]
144189

145-
# Remove inline R code chunks prior to checking for commas in alt text
146-
# so that commas in the function calls do not match
147-
tmp <- gsub("`r.*`", "", k)
148-
# Check for commas that haven't been escaped. They will cause tagpdf errors
149-
# if not fixed. (?<!\\\\) is negative lookbehind which means that if a comma
150-
# is not preceded by a double backslash, then replace the comma with \\,
151-
# Note that for lookbehinds/lookaheads to work we must use perl = TRUE
152-
comma_pat <- "(?<!\\\\)(\\,)"
153-
154-
grep_length <- length(grep(comma_pat, tmp, perl = TRUE))
155-
if(grep_length){
156-
if(grep_length == 1){
157-
message <- "There was a comma "
190+
# Number of backticks are even as they must be, so break them into chunks
191+
start_inds <- backtick_inds[seq(1, length(backtick_inds), 2)]
192+
# Check to make sure the starting backticks have an 'r' immediately after
193+
walk(start_inds, ~{
194+
if(substr(k, .x + 1, .x + 1) != "r" && substr(k, .x + 2, .x + 2) != " "){
195+
stop("Non-r code chunk found. R code chunks must be of the format ",
196+
"`r code_here`")
197+
}
198+
})
199+
end_inds <- backtick_inds[seq(2, length(backtick_inds), 2)]
200+
chunks <- str_sub(k, start_inds, end_inds)
201+
202+
# Evaluate the R chunks. This is needed so that inline R chunks found in
203+
# alt text paragraphs are evaluated properly
204+
chunks <- map_chr(chunks, ~{
205+
# Remove `r and ` from the code
206+
x <- gsub("^`r", "", .x)
207+
x <- gsub("`$", "", x)
208+
x <- gsub(" +", "", x)
209+
eval(parse(text = x))
210+
})
211+
# Here we have chunks and chunks_non_r. We need to find out which comes fist,
212+
chunk_len <- max(length(chunks), length(chunks_non_r))
213+
length(chunks) <- chunk_len
214+
length(chunks_non_r) <- chunk_len
215+
216+
if(start_inds[1] == 1){
217+
out_str <- c(rbind(chunks, chunks_non_r))
158218
}else{
159-
message <- paste0("There were ", grep_length, " commas ")
219+
out_str <- c(rbind(chunks_non_r, chunks))
160220
}
161-
alert(message, "found in the alt text entry for label `",
162-
alt_str, "` You must re-write the alternative text without commas. ",
163-
"This is a limitation of the tagpdf LaTeX package.\nFor nowe, the ",
164-
"comma was removed so the compilation could continue.\n The text in ",
165-
"question is:\n\n",
166-
k, "\n")
167-
# Remove the commas - was used when this was a warning (alert) instead, but
168-
# the warning does not print to the screen so it is useless.
169-
k <- gsub(comma_pat, "", k, perl = TRUE)
170-
# Replace with double-backslash. Note this leaves a backslash in the text
171-
# and there's no way around it. I tried more or less backslashes and no dice
172-
#k <- gsub(comma_pat, "\\\\\\1", k, perl = TRUE)
173-
}
221+
out_str <- out_str[!is.na(out_str)]
222+
out_str <- paste(out_str, collapse = "")
174223

175-
if(!length(backtick_inds)){
176-
return(k)
177-
}
178-
if(length(backtick_inds) %% 2 != 0){
179-
bail("There is an odd number of backticks in the text referred ",
180-
"to by label ", inp_str, ". The text is:\n", k)
181-
}
224+
}else if(!length(k)){
225+
# Create a Figure XX alt text label. Need to check which appendix
226+
# it is in or if it is in the main document figures
227+
rmd <- map(fns, ~{
228+
readLines(.x)
229+
}) |>
230+
unlist()
231+
fig_chunk_inds <- grep("fig.cap *=", rmd)
232+
fig_chunk_code <- rmd[fig_chunk_inds]
233+
pat <- paste0(inp_str, " *,")
234+
the_fig_ind <- grep(pat, fig_chunk_code)
235+
if(!length(the_fig_ind)){
236+
bail("There was a problem matching the name of the figure chunk '",
237+
inp_str, "' using the regular expression '", pat, "'.\nThere ",
238+
"was no match. This could occur if the name is not ",
239+
"followed by zero or more spaces and then a comma.")
240+
}
241+
if(length(the_fig_ind) > 1){
242+
bail("There was a problem matching the name of the figure chunk '",
243+
inp_str, "' using the regular expression '", pat, "'.\nThere ",
244+
"was more than one match. This could occur if the name is not ",
245+
"followed by zero or more spaces and then a comma.")
246+
}
247+
# Check if in an appendix and if so, which one
248+
is_appendix_fig <- FALSE
249+
appendix_ind <- grep("Appendix \\{-\\}", rmd)
250+
if(length(appendix_ind) == 1){
251+
is_appendix_fig <- fig_chunk_inds[the_fig_ind] > appendix_ind
252+
# Which appendix is it in?
253+
if(is_appendix_fig){
254+
# Search for all heading 1 lines following the appendix declaration
255+
appendix_lines <- rmd[appendix_ind:length(rmd)]
256+
#pattern <- "(?<![#])# +(?![#])"
257+
# Assumes that the author has put {#app:<letter>} into the source code
258+
pattern <- ".*(\\{#app:[a-z]\\}) *$"
259+
# This will match both English and French appendix header lines.
260+
# We use the French ones as guides
261+
app_headers <- grep(pattern, appendix_lines, perl = TRUE, value = TRUE)
262+
if(!length(app_headers)){
263+
bail("Could not find any tags on the appendix headers of the ",
264+
"format: {#app:<letter>} where <letter> is a lower-case letter ",
265+
"from a to z. This is required for automated alternative text ",
266+
"to work. Here is an example line for an Appendix header:\n",
267+
"# Ecosystem considerations {#app:d}")
268+
}
269+
# Remove all but the last occurrence of app:<letter> if there are more
270+
# than one
271+
doubles <- gsub(pattern, "\\1", app_headers)
272+
app_headers <- app_headers[duplicated(doubles)]
273+
app_header_patterns <- gsub("\\{", "\\\\{", app_headers)
274+
app_header_patterns <- gsub("\\}", "\\\\}", app_header_patterns)
275+
app_header_patterns <- gsub("\\(", "\\\\(", app_header_patterns)
276+
app_header_patterns <- gsub("\\)", "\\\\)", app_header_patterns)
277+
app_header_inds <- map_dbl(app_header_patterns, ~{grep(.x, rmd)})
182278

183-
chunks_non_r <- str_split(k, "`r .*?`")[[1]]
184-
chunks_non_r <- chunks_non_r[chunks_non_r != ""]
279+
# Find out which appendix the figure is in
280+
which_app <- max(which(fig_chunk_inds[the_fig_ind] > app_header_inds))
281+
which_app_letter <- LETTERS[which_app]
282+
# Now find which figure number in the appendix it is 1-100 ?!
283+
# There is more than one appendix, so we have to look in the one we
284+
# are in and count up the figures in it
285+
last_app <- length(app_header_inds)
286+
if(which_app == last_app){
287+
# It is the last appendix, so we search to the end of the document
288+
app_lines <- rmd[app_header_inds[which_app]:length(rmd)]
289+
}else{
290+
# There must be at least one appendix following this one
291+
app_lines <- rmd[app_header_inds[which_app]:app_header_inds[which_app + 1]]
292+
}
293+
fig_chunk_inds <- grep("fig.cap *=", app_lines)
294+
fig_chunk_code <- app_lines[fig_chunk_inds]
295+
pat <- paste0(inp_str, " *,")
296+
the_fig_ind <- grep(pat, fig_chunk_code)
297+
}
185298

186-
# Number of backticks are even as they must be, so break them into chunks
187-
start_inds <- backtick_inds[seq(1, length(backtick_inds), 2)]
188-
# Check to make sure the starting backticks have an 'r' immediately after
189-
walk(start_inds, ~{
190-
if(substr(k, .x + 1, .x + 1) != "r" && substr(k, .x + 2, .x + 2) != " "){
191-
stop("Non-r code chunk found. R code chunks must be of the format ",
192-
"`r code_here`")
299+
}else if(length(appendix_ind) > 1){
300+
bail("More than one 'Appendix {-}' was found while trying to objectify ",
301+
"alternative text figure numbers. Check Rmd code")
302+
}
303+
304+
if(is_appendix_fig){
305+
out_str <- paste0("Figure ", which_app_letter, ".", the_fig_ind)
306+
}else{
307+
out_str <- paste0("Figure ", the_fig_ind)
193308
}
194-
})
195-
end_inds <- backtick_inds[seq(2, length(backtick_inds), 2)]
196-
chunks <- str_sub(k, start_inds, end_inds)
197-
198-
# Evaluate the R chunks. This is needed so that inline R chunks found in
199-
# alt text paragraphs are evaluated properly
200-
chunks <- map_chr(chunks, ~{
201-
# Remove `r and ` from the code
202-
x <- gsub("^`r", "", .x)
203-
x <- gsub("`$", "", x)
204-
x <- gsub(" +", "", x)
205-
eval(parse(text = x))
206-
})
207-
# Here we have chunks and chunks_non_r. We need to find out which comes fist,
208-
chunk_len <- max(length(chunks), length(chunks_non_r))
209-
length(chunks) <- chunk_len
210-
length(chunks_non_r) <- chunk_len
211309

212-
if(start_inds[1] == 1){
213-
out_str <- c(rbind(chunks, chunks_non_r))
214310
}else{
215-
out_str <- c(rbind(chunks_non_r, chunks))
311+
bail("Error retrieving your alternative text label (ref:", alt_str, "). ",
312+
"There was more than one found in the code.")
216313
}
217-
out_str <- out_str[!is.na(out_str)]
218-
out_str <- paste(out_str, collapse = "")
314+
219315

220316
out_str
221317
}

man/extract_alt_text.Rd

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)