@@ -60,10 +60,10 @@ def find_scan_directories(base_path='.', exclude_dirs=None):
6060
6161 return scan_dirs
6262
63- def find_unused_adoc (scan_dirs = None , archive_dir = './archive' , archive = False , exclude_dirs = None , exclude_files = None ):
63+ def find_unused_adoc (scan_dirs = None , archive_dir = './archive' , archive = False , exclude_dirs = None , exclude_files = None , include_commented = False ):
6464 # Print safety warning
6565 print ("\n ⚠️ SAFETY: Work in a git branch! Run without --archive first to preview.\n " )
66-
66+
6767 # If no scan_dirs provided, auto-discover them
6868 if not scan_dirs :
6969 scan_dirs = find_scan_directories (exclude_dirs = exclude_dirs )
@@ -75,46 +75,107 @@ def find_unused_adoc(scan_dirs=None, archive_dir='./archive', archive=False, exc
7575 print ("No 'modules' or 'assemblies' directories found containing .adoc files." )
7676 print ("Please run this tool from your documentation repository root." )
7777 return
78-
78+
7979 # Detect repository type
8080 repo_type = detect_repo_type ()
8181 print (f"Detected repository type: { repo_type } " )
82-
82+
8383 # Collect all .adoc files in scan directories
8484 asciidoc_files = collect_files (scan_dirs , {'.adoc' }, exclude_dirs , exclude_files )
85-
86- # Track which files are referenced
87- referenced_files = set ()
88-
85+
86+ # Track which files are referenced (uncommented and commented separately)
87+ referenced_files = set () # Files in uncommented includes
88+ commented_only_files = {} # Files referenced ONLY in commented lines: {basename: [(file, line_num, line_text)]}
89+
8990 if repo_type == 'topic_map' :
9091 # For OpenShift-docs style repos, get references from topic maps
9192 topic_references = get_all_topic_map_references ()
9293 # Convert to basenames for comparison
9394 referenced_files .update (os .path .basename (ref ) for ref in topic_references )
94-
95- # Always scan for include:: directives in all .adoc files
95+
96+ # Patterns for finding includes (both commented and uncommented)
9697 include_pattern = re .compile (r'include::(.+?)\[' )
98+ commented_include_pattern = re .compile (r'^\s*//.*include::(.+?)\[' )
99+
97100 adoc_files = collect_files (['.' ], {'.adoc' }, exclude_dirs , exclude_files )
98-
101+
99102 for file_path in adoc_files :
100103 try :
101104 with open (file_path , 'r' , encoding = 'utf-8' ) as f :
102- content = f .read ()
103- includes = include_pattern .findall (content )
104- # Extract just the filename from the include path
105- for include in includes :
106- # Handle both relative and absolute includes
107- include_basename = os .path .basename (include )
108- referenced_files .add (include_basename )
105+ lines = f .readlines ()
106+
107+ for line_num , line in enumerate (lines , 1 ):
108+ # Check if this is a commented include
109+ commented_match = commented_include_pattern .search (line )
110+ if commented_match :
111+ include_basename = os .path .basename (commented_match .group (1 ))
112+ # Track location of commented reference
113+ if include_basename not in commented_only_files :
114+ commented_only_files [include_basename ] = []
115+ commented_only_files [include_basename ].append ((file_path , line_num , line .strip ()))
116+ else :
117+ # Check for uncommented includes
118+ uncommented_match = include_pattern .search (line )
119+ if uncommented_match :
120+ include_basename = os .path .basename (uncommented_match .group (1 ))
121+ referenced_files .add (include_basename )
122+ # If we found an uncommented reference, remove from commented_only tracking
123+ if include_basename in commented_only_files :
124+ del commented_only_files [include_basename ]
109125 except Exception as e :
110126 print (f"Warning: could not read { file_path } : { e } " )
111-
112- # Find unused files by comparing basenames
113- unused_files = [f for f in asciidoc_files if os .path .basename (f ) not in referenced_files ]
127+
128+ # Determine which files are unused based on the include_commented flag
129+ if include_commented :
130+ # When --commented is used: treat files with commented-only references as unused
131+ # Only files with uncommented references are considered "used"
132+ unused_files = [f for f in asciidoc_files if os .path .basename (f ) not in referenced_files ]
133+ commented_only_unused = []
134+ else :
135+ # Default behavior: files referenced only in commented lines are considered "used"
136+ # They should NOT be in the unused list, but we track them for reporting
137+ all_referenced = referenced_files .union (set (commented_only_files .keys ()))
138+ unused_files = [f for f in asciidoc_files if os .path .basename (f ) not in all_referenced ]
139+
140+ # Generate list of files referenced only in comments for the report
141+ commented_only_unused = []
142+ for basename , references in commented_only_files .items ():
143+ # Find the full path for this basename in asciidoc_files
144+ matching_files = [f for f in asciidoc_files if os .path .basename (f ) == basename ]
145+ for f in matching_files :
146+ commented_only_unused .append ((f , references ))
147+
114148 unused_files = list (dict .fromkeys (unused_files )) # Remove duplicates
115-
149+
150+ # Print summary
116151 print (f"Found { len (unused_files )} unused files out of { len (asciidoc_files )} total files in scan directories" )
117-
152+
153+ # Generate detailed report for commented-only references
154+ if commented_only_unused and not include_commented :
155+ report_path = os .path .join (archive_dir , 'commented-references-report.txt' )
156+ os .makedirs (archive_dir , exist_ok = True )
157+
158+ with open (report_path , 'w' , encoding = 'utf-8' ) as report :
159+ report .write ("Files Referenced Only in Commented Lines\n " )
160+ report .write ("=" * 70 + "\n \n " )
161+ report .write (f"Found { len (commented_only_unused )} files that are referenced only in commented-out includes.\n " )
162+ report .write ("These files are considered 'used' by default and will NOT be archived.\n \n " )
163+ report .write ("To archive these files along with other unused files, use the --commented flag.\n \n " )
164+ report .write ("-" * 70 + "\n \n " )
165+
166+ for file_path , references in sorted (commented_only_unused ):
167+ report .write (f"File: { file_path } \n " )
168+ report .write (f"Referenced in { len (references )} commented line(s):\n " )
169+ for ref_file , line_num , line_text in references :
170+ report .write (f" { ref_file } :{ line_num } \n " )
171+ report .write (f" { line_text } \n " )
172+ report .write ("\n " )
173+
174+ print (f"\n 📋 Found { len (commented_only_unused )} files referenced only in commented lines." )
175+ print (f" Detailed report saved to: { report_path } " )
176+ print (f" These files are considered 'used' and will NOT be archived by default." )
177+ print (f" To include them in the archive operation, use the --commented flag.\n " )
178+
118179 return write_manifest_and_archive (
119180 unused_files , archive_dir , 'to-archive' , 'to-archive' , archive = archive
120181 )
0 commit comments