Skip to content

Commit d9e6b68

Browse files
committed
wip
1 parent 2a478c6 commit d9e6b68

15 files changed

+144
-113
lines changed

src/gitingest/entrypoint.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from gitingest.clone import clone_repo
1616
from gitingest.config import MAX_FILE_SIZE
1717
from gitingest.ingestion import ingest_query
18-
from gitingest.output_formatter import generate_digest
18+
from gitingest.output_formatter import DefaultFormatter
1919
from gitingest.query_parser import parse_local_dir_path, parse_remote_repo
2020
from gitingest.utils.auth import resolve_token
2121
from gitingest.utils.compat_func import removesuffix
@@ -51,7 +51,7 @@ async def ingest_async(
5151
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
5252
and processes its files according to the specified query parameters. It returns a single digest string.
5353
54-
The output is generated lazily using a ContextV1 object and the generate_digest() function.
54+
The output is generated lazily using a ContextV1 object and the DefaultFormatter class.
5555
5656
Parameters
5757
----------
@@ -141,7 +141,8 @@ async def ingest_async(
141141
if output:
142142
logger.debug("Writing output to file", extra={"output_path": output})
143143
context = ingest_query(query)
144-
digest = generate_digest(context)
144+
formatter = DefaultFormatter()
145+
digest = formatter.format(context, context.query)
145146
await _write_output(digest, content=None, target=output)
146147
logger.info("Ingestion completed successfully")
147148
return digest
@@ -165,7 +166,7 @@ def ingest(
165166
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
166167
and processes its files according to the specified query parameters. It returns a single digest string.
167168
168-
The output is generated lazily using a ContextV1 object and the generate_digest() function.
169+
The output is generated lazily using a ContextV1 object and the DefaultFormatter class.
169170
170171
Parameters
171172
----------
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# Generated using https://gitingest.com/{{ context.query.user_name }}/{{ context.query.repo_name }}{{ context.query.subpath }}
1+
# Generated using https://gitingest.com/{{ source.query.user_name }}/{{ source.query.repo_name }}{{ source.query.subpath }}
22

33
Sources used:
4-
{%- for source in context %}
5-
- {{ source.name }}: {{ source.__class__.__name__ }}
4+
{%- for src in source %}
5+
- {{ src.name }}: {{ src.__class__.__name__ }}
66
{% endfor %}
77

8-
{%- for source in context.sources %}
9-
{{ formatter.format(source, context.query) }}
8+
{%- for src in source.sources %}
9+
{{ formatter.format(src, source.query) }}
1010
{%- endfor %}
11-
# End of https://gitingest.com/{{ context.query.user_name }}/{{ context.query.repo_name }}{{ context.query.subpath }}
11+
# End of https://gitingest.com/{{ source.query.user_name }}/{{ source.query.repo_name }}{{ source.query.subpath }}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
{%- if node.depth == 0 %}{{ node.name }}:
2-
{{ node.tree }}
1+
{%- if source.depth == 0 %}{{ source.name }}:
2+
{{ source.tree }}
33

44
{% endif -%}
5-
{%- for child in node.children -%}
5+
{%- for child in source.children -%}
66
{{ formatter.format(child, query) }}
77
{%- endfor -%}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{{ SEPARATOR }}
2-
{{ node.name }}
2+
{{ source.name }}
33
{{ SEPARATOR }}
4-
{{ node.content }}
4+
{{ source.content }}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{{ SEPARATOR }}
2-
{{ node.name }}{% if node.target %} -> {{ node.target }}{% endif %}
2+
{{ source.name }}{% if source.target %} -> {{ source.target }}{% endif %}
33
{{ SEPARATOR }}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
{%- if node.depth == 0 %}🔗 Git Repository: {{ node.name }}
2-
{{ node.tree }}
1+
{%- if source.depth == 0 %}🔗 Git Repository: {{ source.name }}
2+
{{ source.tree }}
33

44
{% endif -%}
5-
{%- for child in node.children -%}
5+
{%- for child in source.children -%}
66
{{ formatter.format(child, query) }}
77
{%- endfor -%}
Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1-
Repository: {{ context.query.user_name }}/{{ context.query.repo_name }}
2-
Commit: {{ context.query.commit }}
3-
Files analyzed: {{ context.file_count }}
1+
Repository: {{ source.query.user_name }}/{{ source.query.repo_name }}
2+
Commit: {{ source.query.commit }}
3+
Files analyzed: {{ source.file_count }}
4+
5+
Estimated tokens: {{ source.token_count }}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
Directory structure:
2-
{{ node.tree }}
2+
{{ source.tree }}

src/gitingest/ingestion.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def ingest_query(query: IngestionQuery) -> ContextV1:
4040
-------
4141
ContextV1
4242
A ContextV1 object representing the ingested file system nodes.
43-
Use generate_digest(context) to get the summary, directory structure,
43+
Use DefaultFormatter().format(context, context.query) to get the summary, directory structure,
4444
and file contents.
4545
4646
Raises
@@ -96,7 +96,7 @@ def ingest_query(query: IngestionQuery) -> ContextV1:
9696
"file_size": file_node.size,
9797
},
9898
)
99-
return ContextV1([file_node], query)
99+
return ContextV1(sources=[file_node], query=query)
100100

101101
# Check if this is a git repository and create appropriate node type
102102
if _is_git_repository(path):
@@ -127,7 +127,7 @@ def ingest_query(query: IngestionQuery) -> ContextV1:
127127
},
128128
)
129129

130-
return ContextV1([root_node], query)
130+
return ContextV1(sources=[root_node], query=query)
131131

132132

133133
def _process_node(node: FileSystemNode, query: IngestionQuery, stats: FileSystemStats) -> None: # noqa: C901

src/gitingest/output_formatter.py

Lines changed: 78 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import tiktoken
1212
from jinja2 import Environment, FileSystemLoader, Template, TemplateNotFound
1313

14-
from gitingest.schemas import ContextV1, FileSystemNode, Source
14+
from gitingest.schemas import FileSystemNode, Source
1515
from gitingest.schemas.filesystem import SEPARATOR, FileSystemNodeType
1616
from gitingest.utils.compat_func import readlink
1717
from gitingest.utils.logging_config import get_logger
@@ -136,15 +136,15 @@ def _format_token_count(text: str) -> str | None:
136136
return str(total_tokens)
137137

138138

139-
def generate_digest(context: ContextV1) -> str:
140-
"""Generate a digest string from a ContextV1 object.
139+
def generate_digest(context: Source) -> str:
140+
"""Generate a digest string from a Source object.
141141
142-
This is a convenience function that uses the DefaultFormatter to format a ContextV1.
142+
This is a convenience function that uses the DefaultFormatter to format a Source.
143143
144144
Parameters
145145
----------
146-
context : ContextV1
147-
The ContextV1 object containing sources and query information.
146+
context : Source
147+
The Source object containing sources and query information.
148148
149149
Returns
150150
-------
@@ -156,57 +156,103 @@ def generate_digest(context: ContextV1) -> str:
156156
return formatter.format(context, context.query)
157157

158158

159-
class DefaultFormatter:
160-
"""Default formatter for rendering filesystem nodes using Jinja2 templates."""
159+
class Formatter:
160+
"""Base formatter class."""
161161

162-
def __init__(self) -> None:
162+
def __init__(self, template_subdir: str) -> None:
163163
self.separator = SEPARATOR
164-
template_dir = Path(__file__).parent / "format" / "DefaultFormatter"
164+
template_dir = Path(__file__).parent / "format" / template_subdir
165165
self.env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
166166

167167
def _get_template_for_node(self, node: Source) -> Template:
168168
"""Get template based on node class name."""
169169
template_name = f"{node.__class__.__name__}.j2"
170170
return self.env.get_template(template_name)
171171

172+
173+
class DefaultFormatter(Formatter):
174+
"""Default formatter for rendering filesystem nodes using Jinja2 templates."""
175+
176+
def __init__(self) -> None:
177+
super().__init__("DefaultFormatter")
178+
179+
def format(self, source: Source, query: IngestionQuery) -> str:
180+
"""Format a source with the given query."""
181+
if query is None:
182+
# Handle case where query is None (shouldn't happen in normal usage)
183+
raise ValueError("ContextV1 must have a valid query object")
184+
185+
# Calculate and set the token count on sources that track one (e.g. ContextV1)
186+
if hasattr(source, '_token_count'):
187+
token_count = self._calculate_token_count(source)
188+
source._token_count = token_count
189+
# Also set token count in the extra dict
190+
source.extra["token_count"] = token_count
191+
192+
try:
193+
return self._format_node(source, query)
194+
except Exception as e:
195+
# Log the error for debugging
196+
import logging
197+
logging.error(f"Error in DefaultFormatter: {e}")
198+
raise
199+
200+
def _calculate_token_count(self, source: Source) -> str:
201+
"""Calculate token count for the entire source."""
202+
# Gather all content from the source
203+
content = self._gather_all_content(source)
204+
return _format_token_count(content) or "Unknown"
205+
206+
def _gather_all_content(self, node: Source) -> str:
207+
"""Recursively gather all content from the source tree."""
208+
content_parts = []
209+
210+
# Add content from the current node
211+
if hasattr(node, 'content'):
212+
content_parts.append(node.content)
213+
214+
# Add content from all sources if it's a ContextV1
215+
if hasattr(node, 'sources'):
216+
for source in node.sources:
217+
content_parts.append(self._gather_all_content(source))
218+
219+
# Add content from children if it's a directory
220+
if hasattr(node, 'children'):
221+
for child in node.children:
222+
content_parts.append(self._gather_all_content(child))
223+
224+
return "\n".join(filter(None, content_parts))
225+
172226
@singledispatchmethod
173-
def format(self, node: Source, query: IngestionQuery) -> str:
227+
def _format_node(self, node: Source, query: IngestionQuery) -> str:
174228
"""Dynamically format any node type based on available templates."""
175229
try:
176230
template = self._get_template_for_node(node)
177231
# Provide common template variables
178232
context_vars = {
179-
"node": node,
233+
"source": node,
180234
"query": query,
181235
"formatter": self,
182236
"SEPARATOR": SEPARATOR,
183237
}
184-
# Special handling for ContextV1 objects
185-
if isinstance(node, ContextV1):
186-
context_vars["context"] = node
187-
# Use ContextV1 for backward compatibility
188-
template = self.env.get_template("ContextV1.j2")
189238

190239
return template.render(**context_vars)
191240
except TemplateNotFound:
192241
# Fallback: return content if available, otherwise empty string
193242
return f"{getattr(node, 'content', '')}"
194243

195244

196-
class DebugFormatter:
245+
class DebugFormatter(Formatter):
197246
"""Debug formatter that shows detailed information about filesystem nodes."""
198247

199248
def __init__(self) -> None:
200-
self.separator = SEPARATOR
201-
template_dir = Path(__file__).parent / "format" / "DebugFormatter"
202-
self.env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
249+
super().__init__("DebugFormatter")
203250

204251
def _get_template_for_node(self, node: Source) -> Template:
205252
"""Get template based on node class name."""
206253
template_name = f"{node.__class__.__name__}.j2"
207254
return self.env.get_template(template_name)
208255

209-
@singledispatchmethod
210256
def format(self, node: Source, query: IngestionQuery) -> str:
211257
"""Dynamically format any node type with debug information."""
212258
try:
@@ -254,37 +300,31 @@ def _raise_no_dataclass_fields() -> None:
254300
return f"DEBUG: {node.__class__.__name__}"
255301

256302

257-
class SummaryFormatter:
303+
class SummaryFormatter(Formatter):
258304
"""Dedicated formatter for generating summaries of filesystem nodes."""
259305

260306
def __init__(self) -> None:
261-
template_dir = Path(__file__).parent / "format" / "SummaryFormatter"
262-
self.env = Environment(loader=FileSystemLoader(template_dir), autoescape=True)
307+
super().__init__("SummaryFormatter")
263308

264-
def _get_template_for_node(self, node: Source) -> Template:
265-
"""Get template based on node class name."""
266-
template_name = f"{node.__class__.__name__}.j2"
267-
return self.env.get_template(template_name)
309+
def format(self, source: Source, query: IngestionQuery) -> str:
310+
"""Generate the summary output."""
311+
if query is None:
312+
# Handle case where query is None (shouldn't happen in normal usage)
313+
raise ValueError("ContextV1 must have a valid query object")
314+
return self.summary(source, query)
268315

269316
@singledispatchmethod
270317
def summary(self, node: Source, query: IngestionQuery) -> str:
271318
"""Dynamically generate summary for any node type based on available templates."""
272319
try:
273320
# Provide common template variables
274321
context_vars = {
275-
"node": node,
322+
"source": node,
276323
"query": query,
277324
"formatter": self,
278325
}
279326

280-
# Special handling for ContextV1 objects
281-
if isinstance(node, ContextV1):
282-
context_vars["context"] = node
283-
# Use ContextV1 for backward compatibility
284-
template = self.env.get_template("ContextV1.j2")
285-
else:
286-
template = self._get_template_for_node(node)
287-
327+
template = self._get_template_for_node(node)
288328
return template.render(**context_vars)
289329
except TemplateNotFound:
290330
# Fallback: return name if available

0 commit comments

Comments
 (0)