|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Local Examples Processor |
| 4 | +
|
| 5 | +This script processes local examples from the local_examples/ directory |
| 6 | +and integrates them into the existing examples system. |
| 7 | +
|
| 8 | +Works like remote examples - each file contains an EXAMPLE: header |
| 9 | +and can be any supported language. |
| 10 | +""" |
| 11 | + |
import glob
import logging
import os
import shutil
from typing import Any, Dict, Optional

from components.example import Example
from components.util import mkdir_p
from components.structured_data import load_dict, dump_dict
| 21 | + |
| 22 | + |
# Lookup table: file extension (with leading dot) -> language name
EXTENSION_TO_LANGUAGE = {
    ".py": "python",
    ".js": "node.js",
    ".go": "go",
    ".cs": "c#",
    ".java": "java",
    ".php": "php",
}

# Lookup table: language name -> client name
# (values mirror clientsExamples in config.toml)
LANGUAGE_TO_CLIENT = {
    "python": "Python",
    "node.js": "Node.js",
    "go": "Go",
    "c#": "C#",
    "java": "Java-Sync",  # Default to sync, could be overridden
    "php": "PHP",
    "redisvl": "RedisVL",
}
| 43 | + |
| 44 | + |
def get_language_from_extension(filename: str) -> Optional[str]:
    """Look up the example language for a file based on its extension.

    The extension is lowercased before the lookup, so ``Foo.PY`` and
    ``foo.py`` resolve to the same language.

    Args:
        filename: File name or path; only its final extension is examined.

    Returns:
        The language name from EXTENSION_TO_LANGUAGE, or None when the
        extension is missing or not present in that table.
    """
    _, ext = os.path.splitext(filename)
    return EXTENSION_TO_LANGUAGE.get(ext.lower())
| 49 | + |
| 50 | + |
def get_client_name_from_language(language: str) -> str:
    """Return the client name registered for a language.

    When the language has no entry in LANGUAGE_TO_CLIENT, the title-cased
    language name is returned instead.
    """
    # Evaluated unconditionally, just as a default passed to dict.get is.
    fallback_name = language.title()
    return LANGUAGE_TO_CLIENT.get(language, fallback_name)
| 54 | + |
| 55 | + |
def get_example_id_from_file(path: str) -> Optional[str]:
    """Extract the example ID from the ``EXAMPLE:`` header on a file's first line.

    Only the first line is inspected. The ID is everything that follows the
    ``EXAMPLE:`` marker on that line, with surrounding whitespace removed, so
    IDs that themselves contain a colon are returned intact.

    Args:
        path: Path of the example source file to read.

    Returns:
        The example ID (possibly an empty string if nothing follows the
        marker), or None when the first line has no ``EXAMPLE:`` marker or
        the file cannot be read. Read failures are logged, not raised.
    """
    marker = 'EXAMPLE:'
    try:
        # Decode as UTF-8 regardless of the platform default; undecodable
        # bytes are replaced so they cannot prevent reading the header.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            first_line = f.readline().strip()
    except (OSError, ValueError) as e:
        logging.error(f"Error reading example ID from {path}: {e}")
        return None

    # Split on the marker itself rather than on the first ':' so that text
    # before the marker or colons inside the ID do not corrupt the result.
    _, found, remainder = first_line.partition(marker)
    if not found:
        return None
    return remainder.strip()
| 66 | + |
| 67 | + |
def process_local_examples(local_examples_dir: str = 'local_examples',
                           examples_dir: str = 'examples',
                           examples_json: str = 'data/examples.json') -> None:
    """
    Process local examples and integrate them into the examples system.

    Works like remote examples - each file contains an EXAMPLE: header
    and can be any supported language.

    For every regular file in ``local_examples_dir`` whose extension maps to
    a known language and whose first line carries an EXAMPLE: header, the
    file is copied to ``<examples_dir>/<example_id>/local_<filename>``, an
    ``Example`` is built from the copy, and a metadata entry is stored under
    ``examples_data[example_id][client_name]``. Files with an unknown
    extension or no header are skipped with a warning. The merged data is
    written back to ``examples_json`` once all files are processed.

    Args:
        local_examples_dir: Directory containing local example source files.
            If it is not an existing directory, the function logs and returns
            without touching anything.
        examples_dir: Target directory for processed examples
        examples_json: Path to examples.json file; loaded first if it exists
            so existing entries are preserved.
    """

    # isdir (not exists) so that a stray regular file at this path is skipped
    # instead of crashing in os.listdir.
    if not os.path.isdir(local_examples_dir):
        logging.info(f"Local examples directory {local_examples_dir} not found, skipping")
        return

    # Load existing examples data
    examples_data = {}
    if os.path.exists(examples_json):
        examples_data = load_dict(examples_json)

    # os.listdir returns entries in arbitrary order; sort so the generated
    # data (and which file wins when two map to the same ID and client) is
    # reproducible between runs.
    for filename in sorted(os.listdir(local_examples_dir)):
        source_file = os.path.join(local_examples_dir, filename)

        if not os.path.isfile(source_file):
            continue

        # Get language from file extension
        language = get_language_from_extension(filename)
        if not language:
            logging.warning(f"Unknown file extension for: {filename}")
            continue

        # Get example ID from file content
        example_id = get_example_id_from_file(source_file)
        if not example_id:
            logging.warning(f"No EXAMPLE: header found in {filename}")
            continue

        logging.info(f"Processing local example: {example_id} ({language})")

        # Create target directory
        target_dir = os.path.join(examples_dir, example_id)
        mkdir_p(target_dir)

        # Copy file to target directory with local_ prefix
        target_file = os.path.join(target_dir, f"local_{filename}")
        shutil.copy2(source_file, target_file)

        # Process with Example class
        example = Example(language, target_file)

        # Get client name
        client_name = get_client_name_from_language(language)

        # Create metadata
        example_metadata = {
            'source': source_file,
            'language': language,
            'target': target_file,
            'highlight': example.highlight,
            'hidden': example.hidden,
            'named_steps': example.named_steps,
            'sourceUrl': None  # Local examples don't have source URLs
        }

        # Create the per-example mapping on first use, then record this client.
        examples_data.setdefault(example_id, {})[client_name] = example_metadata
        logging.info(f"Processed {client_name} example for {example_id}")

    # Save updated examples data
    dump_dict(examples_json, examples_data)
    logging.info(f"Updated examples data saved to {examples_json}")
| 151 | + |
| 152 | + |
if __name__ == '__main__':
    # Script entry point: log INFO and above as "LEVEL: message", then run
    # the processor with its default directories.
    log_format = '%(levelname)s: %(message)s'
    logging.basicConfig(format=log_format, level=logging.INFO)

    process_local_examples()
    print("Local examples processing complete")
0 commit comments