File size: 5,901 Bytes
14daa4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Set, Literal, Tuple, NamedTuple, Union
from pathlib import Path
import re
import fnmatch
import glob
from itertools import chain

@dataclass
class PathPattern:
    """A single path specification: an explicit mapping or a wildcard pattern.

    Attributes:
        pattern: Source path for an explicit mapping, or a glob pattern when
            no target is given.
        target_template: Destination path of an explicit ``source:target``
            mapping; ``None`` for a wildcard pattern.
    """
    pattern: str
    target_template: Optional[str] = None

    @classmethod
    def parse(cls, spec: str) -> 'PathPattern':
        """Parse a path specification into a pattern and optional target.

        Args:
            spec: Either ``'source:target'`` or a bare pattern. Only the first
                colon separates source from target; later colons stay in the
                target.

        Returns:
            A ``PathPattern`` whose ``target_template`` is ``None`` when
            ``spec`` contains no colon.
        """
        source, colon, target = spec.partition(':')
        if colon:
            return cls(source, target)
        return cls(spec)

    def resolve(self, root_dir: Path) -> List['PathMapping']:
        """Resolve the pattern into concrete path mappings.

        Args:
            root_dir: Directory that matched files are made relative to when
                it is one of their parents.

        Returns:
            A single mapping for an explicit ``source:target`` pattern;
            otherwise one mapping per regular file matched by the glob
            (possibly an empty list).
        """
        # The return annotation is a quoted forward reference: PathMapping is
        # not defined above this class in the visible part of the file, and an
        # unquoted name would be evaluated when the method is defined.
        if self.target_template is not None:
            # Direct mapping case: no filesystem lookup is performed.
            return [PathMapping(Path(self.pattern), Path(self.target_template))]

        # Wildcard pattern case
        matches = []
        for path in glob.glob(self.pattern, recursive=True):
            source = Path(path)
            if source.is_file():
                # For files, maintain relative structure; paths that do not
                # live under root_dir are kept exactly as the glob gave them.
                relative = source.relative_to(root_dir) if root_dir in source.parents else source
                matches.append(PathMapping(source, relative))
        return matches

    def validate(self) -> None:
        """Validate pattern constraints.

        Raises:
            ValueError: If the target contains a backslash or a ``..`` path
                component, or if the pattern contains any of ``< > | "``.
        """
        if self.target_template:
            # Targets must use forward slashes; checking this first means the
            # component split below sees only '/'-separated paths.
            if '\\' in self.target_template:
                raise ValueError("Target path must use forward slashes")

            # Reject path traversal. Compare whole components so that names
            # which merely contain two dots (e.g. 'v1..2') are still allowed.
            if '..' in self.target_template.split('/'):
                raise ValueError(f"Target path '{self.target_template}' cannot contain '..'")

        # Validate wildcard pattern
        if any(c in self.pattern for c in '<>|"'):
            raise ValueError(f"Invalid characters in pattern: {self.pattern}")

class WikiTransformer:
    """Resolve path patterns and process each resulting mapping into ``output_dir``."""

    def __init__(self, size_limit: 'SizeSpec', output_dir: Path,
                 merge_strategy: 'MergeStrategy',
                 debug: bool = False):
        """Store the configuration and set up helpers.

        Args:
            size_limit: Limit handed to ``SizeValidator``.
            output_dir: Directory created by ``transform`` before processing.
            merge_strategy: Strategy whose ``validate_target`` is consulted
                by ``transform``.
            debug: Debug flag, stored as ``self.debug``.
        """
        # NOTE(review): _setup_logging and process_mapping are not defined in
        # the visible part of this class; presumably they live elsewhere.
        self.validator = SizeValidator(size_limit)
        self.output_dir = output_dir
        self.merge_strategy = merge_strategy
        self.debug = debug
        self.console = Console()
        self.log = self._setup_logging()
        self.processed_inodes: Set[int] = set()
        self.root_dir = Path.cwd()

    async def resolve_patterns(self, patterns: List[str]) -> List['PathMapping']:
        """Resolve all patterns into concrete mappings.

        Invalid specifications (those raising ``ValueError`` while being
        parsed, validated or resolved) are logged and skipped; patterns that
        match nothing are logged as warnings.

        Args:
            patterns: Raw path specifications.

        Returns:
            The concatenated mappings of every valid pattern, in input order.
        """
        mappings = []
        for spec in patterns:
            try:
                pattern = PathPattern.parse(spec)
                pattern.validate()
                resolved = pattern.resolve(self.root_dir)
                if not resolved:
                    self.log.warning(f"Pattern '{spec}' matched no files")
                mappings.extend(resolved)
            except ValueError as e:
                self.log.error(f"Invalid pattern '{spec}': {e}")
        return mappings

    async def transform(self, patterns: List[str]):
        """Transform source trees based on patterns and mappings.

        Args:
            patterns: Raw path specifications, resolved via
                ``resolve_patterns``.

        Raises:
            ValueError: If no pattern yields a mapping, or if the merge
                strategy rejects the output directory.
        """
        mappings = await self.resolve_patterns(patterns)

        if not mappings:
            raise ValueError("No valid paths matched the specified patterns")

        if not self.merge_strategy.validate_target(self.output_dir):
            raise ValueError(
                f"Target filesystem doesn't support {self.merge_strategy.link_type} links"
            )

        self.output_dir.mkdir(parents=True, exist_ok=True)

        with Progress() as progress:
            task = progress.add_task(
                "[green]Processing files...",
                total=len(mappings)
            )

            for mapping in mappings:
                try:
                    await self.process_mapping(mapping)
                except Exception as e:
                    # Best effort: one failing mapping must not abort the run.
                    self.log.error(f"Failed to process {mapping}: {e}")
                # Advance for failures too, so the bar reaches its total even
                # when some mappings could not be processed.
                progress.update(task, advance=1)

# click arguments do not accept ``help=`` (passing it raises TypeError when the
# decorator is applied); PATTERNS is documented in the command docstring.
@click.command()
@click.argument('patterns', nargs=-1, required=True)
@click.option('-l', '--limit', type=SIZE, default='1M',
              help='Per-document size limit (e.g., 500K, 2M, 1G)')
@click.option('-d', '--debug', is_flag=True, help='Enable debug logging')
@click.option('-o', '--output-dir', type=click.Path(), default='wiki',
              help='Output directory')
@click.option('--link-type', type=click.Choice(['symlink', 'hardlink', 'copy']),
              default='symlink', help='File linking strategy')
@click.option('--follow-links/--no-follow-links', default=False,
              help='Follow symbolic links during traversal')
def main(patterns: Tuple[str, ...], limit: 'SizeSpec', debug: bool,
         output_dir: str, link_type: str, follow_links: bool):
    """Transform files into wiki structure using patterns or mappings.
    
    PATTERNS can be either:
    1. Colon-separated mappings: 'source:target'
    2. Wildcard patterns: '**/*.md', 'docs/**/*.rst'
    
    Examples:
        # Explicit mapping
        wiki_transform.py src/api:docs/api docs/intro:guide/start
        
        # Wildcard patterns
        wiki_transform.py '**/*.md' 'docs/**/*.rst'
        
        # Mixed usage
        wiki_transform.py src:api '**/*.md' 'legacy:archive'
    """
    # 'copy' is represented as "no link type" on the strategy.
    strategy = MergeStrategy(
        link_type=None if link_type == 'copy' else link_type,
        follow_links=follow_links
    )

    transformer = WikiTransformer(
        size_limit=limit,
        output_dir=Path(output_dir),
        merge_strategy=strategy,
        debug=debug
    )

    # click delivers a variadic argument (nargs=-1) as a tuple; transform is
    # annotated to take a list.
    asyncio.run(transformer.transform(list(patterns)))

# Invoke the CLI entry point only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()