Skip to content

Commit 5c5d884

Browse files
authored
Merge pull request #2 from FSoft-AI4Code/feat/cli
Feat/cli
2 parents 4bbb7a0 + a91602b commit 5c5d884

File tree

13 files changed

+249
-79
lines changed

13 files changed

+249
-79
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ CodeWiki demonstrates significant improvements in high-level and managed languag
9797
Install CodeWiki CLI from source:
9898

9999
```bash
100-
pip install https://github.com/FSoft-AI4Code/CodeWiki.git
100+
pip install git+https://github.com/FSoft-AI4Code/CodeWiki.git
101101
```
102102

103103
Verify installation:

codewiki/cli/adapters/doc_generator.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import time
1111
import asyncio
1212
import os
13+
import logging
14+
import sys
1315

1416

1517
from codewiki.cli.utils.progress import ProgressTracker
@@ -64,6 +66,50 @@ def __init__(
6466
cluster_model=config.get('cluster_model', ''),
6567
base_url=config.get('base_url', '')
6668
)
69+
70+
# Configure backend logging
71+
self._configure_backend_logging()
72+
73+
def _configure_backend_logging(self):
    """Route backend log records to one colored console handler.

    In verbose mode records at INFO and above are written to stdout;
    otherwise only WARNING and above are written, to stderr. Handlers
    already attached to the backend logger are discarded first so repeated
    configuration does not stack handlers.
    """
    from codewiki.src.be.dependency_analyzer.utils.logging_config import ColoredFormatter

    # 'codewiki.src.be' is the parent logger of all backend modules.
    be_logger = logging.getLogger('codewiki.src.be')

    # Start from a clean slate to avoid duplicated output.
    be_logger.handlers.clear()

    # Verbosity selects both the threshold and the destination stream.
    if self.verbose:
        threshold, stream = logging.INFO, sys.stdout
    else:
        threshold, stream = logging.WARNING, sys.stderr

    be_logger.setLevel(threshold)

    # The colored formatter is used in both modes.
    handler = logging.StreamHandler(stream)
    handler.setLevel(threshold)
    handler.setFormatter(ColoredFormatter())
    be_logger.addHandler(handler)

    # Keep records away from the root logger so they are not printed twice.
    be_logger.propagate = False
67113

68114
def generate(self) -> DocumentationJob:
69115
"""

codewiki/cli/utils/validation.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,30 @@ def detect_supported_languages(directory: Path) -> List[Tuple[str, int]]:
173173
'C#': ['.cs'],
174174
}
175175

176+
# Directories to exclude from counting
177+
excluded_dirs = {
178+
'node_modules', '__pycache__', '.git', 'build', 'dist',
179+
'.venv', 'venv', 'env', '.env', 'target', 'bin', 'obj',
180+
'.pytest_cache', '.mypy_cache', '.tox', 'coverage',
181+
'htmlcov', '.eggs', '*.egg-info', 'vendor', 'bower_components',
182+
'.idea', '.vscode', '.gradle', '.mvn'
183+
}
184+
185+
def should_exclude_file(file_path: Path) -> bool:
    """Return True when *file_path* lies inside an excluded directory.

    Only the directory components below the scanned ``directory`` are
    inspected, so a repository that itself lives under a folder such as
    ``build`` or ``env`` is not filtered out wholesale. Components are
    compared with ``fnmatchcase`` so glob-style entries in
    ``excluded_dirs`` (e.g. ``*.egg-info``) match real directory names
    instead of being compared literally.

    Args:
        file_path: A path yielded by ``directory.rglob(...)``.

    Returns:
        True if any parent directory component matches an entry of
        ``excluded_dirs``, otherwise False.
    """
    from fnmatch import fnmatchcase

    try:
        relative = file_path.relative_to(directory)
    except ValueError:
        # Path is not under the scanned root; inspect it as given.
        relative = file_path

    # The last component is the file name, not a directory.
    dir_parts = relative.parts[:-1]
    return any(
        fnmatchcase(part, pattern)
        for part in dir_parts
        for pattern in excluded_dirs
    )
189+
176190
language_counts = {}
177191

178192
for language, extensions in language_extensions.items():
179193
count = 0
180194
for ext in extensions:
181-
count += len(list(directory.rglob(f"*{ext}")))
195+
# Filter out files in excluded directories
196+
count += sum(
197+
1 for f in directory.rglob(f"*{ext}")
198+
if f.is_file() and not should_exclude_file(f)
199+
)
182200

183201
if count > 0:
184202
language_counts[language] = count
@@ -199,10 +217,10 @@ def is_top_tier_model(model: str) -> bool:
199217
"""
200218
top_tier_models = [
201219
'claude-opus',
202-
'claude-sonnet-4',
220+
'claude-sonnet',
203221
'gpt-4',
204-
'gpt-4-turbo',
205-
'gemini-1.5-pro',
222+
'gpt-5',
223+
'gemini-2.5',
206224
]
207225

208226
model_lower = model.lower()

codewiki/src/be/agent_orchestrator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Dict, List, Any
66

77
# Configure logging and monitoring
8-
logging.basicConfig(level=logging.INFO)
8+
99
logger = logging.getLogger(__name__)
1010

1111
# try:
@@ -89,7 +89,7 @@ def create_agent(self, module_name: str, components: Dict[str, Any],
8989
async def process_module(self, module_name: str, components: Dict[str, Node],
9090
core_component_ids: List[str], module_path: List[str], working_dir: str) -> Dict[str, Any]:
9191
"""Process a single module and generate its documentation."""
92-
logger.debug(f"Processing module: {module_name}")
92+
logger.info(f"Processing module: {module_name}")
9393

9494
# Load or create module tree
9595
module_tree_path = os.path.join(working_dir, MODULE_TREE_FILENAME)
@@ -115,13 +115,13 @@ async def process_module(self, module_name: str, components: Dict[str, Node],
115115
# check if overview docs already exists
116116
overview_docs_path = os.path.join(working_dir, OVERVIEW_FILENAME)
117117
if os.path.exists(overview_docs_path):
118-
logger.info(f"Overview docs already exists at {overview_docs_path}")
118+
logger.info(f"Overview docs already exists at {overview_docs_path}")
119119
return module_tree
120120

121121
# check if module docs already exists
122122
docs_path = os.path.join(working_dir, f"{module_name}.md")
123123
if os.path.exists(docs_path):
124-
logger.info(f"Module docs already exists at {docs_path}")
124+
logger.info(f"Module docs already exists at {docs_path}")
125125
return module_tree
126126

127127
# Run agent

codewiki/src/be/agent_tools/generate_sub_module_documentations.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
from codewiki.src.be.cluster_modules import format_potential_core_components
1010
from codewiki.src.config import MAX_TOKEN_PER_LEAF_MODULE
1111

12+
import logging
13+
logger = logging.getLogger(__name__)
14+
1215

1316

1417
async def generate_sub_module_documentation(
@@ -36,6 +39,12 @@ async def generate_sub_module_documentation(
3639

3740
for sub_module_name, core_component_ids in sub_module_specs.items():
3841

42+
# Create visual indentation for nested modules
43+
indent = " " * deps.current_depth
44+
arrow = "└─" if deps.current_depth > 0 else "→"
45+
46+
logger.info(f"{indent}{arrow} Generating documentation for sub-module: {sub_module_name}")
47+
3948
num_tokens = count_tokens(format_potential_core_components(core_component_ids, ctx.deps.components)[-1])
4049

4150
if is_complex_module(ctx.deps.components, core_component_ids) and ctx.deps.current_depth < ctx.deps.max_depth and num_tokens >= MAX_TOKEN_PER_LEAF_MODULE:

codewiki/src/be/agent_tools/str_replace_editor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import logging
1717

1818
# Configure logging and monitoring
19-
logging.basicConfig(level=logging.INFO)
19+
2020
logger = logging.getLogger(__name__)
2121

2222
from pydantic_ai import RunContext, Tool

codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def analyze_code_files(self, code_files: List[Dict], base_dir: str) -> Dict:
3838
self.functions = {}
3939
self.call_relationships = []
4040

41-
logger.debug("Analyzing all code files")
4241
files_analyzed = 0
4342
for file_info in code_files:
4443
logger.debug(f"Analyzing: {file_info['path']}")
@@ -111,19 +110,13 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict):
111110
repo_dir: Repository directory path
112111
file_info: File information dictionary
113112
"""
114-
# file_path = Path(repo_dir) / file_info["path"]
115113

116-
# logger.debug(f"Reading content of {file_path}")
117-
# try:
118-
# with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
119-
# content = f.read()
120114
base = Path(repo_dir)
121115
file_path = base / file_info["path"]
122-
logger.debug(f"Reading content of {file_path}")
116+
123117
try:
124118
content = safe_open_text(base, file_path)
125119
language = file_info["language"]
126-
logger.debug(f"Analyzing {language} file: {file_path}")
127120
if language == "python":
128121
self._analyze_python_file(file_path, content, repo_dir)
129122
elif language == "javascript":
@@ -138,10 +131,10 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict):
138131
self._analyze_c_file(file_path, content, repo_dir)
139132
elif language == "cpp":
140133
self._analyze_cpp_file(file_path, content, repo_dir)
141-
else:
142-
logger.warning(
143-
f"Unsupported language for call graph analysis: {language} for file {file_path}"
144-
)
134+
# else:
135+
# logger.warning(
136+
# f"Unsupported language for call graph analysis: {language} for file {file_path}"
137+
# )
145138

146139
except Exception as e:
147140
logger.error(f"⚠️ Error analyzing {file_path}: {str(e)}")
@@ -180,18 +173,13 @@ def _analyze_javascript_file(self, file_path: str, content: str, repo_dir: str):
180173
repo_dir: Repository base directory
181174
"""
182175
try:
183-
logger.debug(f"Starting tree-sitter JavaScript analysis for {file_path}")
184176

185177
from codewiki.src.be.dependency_analyzer.analyzers.javascript import analyze_javascript_file_treesitter
186178

187179
functions, relationships = analyze_javascript_file_treesitter(
188180
file_path, content, repo_path=repo_dir
189181
)
190182

191-
logger.debug(
192-
f"Tree-sitter JavaScript analysis completed for {file_path}: {len(functions)} functions, {len(relationships)} relationships"
193-
)
194-
195183
for func in functions:
196184
func_id = func.id if func.id else f"{file_path}:{func.name}"
197185
self.functions[func_id] = func
@@ -210,18 +198,13 @@ def _analyze_typescript_file(self, file_path: str, content: str, repo_dir: str):
210198
content: File content string
211199
"""
212200
try:
213-
logger.debug(f"Starting tree-sitter TypeScript analysis for {file_path}")
214201

215202
from codewiki.src.be.dependency_analyzer.analyzers.typescript import analyze_typescript_file_treesitter
216203

217204
functions, relationships = analyze_typescript_file_treesitter(
218205
file_path, content, repo_path=repo_dir
219206
)
220207

221-
logger.debug(
222-
f"Tree-sitter TypeScript analysis completed for {file_path}: {len(functions)} functions, {len(relationships)} relationships"
223-
)
224-
225208
for func in functions:
226209
func_id = func.id if func.id else f"{file_path}:{func.name}"
227210
self.functions[func_id] = func
@@ -285,9 +268,6 @@ def _analyze_java_file(self, file_path: str, content: str, repo_dir: str):
285268

286269
try:
287270
functions, relationships = analyze_java_file(file_path, content, repo_path=repo_dir)
288-
logger.debug(
289-
f"Found {len(functions)} functions and {len(relationships)} relationships in {file_path}"
290-
)
291271
for func in functions:
292272
func_id = func.id if func.id else f"{file_path}:{func.name}"
293273
self.functions[func_id] = func
@@ -309,9 +289,6 @@ def _analyze_csharp_file(self, file_path: str, content: str, repo_dir: str):
309289

310290
try:
311291
functions, relationships = analyze_csharp_file(file_path, content, repo_path=repo_dir)
312-
logger.debug(
313-
f"Found {len(functions)} functions and {len(relationships)} relationships in {file_path}"
314-
)
315292

316293
for func in functions:
317294
func_id = func.id if func.id else f"{file_path}:{func.name}"
@@ -328,7 +305,6 @@ def _resolve_call_relationships(self):
328305
Attempts to match function calls to actual function definitions,
329306
handling cross-language calls where possible.
330307
"""
331-
logger.debug("Building function lookup table for resolving relationships.")
332308
func_lookup = {}
333309
for func_id, func_info in self.functions.items():
334310
func_lookup[func_id] = func_id
@@ -375,9 +351,6 @@ def _deduplicate_relationships(self):
375351
seen.add(key)
376352
unique_relationships.append(rel)
377353

378-
logger.debug(
379-
f"Removed {len(self.call_relationships) - len(unique_relationships)} duplicate relationships."
380-
)
381354
self.call_relationships = unique_relationships
382355

383356
def _generate_visualization_data(self) -> Dict:
@@ -391,7 +364,6 @@ def _generate_visualization_data(self) -> Dict:
391364
"""
392365
cytoscape_elements = []
393366

394-
logger.debug(f"Adding {len(self.functions)} function nodes.")
395367
for func_id, func_info in self.functions.items():
396368
node_classes = []
397369
if func_info.node_type == "method":
@@ -425,7 +397,6 @@ def _generate_visualization_data(self) -> Dict:
425397
)
426398

427399
resolved_rels = [r for r in self.call_relationships if r.is_resolved]
428-
logger.debug(f"Adding {len(resolved_rels)} relationship edges.")
429400
for rel in resolved_rels:
430401
cytoscape_elements.append(
431402
{
@@ -493,9 +464,6 @@ def _select_most_connected_nodes(self, target_count: int):
493464
target_count: The number of nodes to select
494465
"""
495466
if len(self.functions) <= target_count:
496-
logger.debug(
497-
f"Have {len(self.functions)} functions, target is {target_count} - keeping all"
498-
)
499467
return
500468

501469
if not self.call_relationships:
@@ -537,8 +505,3 @@ def _select_most_connected_nodes(self, target_count: int):
537505
if rel.caller in selected_func_ids and rel.callee in selected_func_ids
538506
]
539507

540-
logger.debug(
541-
f"Node selection: {original_func_count} -> {len(self.functions)} functions, "
542-
f"{original_rel_count} -> {len(self.call_relationships)} relationships"
543-
)
544-
logger.debug(f"Kept {len(selected_func_ids)} most connected nodes (target: {target_count})")

codewiki/src/be/dependency_analyzer/dependency_graphs_builder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ def build_dependency_graph(self) -> tuple[Dict[str, Any], List[str]]:
7373
if components[leaf_node].component_type in ["class", "interface", "struct"]:
7474
keep_leaf_nodes.append(leaf_node)
7575
else:
76-
logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
76+
# logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
77+
pass
7778
else:
7879
logger.warning(f"Leaf node {leaf_node} not found in components, removing it")
7980

codewiki/src/be/dependency_analyzer/topo_sort.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,11 @@ def concise_node(leaf_nodes: Set[str]) -> Set[str]:
313313
if components[leaf_node].component_type in ["class", "interface", "struct"]:
314314
keep_leaf_nodes.append(leaf_node)
315315
else:
316-
logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
316+
# logger.debug(f"Leaf node {leaf_node} is a {components[leaf_node].component_type}, removing it")
317+
pass
317318
else:
318-
logger.debug(f"Leaf node {leaf_node} not found in components, removing it")
319+
# logger.debug(f"Leaf node {leaf_node} not found in components, removing it")
320+
pass
319321

320322
return keep_leaf_nodes
321323

0 commit comments

Comments
 (0)