# GitHub Project Analyzer

## Overview

Generate `architecture.md` with Mermaid diagrams from any GitHub repository.

## Configuration

Change the repo URL and provider below, then run all cells:

```elixir
# --- CHANGE THESE ---
repo_url = "https://github.com/Opencode-DCP/opencode-dynamic-context-pruning"
# Provider atom handed to ChatCompletion, e.g. :zai or :openai
provider = :zai
branch = "main"

# Working directory for cloned repos
work_dir = Path.join(System.tmp_dir!(), "skynet_analyzer")
File.mkdir_p!(work_dir)

# "owner/repo", derived from the last two URL segments.
# A trailing "/" is trimmed first so "https://host/owner/repo/" still works.
repo_name =
  repo_url
  |> String.trim_trailing("/")
  |> String.trim_trailing(".git")
  |> String.split("/", trim: true)
  |> Enum.take(-2)
  |> Enum.join("/")

repo_dir = Path.join(work_dir, String.replace(repo_name, "/", "_"))

IO.puts("Repo: #{repo_name}")
IO.puts("Clone to: #{repo_dir}")
```

## Clone Repository

```elixir
if File.exists?(Path.join(repo_dir, ".git")) do
  # Already cloned — pull latest. A failed pull is reported instead of crashing
  # the cell, so the existing checkout can still be analyzed.
  case System.cmd("git", ["-C", repo_dir, "pull", "--ff-only"], stderr_to_stdout: true) do
    {output, 0} ->
      IO.puts("Updated: #{output}")

    {output, code} ->
      IO.puts("Pull failed (exit #{code}), using existing checkout: #{output}")
  end
else
  {output, code} =
    System.cmd(
      "git",
      ["clone", "--depth", "50", "--branch", branch, repo_url, repo_dir],
      stderr_to_stdout: true
    )

  if code == 0 do
    IO.puts("Cloned successfully")
  else
    IO.puts("Clone failed: #{output}")
  end
end

defmodule Kake do
  require Logger

  # Raises the Logger level to :warning so the following cells produce less log noise.
  def mindrebraak(), do: Logger.configure(level: :warning)
end

Kake.mindrebraak()
:ok
```

## Analyze Structure

```elixir
defmodule Analyzer do
  @moduledoc """
  Collects the raw material for the LLM prompt from a cloned repository:
  file tree, language hints, key config files, source samples and recent commits.
  """

  # Directory/file names that are never descended into or listed.
  @ignore ~w(.git node_modules _build deps .elixir_ls dist build __pycache__ .next .cache vendor target)

  # Build/config file names used as language hints.
  @config_files ~w(
    mix.exs Cargo.toml go.mod package.json pyproject.toml Gemfile pom.xml
    build.gradle CMakeLists.txt Makefile tsconfig.json deno.json bun.lockb
  )

  # Root-level files whose content is included in the prompt.
  @key_files [
    "README.md",
    "readme.md",
    "README.rst",
    "mix.exs",
    "Cargo.toml",
    "go.mod",
    "package.json",
    "pyproject.toml",
    "docker-compose.yml",
    "docker-compose.yaml",
    "Dockerfile",
    "Makefile"
  ]

  @source_exts ~w(.ex .exs .rs .go .py .ts .tsx .js .jsx .rb .java .kt .c .h .cpp .zig)

  @doc """
  Returns a sorted list of paths relative to `dir`, at most `max_depth` levels deep.

  Directories carry a trailing `"/"`. Entries listed in `@ignore` and entries
  starting with `"."` are skipped.
  """
  def file_tree(dir, max_depth \\ 4) do
    # do_tree/4 already returns a flat list, so only sorting is needed.
    dir
    |> do_tree(dir, 0, max_depth)
    |> Enum.sort()
  end

  defp do_tree(base, path, depth, max_depth) when depth < max_depth do
    case File.ls(path) do
      {:ok, entries} ->
        entries
        |> Enum.reject(&(&1 in @ignore or String.starts_with?(&1, ".")))
        |> Enum.sort()
        |> Enum.flat_map(fn entry ->
          full = Path.join(path, entry)
          rel = Path.relative_to(full, base)

          if File.dir?(full) do
            [rel <> "/" | do_tree(base, full, depth + 1, max_depth)]
          else
            [rel]
          end
        end)

      # Unreadable directory: contribute nothing rather than fail the whole walk.
      _ ->
        []
    end
  end

  # Depth limit reached.
  defp do_tree(_base, _path, _depth, _max_depth), do: []

  @doc """
  Summarizes language hints from a file tree.

  Returns `%{extensions: [{ext, count}], config_files: [name]}` where
  `extensions` holds the ten most frequent file extensions (most frequent first)
  and `config_files` the known build/config file names found at any depth.
  """
  def detect_language(tree) do
    extensions =
      tree
      |> Enum.reject(&String.ends_with?(&1, "/"))
      |> Enum.map(&Path.extname/1)
      |> Enum.frequencies()
      |> Enum.sort_by(&elem(&1, 1), :desc)
      |> Enum.take(10)

    config_files =
      tree
      |> Enum.map(&Path.basename/1)
      |> Enum.filter(&(&1 in @config_files))
      |> Enum.uniq()

    %{extensions: extensions, config_files: config_files}
  end

  @doc """
  Reads up to six well-known files from the repository root.

  Returns `[{path, content}]` with each content truncated to its first
  3001 characters (`0..3000`). Raises if a listed file cannot be read.
  """
  def read_key_files(dir, tree) do
    tree
    # Root-level entries contain no "/", so the path itself is the file name.
    |> Enum.filter(&(&1 in @key_files))
    |> Enum.take(6)
    |> Enum.map(fn path ->
      content =
        dir
        |> Path.join(path)
        |> File.read!()
        |> String.slice(0..3000)

      {path, content}
    end)
  end

  @doc """
  Reads the first `count` source files of the tree.

  Returns `[{path, line_count, header}]` where `header` is the first 60 lines.
  Any path containing the substring `"test"` is skipped. Raises if a file
  cannot be read.
  """
  def read_source_samples(dir, tree, count \\ 8) do
    tree
    |> Enum.reject(&String.ends_with?(&1, "/"))
    |> Enum.filter(fn path ->
      Path.extname(path) in @source_exts and not String.contains?(path, "test")
    end)
    |> Enum.take(count)
    |> Enum.map(fn path ->
      # Split once and reuse the result for both the count and the header.
      lines =
        dir
        |> Path.join(path)
        |> File.read!()
        |> String.split("\n")

      header = lines |> Enum.take(60) |> Enum.join("\n")

      {path, length(lines), header}
    end)
  end

  @doc """
  Returns the last `count` commits as `git log --oneline` text, or a
  placeholder message when git exits with a non-zero status.
  """
  def recent_commits(dir, count \\ 15) do
    case System.cmd("git", ["-C", dir, "log", "--oneline", "-#{count}"], stderr_to_stdout: true) do
      {output, 0} -> output
      _ -> "Could not read git log"
    end
  end
end

tree = Analyzer.file_tree(repo_dir)
lang_info = Analyzer.detect_language(tree)
key_files = Analyzer.read_key_files(repo_dir, tree)
source_samples = Analyzer.read_source_samples(repo_dir, tree)
commits = Analyzer.recent_commits(repo_dir)

IO.puts("Files: #{length(tree)}")
IO.puts("Languages: #{inspect(lang_info.config_files)}")
IO.puts("Top extensions: #{inspect(Enum.take(lang_info.extensions, 5))}")
```

## Build Context for LLM

```elixir
tree_str = tree |> Enum.take(200) |> Enum.join("\n")

key_files_str =
  Enum.map_join(key_files, "\n\n", fn {path, content} ->
    "=== #{path} ===\n#{content}"
  end)

source_str =
  Enum.map_join(source_samples, "\n\n", fn {path, lines, header} ->
    "=== #{path} (#{lines} lines) ===\n#{header}"
  end)

context = """
# Repository: #{repo_name}

## File Tree (first 200 entries)
#{tree_str}

## Key Config Files
#{key_files_str}

## Source Code Samples (first 60 lines each)
#{source_str}

## Recent Commits
#{commits}

## Language Detection
Config files: #{inspect(lang_info.config_files)}
Extension frequency: #{inspect(lang_info.extensions)}
"""

IO.puts(context)

# Rough estimate: about four characters per token.
context_chars = String.length(context)
IO.puts("Context size: #{context_chars} chars, ~#{div(context_chars, 4)} tokens")
```

## Generate Architecture Document

````elixir
alias Toolbox.Actions.ChatCompletion

system_prompt = """
You are a senior software architect. Analyze the repository and produce a clear architecture document in Markdown.

You MUST include:
1. **Overview** — one paragraph describing what this project does
2. **Tech Stack** — languages, frameworks, key dependencies
3. **Architecture** — high-level Mermaid diagram showing major components and their relationships
4. **Module/Package Structure** — table of key directories and their purpose
5. **Data Flow** — Mermaid sequence diagram showing a typical request/operation
6. **Key Design Decisions** — bullet points on notable patterns (e.g., supervision trees, actor model, plugin architecture)
7. **Entry Points** — how to run, test, deploy

Rules:
- Use ```mermaid fenced code blocks for all diagrams
- Keep diagrams readable (max ~15 nodes)
- Be factual — only describe what you can see in the code
- Write concisely, no filler
"""

IO.puts("Sending to #{provider}... (this may take a minute)")

{:ok, result} =
  ChatCompletion.run(
    %{
      provider: provider,
      messages: [
        %{role: "system", content: system_prompt},
        %{role: "user", content: context}
      ]
    },
    %{}
  )

architecture_md = result.content
IO.puts("Generated #{String.length(architecture_md)} chars")
````

Preview the output:

```elixir
# Display as rendered markdown in Livebook
architecture_md |> Kino.Markdown.new()
```

If `Kino` isn't available, print the raw text instead:

```elixir
IO.puts(architecture_md)
```

## Save to File

```elixir
output_path = Path.join(repo_dir, "architecture.md")
File.write!(output_path, architecture_md)
IO.puts("Saved to: #{output_path}")
```

Optionally also save to the notebooks directory:

```elixir
local_copy = Path.join(__DIR__, "#{repo_name |> String.replace("/", "_")}_architecture.md")
File.write!(local_copy, architecture_md)
IO.puts("Local copy: #{local_copy}")
```

## Bonus: Dependency Graph

For Elixir projects, generate a dependency Mermaid diagram:

```elixir
# lang_info.config_files also lists mix.exs files found in subdirectories, so
# check the root file itself before reading it.
mix_path = Path.join(repo_dir, "mix.exs")

if File.regular?(mix_path) do
  mix_content = File.read!(mix_path)

  dep_prompt = """
  You are given an Elixir mix.exs file. Generate ONLY a Mermaid graph diagram
  showing the project's dependencies. Group them by category (web, database, testing, tools, etc).
  Output ONLY the mermaid code block, nothing else.
  """

  {:ok, dep_diagram} =
    ChatCompletion.run(
      %{
        provider: provider,
        messages: [
          %{role: "system", content: dep_prompt},
          %{role: "user", content: mix_content}
        ]
      },
      %{}
    )

  IO.puts(dep_diagram.content)
else
  IO.puts("Not an Elixir project — skipping dependency graph")
end
```

## Bonus: Compare Two Repos

Analyze a second repo and compare architectures:

```elixir
# Uncomment and set a second repo to compare:
# repo_url_2 = "https://github.com/someone/other-project"
#
# (clone and analyze repo_url_2 using the same steps above, binding the
# generated document to architecture_md_2, then:)
#
# {:ok, comparison} =
#   ChatCompletion.run(
#     %{
#       provider: provider,
#       messages: [
#         %{role: "system", content: "Compare these two project architectures. Highlight key differences in design decisions, tech stack, and structure. Use a comparison table."},
#         %{role: "user", content: "## Repo A\n#{architecture_md}\n\n## Repo B\n#{architecture_md_2}"}
#       ]
#     },
#     %{}
#   )
#
# IO.puts(comparison.content)
```