# Run as: iex --dot-iex path/to/notebook.exs
# Title: GitHub Project Analyzer
# ── Section ──
# Generate `architecture.md` with Mermaid diagrams from any GitHub repository.
# ── Configuration ──
# Change the repo URL and provider below, then run all cells:
# --- CHANGE THESE ---
repo_url = "https://github.com/Opencode-DCP/opencode-dynamic-context-pruning"
provider = :zai # or :openai — must be a provider atom that ChatCompletion accepts
branch = "main"
# Working directory for cloned repos (under the OS temp dir).
work_dir = Path.join(System.tmp_dir!(), "skynet_analyzer")
File.mkdir_p!(work_dir)
# Derive "owner/repo" from the URL: drop a trailing ".git", split on "/",
# keep the last two path segments, and re-join them.
repo_name =
  repo_url
  |> String.trim_trailing(".git")
  |> String.split("/")
  |> Enum.take(-2)
  |> Enum.join("/")
# Flatten "owner/repo" to "owner_repo" for a filesystem-safe directory name.
repo_dir = Path.join(work_dir, repo_name |> String.replace("/", "_"))
IO.puts("Repo: #{repo_name}")
IO.puts("Clone to: #{repo_dir}")
# ── Clone Repository ──
# Clone on first run; on subsequent runs just fast-forward pull.
if File.exists?(Path.join(repo_dir, ".git")) do
  # Already cloned — pull latest. Use a case instead of matching `{output, 0}`
  # so a failed pull (diverged branch, offline, etc.) reports an error instead
  # of crashing with a MatchError; this mirrors the clone branch below.
  case System.cmd("git", ["-C", repo_dir, "pull", "--ff-only"], stderr_to_stdout: true) do
    {output, 0} -> IO.puts("Updated: #{output}")
    {output, _code} -> IO.puts("Pull failed: #{output}")
  end
else
  # Shallow clone (last 50 commits) of the requested branch.
  {output, code} =
    System.cmd("git", ["clone", "--depth", "50", "--branch", branch, repo_url, repo_dir],
      stderr_to_stdout: true
    )

  if code == 0 do
    IO.puts("Cloned successfully")
  else
    IO.puts("Clone failed: #{output}")
  end
end
defmodule Kake do
  @moduledoc false

  require Logger

  @doc "Quiets the console by raising the Logger level to `:warning`."
  def mindrebraak do
    Logger.configure(level: :warning)
  end
end

Kake.mindrebraak()
:ok
# ── Analyze Structure ──
defmodule Analyzer do
  @moduledoc """
  Static inspection helpers for a cloned repository: file tree, language
  detection, key config files, source-code samples, and recent git history.
  """

  # Directories never worth descending into (VCS data, build output, deps).
  @ignore ~w(.git node_modules _build deps .elixir_ls dist build __pycache__ .next .cache vendor target)

  @doc """
  Returns a sorted, flat list of paths relative to `dir`.

  Directories carry a trailing `/`; traversal stops at `max_depth` levels.
  """
  @spec file_tree(Path.t(), non_neg_integer()) :: [String.t()]
  def file_tree(dir, max_depth \\ 4) do
    # do_tree already returns a flat list (flat_map all the way down), so the
    # previous List.flatten/1 step was redundant and has been removed.
    dir
    |> do_tree(dir, 0, max_depth)
    |> Enum.sort()
  end

  # Recursive walk. Skips @ignore entries and anything starting with a dot.
  defp do_tree(base, path, depth, max_depth) when depth < max_depth do
    case File.ls(path) do
      {:ok, entries} ->
        entries
        |> Enum.reject(&(&1 in @ignore or String.starts_with?(&1, ".")))
        |> Enum.sort()
        |> Enum.flat_map(fn entry ->
          full = Path.join(path, entry)
          rel = Path.relative_to(full, base)

          if File.dir?(full) do
            # Directories get a "/" suffix and are expanded recursively.
            [rel <> "/" | do_tree(base, full, depth + 1, max_depth)]
          else
            [rel]
          end
        end)

      # Unreadable directory (permissions, race with deletion): skip it.
      _ ->
        []
    end
  end

  # Depth limit reached: contribute nothing.
  defp do_tree(_base, _path, _depth, _max_depth), do: []

  @doc """
  Summarizes the languages in `tree`: the 10 most frequent file extensions
  (as `{ext, count}` pairs, most frequent first) plus any recognized
  build/config files found anywhere in the tree.
  """
  @spec detect_language([String.t()]) :: %{
          extensions: [{String.t(), pos_integer()}],
          config_files: [String.t()]
        }
  def detect_language(tree) do
    extensions =
      tree
      |> Enum.reject(&String.ends_with?(&1, "/"))
      |> Enum.map(&Path.extname/1)
      |> Enum.frequencies()
      # Idiomatic descending sort instead of sorting by negated count.
      |> Enum.sort_by(fn {_ext, count} -> count end, :desc)
      |> Enum.take(10)

    config_files =
      tree
      |> Enum.map(&Path.basename/1)
      |> Enum.filter(&(&1 in ~w(
        mix.exs Cargo.toml go.mod package.json pyproject.toml
        Gemfile pom.xml build.gradle CMakeLists.txt Makefile
        tsconfig.json deno.json bun.lockb
      )))
      |> Enum.uniq()

    %{extensions: extensions, config_files: config_files}
  end

  @doc """
  Reads up to 6 well-known top-level files (READMEs, build manifests,
  Docker files), each truncated to the first ~3000 characters.

  Returns `{relative_path, content}` pairs. Raises if a listed file cannot
  be read (e.g. a broken symlink).
  """
  @spec read_key_files(Path.t(), [String.t()]) :: [{String.t(), String.t()}]
  def read_key_files(dir, tree) do
    key_patterns = [
      "README.md", "readme.md", "README.rst",
      "mix.exs", "Cargo.toml", "go.mod", "package.json", "pyproject.toml",
      "docker-compose.yml", "docker-compose.yaml", "Dockerfile",
      "Makefile"
    ]

    tree
    |> Enum.filter(fn path ->
      # Top-level only: relative paths containing "/" live in subdirectories.
      Path.basename(path) in key_patterns and not String.contains?(path, "/")
    end)
    |> Enum.take(6)
    |> Enum.map(fn path ->
      content = dir |> Path.join(path) |> File.read!() |> String.slice(0..3000)
      {path, content}
    end)
  end

  @doc """
  Samples up to `count` non-test source files from `tree`, returning
  `{relative_path, line_count, first_60_lines}` tuples.

  NOTE: "non-test" means the path contains no "test" substring anywhere,
  which also excludes e.g. a `latest/` directory.
  """
  @spec read_source_samples(Path.t(), [String.t()], pos_integer()) ::
          [{String.t(), non_neg_integer(), String.t()}]
  def read_source_samples(dir, tree, count \\ 8) do
    source_exts = ~w(.ex .exs .rs .go .py .ts .tsx .js .jsx .rb .java .kt .c .h .cpp .zig)

    tree
    |> Enum.reject(&String.ends_with?(&1, "/"))
    |> Enum.filter(fn path ->
      Path.extname(path) in source_exts and not String.contains?(path, "test")
    end)
    |> Enum.take(count)
    |> Enum.map(fn path ->
      # Split once and reuse for both the line count and the header excerpt
      # (the original split the content twice).
      lines = dir |> Path.join(path) |> File.read!() |> String.split("\n")
      header = lines |> Enum.take(60) |> Enum.join("\n")
      {path, length(lines), header}
    end)
  end

  @doc """
  Returns `git log --oneline` output for the last `count` commits, or a
  fallback message when git fails (not a repo, git missing, etc.).
  """
  @spec recent_commits(Path.t(), pos_integer()) :: String.t()
  def recent_commits(dir, count \\ 15) do
    case System.cmd("git", ["-C", dir, "log", "--oneline", "-#{count}"], stderr_to_stdout: true) do
      {output, 0} -> output
      _ -> "Could not read git log"
    end
  end
end
# Run the analysis passes and print a short summary.
tree = Analyzer.file_tree(repo_dir)
lang_info = Analyzer.detect_language(tree)
key_files = Analyzer.read_key_files(repo_dir, tree)
source_samples = Analyzer.read_source_samples(repo_dir, tree)
commits = Analyzer.recent_commits(repo_dir)

[
  "Files: #{length(tree)}",
  "Languages: #{inspect(lang_info.config_files)}",
  "Top extensions: #{inspect(Enum.take(lang_info.extensions, 5))}"
]
|> Enum.each(&IO.puts/1)
# ── Build Context for LLM ──
# Assemble a single markdown document from everything gathered above.
# The tree is capped at 200 entries to keep the prompt a reasonable size.
tree_str =
  tree
  |> Enum.take(200)
  |> Enum.join("\n")

# Enum.map_join/3 is the idiomatic (and allocation-free) form of
# `Enum.map |> Enum.join`.
key_files_str =
  Enum.map_join(key_files, "\n\n", fn {path, content} -> "=== #{path} ===\n#{content}" end)

source_str =
  Enum.map_join(source_samples, "\n\n", fn {path, lines, header} ->
    "=== #{path} (#{lines} lines) ===\n#{header}"
  end)

context = """
# Repository: #{repo_name}
## File Tree (first 200 entries)
#{tree_str}
## Key Config Files
#{key_files_str}
## Source Code Samples (first 60 lines each)
#{source_str}
## Recent Commits
#{commits}
## Language Detection
Config files: #{inspect(lang_info.config_files)}
Extension frequency: #{inspect(lang_info.extensions)}
"""

IO.puts(context)
# ~4 chars/token is a rough heuristic for English text.
IO.puts("Context size: #{String.length(context)} chars, ~#{div(String.length(context), 4)} tokens")
# ── Generate Architecture Document ──
# ChatCompletion is a project-local action module (not stdlib) — presumably
# takes %{provider, messages} plus an options map and returns
# {:ok, %{content: binary}}; verify against the Toolbox source.
alias Toolbox.Actions.ChatCompletion
# Fixed system prompt: the contract for the generated architecture.md.
system_prompt = """
You are a senior software architect. Analyze the repository and produce a clear
architecture document in Markdown. You MUST include:
1. **Overview** — one paragraph describing what this project does
2. **Tech Stack** — languages, frameworks, key dependencies
3. **Architecture** — high-level Mermaid diagram showing major components and their relationships
4. **Module/Package Structure** — table of key directories and their purpose
5. **Data Flow** — Mermaid sequence diagram showing a typical request/operation
6. **Key Design Decisions** — bullet points on notable patterns (e.g., supervision trees, actor model, plugin architecture)
7. **Entry Points** — how to run, test, deploy
Rules:
- Use ```mermaid fenced code blocks for all diagrams
- Keep diagrams readable (max ~15 nodes)
- Be factual — only describe what you can see in the code
- Write concisely, no filler
"""
IO.puts("Sending to #{provider}... (this may take a minute)")

# Build the chat payload first, then make the (blocking) provider call.
request = %{
  provider: provider,
  messages: [
    %{role: "system", content: system_prompt},
    %{role: "user", content: context}
  ]
}

# Crash loudly on failure — there is nothing useful to do without a result.
{:ok, result} = ChatCompletion.run(request, %{})

architecture_md = result.content
IO.puts("Generated #{String.length(architecture_md)} chars")
# Preview the output.
# Guard the Kino call: this script also runs under plain `iex` (see line 1),
# where Kino (a Livebook dependency) is not loaded and Kino.Markdown.new/1
# would raise UndefinedFunctionError — aborting before the file is saved
# below. The original code crashed in exactly that case.
if Code.ensure_loaded?(Kino) do
  # Rendered markdown; Livebook displays the cell value.
  Kino.Markdown.new(architecture_md)
end

# Raw-text preview works in every environment.
IO.puts(architecture_md)
# ── Save to File ──
# Primary copy lives inside the analyzed repo checkout.
output_path = Path.join(repo_dir, "architecture.md")
File.write!(output_path, architecture_md)
IO.puts("Saved to: #{output_path}")

# Optionally also save to the notebooks directory (next to this script).
local_name = String.replace(repo_name, "/", "_") <> "_architecture.md"
local_copy = Path.join(__DIR__, local_name)
File.write!(local_copy, architecture_md)
IO.puts("Local copy: #{local_copy}")
# ── Bonus: Dependency Graph ──
# For Elixir projects, generate a dependency Mermaid diagram:
if "mix.exs" in lang_info.config_files do
  mix_content = File.read!(Path.join(repo_dir, "mix.exs"))

  # Prompt extracted to a named binding for readability; heredoc
  # de-indentation keeps the text identical.
  dep_prompt = """
  You are given an Elixir mix.exs file. Generate ONLY a Mermaid graph diagram
  showing the project's dependencies. Group them by category
  (web, database, testing, tools, etc). Output ONLY the mermaid code block, nothing else.
  """

  {:ok, dep_diagram} =
    ChatCompletion.run(
      %{
        provider: provider,
        messages: [
          %{role: "system", content: dep_prompt},
          %{role: "user", content: mix_content}
        ]
      },
      %{}
    )

  IO.puts(dep_diagram.content)
else
  IO.puts("Not an Elixir project — skipping dependency graph")
end
# ── Bonus: Compare Two Repos ──
# Analyze a second repo and compare architectures:
# Uncomment and set a second repo to compare:
# repo_url_2 = "https://github.com/someone/other-project"
#
# (clone and analyze repo_url_2 using the same steps above, then:)
#
# {:ok, comparison} =
# ChatCompletion.run(
# %{
# provider: provider,
# messages: [
# %{role: "system", content: "Compare these two project architectures. Highlight key differences in design decisions, tech stack, and structure. Use a comparison table."},
# %{role: "user", content: "## Repo A\n#{architecture_md}\n\n## Repo B\n#{architecture_md_2}"}
# ]
# },
# %{}
# )
#
# IO.puts(comparison.content)