# Run as: iex --dot-iex path/to/notebook.exs
# Title: GitHub Project Analyzer
# ── Section ──
# Generate `architecture.md` with Mermaid diagrams from any GitHub repository.
# ── Configuration ──
# Change the repo URL and provider below, then run all cells:
# --- CHANGE THESE ---
repo_url = "https://github.com/Opencode-DCP/opencode-dynamic-context-pruning"
provider = :zai # or :openai — must be a provider atom that ChatCompletion accepts
branch = "main"
# Working directory for cloned repos (under the OS temp dir).
work_dir = Path.join(System.tmp_dir!(), "skynet_analyzer")
File.mkdir_p!(work_dir)
# Derive "owner/repo" from the URL: drop a trailing ".git", split on "/",
# keep the last two path segments, and re-join them.
repo_name =
  repo_url
  |> String.trim_trailing(".git")
  |> String.split("/")
  |> Enum.take(-2)
  |> Enum.join("/")
# Flatten "owner/repo" to "owner_repo" for a filesystem-safe directory name.
repo_dir = Path.join(work_dir, repo_name |> String.replace("/", "_"))
IO.puts("Repo: #{repo_name}")
IO.puts("Clone to: #{repo_dir}")
# ── Clone Repository ──
# Clone on first run; on subsequent runs just fast-forward pull.
if File.exists?(Path.join(repo_dir, ".git")) do
  # Already cloned — pull latest. Use a case instead of matching `{output, 0}`
  # so a failed pull (diverged branch, offline, etc.) reports an error instead
  # of crashing with a MatchError; this mirrors the clone branch below.
  case System.cmd("git", ["-C", repo_dir, "pull", "--ff-only"], stderr_to_stdout: true) do
    {output, 0} -> IO.puts("Updated: #{output}")
    {output, _code} -> IO.puts("Pull failed: #{output}")
  end
else
  # Shallow clone (last 50 commits) of the requested branch.
  {output, code} =
    System.cmd("git", ["clone", "--depth", "50", "--branch", branch, repo_url, repo_dir],
      stderr_to_stdout: true
    )

  if code == 0 do
    IO.puts("Cloned successfully")
  else
    IO.puts("Clone failed: #{output}")
  end
end
defmodule Kake do
  @moduledoc false

  require Logger

  @doc "Quiets the console by raising the Logger level to `:warning`."
  def mindrebraak do
    Logger.configure(level: :warning)
  end
end

Kake.mindrebraak()
:ok
# ── Analyze Structure ──
defmodule Analyzer do
  @moduledoc """
  Static inspection helpers for a cloned repository: file tree, language
  detection, key config files, source-code samples, and recent git history.
  """

  # Directories never worth descending into (VCS data, build output, deps).
  @ignore ~w(.git node_modules _build deps .elixir_ls dist build __pycache__ .next .cache vendor target)

  @doc """
  Returns a sorted, flat list of paths relative to `dir`.

  Directories carry a trailing `/`; traversal stops at `max_depth` levels.
  """
  @spec file_tree(Path.t(), non_neg_integer()) :: [String.t()]
  def file_tree(dir, max_depth \\ 4) do
    # do_tree already returns a flat list (flat_map all the way down), so the
    # previous List.flatten/1 step was redundant and has been removed.
    dir
    |> do_tree(dir, 0, max_depth)
    |> Enum.sort()
  end

  # Recursive walk. Skips @ignore entries and anything starting with a dot.
  defp do_tree(base, path, depth, max_depth) when depth < max_depth do
    case File.ls(path) do
      {:ok, entries} ->
        entries
        |> Enum.reject(&(&1 in @ignore or String.starts_with?(&1, ".")))
        |> Enum.sort()
        |> Enum.flat_map(fn entry ->
          full = Path.join(path, entry)
          rel = Path.relative_to(full, base)

          if File.dir?(full) do
            # Directories get a "/" suffix and are expanded recursively.
            [rel <> "/" | do_tree(base, full, depth + 1, max_depth)]
          else
            [rel]
          end
        end)

      # Unreadable directory (permissions, race with deletion): skip it.
      _ ->
        []
    end
  end

  # Depth limit reached: contribute nothing.
  defp do_tree(_base, _path, _depth, _max_depth), do: []

  @doc """
  Summarizes the languages in `tree`: the 10 most frequent file extensions
  (as `{ext, count}` pairs, most frequent first) plus any recognized
  build/config files found anywhere in the tree.
  """
  @spec detect_language([String.t()]) :: %{
          extensions: [{String.t(), pos_integer()}],
          config_files: [String.t()]
        }
  def detect_language(tree) do
    extensions =
      tree
      |> Enum.reject(&String.ends_with?(&1, "/"))
      |> Enum.map(&Path.extname/1)
      |> Enum.frequencies()
      # Idiomatic descending sort instead of sorting by negated count.
      |> Enum.sort_by(fn {_ext, count} -> count end, :desc)
      |> Enum.take(10)

    config_files =
      tree
      |> Enum.map(&Path.basename/1)
      |> Enum.filter(&(&1 in ~w(
        mix.exs Cargo.toml go.mod package.json pyproject.toml
        Gemfile pom.xml build.gradle CMakeLists.txt Makefile
        tsconfig.json deno.json bun.lockb
      )))
      |> Enum.uniq()

    %{extensions: extensions, config_files: config_files}
  end

  @doc """
  Reads up to 6 well-known top-level files (READMEs, build manifests,
  Docker files), each truncated to the first ~3000 characters.

  Returns `{relative_path, content}` pairs. Raises if a listed file cannot
  be read (e.g. a broken symlink).
  """
  @spec read_key_files(Path.t(), [String.t()]) :: [{String.t(), String.t()}]
  def read_key_files(dir, tree) do
    key_patterns = [
      "README.md", "readme.md", "README.rst",
      "mix.exs", "Cargo.toml", "go.mod", "package.json", "pyproject.toml",
      "docker-compose.yml", "docker-compose.yaml", "Dockerfile",
      "Makefile"
    ]

    tree
    |> Enum.filter(fn path ->
      # Top-level only: relative paths containing "/" live in subdirectories.
      Path.basename(path) in key_patterns and not String.contains?(path, "/")
    end)
    |> Enum.take(6)
    |> Enum.map(fn path ->
      content = dir |> Path.join(path) |> File.read!() |> String.slice(0..3000)
      {path, content}
    end)
  end

  @doc """
  Samples up to `count` non-test source files from `tree`, returning
  `{relative_path, line_count, first_60_lines}` tuples.

  NOTE: "non-test" means the path contains no "test" substring anywhere,
  which also excludes e.g. a `latest/` directory.
  """
  @spec read_source_samples(Path.t(), [String.t()], pos_integer()) ::
          [{String.t(), non_neg_integer(), String.t()}]
  def read_source_samples(dir, tree, count \\ 8) do
    source_exts = ~w(.ex .exs .rs .go .py .ts .tsx .js .jsx .rb .java .kt .c .h .cpp .zig)

    tree
    |> Enum.reject(&String.ends_with?(&1, "/"))
    |> Enum.filter(fn path ->
      Path.extname(path) in source_exts and not String.contains?(path, "test")
    end)
    |> Enum.take(count)
    |> Enum.map(fn path ->
      # Split once and reuse for both the line count and the header excerpt
      # (the original split the content twice).
      lines = dir |> Path.join(path) |> File.read!() |> String.split("\n")
      header = lines |> Enum.take(60) |> Enum.join("\n")
      {path, length(lines), header}
    end)
  end

  @doc """
  Returns `git log --oneline` output for the last `count` commits, or a
  fallback message when git fails (not a repo, git missing, etc.).
  """
  @spec recent_commits(Path.t(), pos_integer()) :: String.t()
  def recent_commits(dir, count \\ 15) do
    case System.cmd("git", ["-C", dir, "log", "--oneline", "-#{count}"], stderr_to_stdout: true) do
      {output, 0} -> output
      _ -> "Could not read git log"
    end
  end
end
# Run the analysis passes and print a short summary.
tree = Analyzer.file_tree(repo_dir)
lang_info = Analyzer.detect_language(tree)
key_files = Analyzer.read_key_files(repo_dir, tree)
source_samples = Analyzer.read_source_samples(repo_dir, tree)
commits = Analyzer.recent_commits(repo_dir)

[
  "Files: #{length(tree)}",
  "Languages: #{inspect(lang_info.config_files)}",
  "Top extensions: #{inspect(Enum.take(lang_info.extensions, 5))}"
]
|> Enum.each(&IO.puts/1)
# ── Build Context for LLM ──
# Assemble a single markdown document from everything gathered above.
# The tree is capped at 200 entries to keep the prompt a reasonable size.
tree_str =
  tree
  |> Enum.take(200)
  |> Enum.join("\n")

# Enum.map_join/3 is the idiomatic (and allocation-free) form of
# `Enum.map |> Enum.join`.
key_files_str =
  Enum.map_join(key_files, "\n\n", fn {path, content} -> "=== #{path} ===\n#{content}" end)

source_str =
  Enum.map_join(source_samples, "\n\n", fn {path, lines, header} ->
    "=== #{path} (#{lines} lines) ===\n#{header}"
  end)

context = """
# Repository: #{repo_name}
## File Tree (first 200 entries)
#{tree_str}
## Key Config Files
#{key_files_str}
## Source Code Samples (first 60 lines each)
#{source_str}
## Recent Commits
#{commits}
## Language Detection
Config files: #{inspect(lang_info.config_files)}
Extension frequency: #{inspect(lang_info.extensions)}
"""

IO.puts(context)
# ~4 chars/token is a rough heuristic for English text.
IO.puts("Context size: #{String.length(context)} chars, ~#{div(String.length(context), 4)} tokens")
# ── Generate Architecture Document ──
# ChatCompletion is a project-local action module (not stdlib) — presumably
# takes %{provider, messages} plus an options map and returns
# {:ok, %{content: binary}}; verify against the Toolbox source.
alias Toolbox.Actions.ChatCompletion
# Fixed system prompt: the contract for the generated architecture.md.
system_prompt = """
You are a senior software architect. Analyze the repository and produce a clear
architecture document in Markdown. You MUST include:
1. **Overview** — one paragraph describing what this project does
2. **Tech Stack** — languages, frameworks, key dependencies
3. **Architecture** — high-level Mermaid diagram showing major components and their relationships
4. **Module/Package Structure** — table of key directories and their purpose
5. **Data Flow** — Mermaid sequence diagram showing a typical request/operation
6. **Key Design Decisions** — bullet points on notable patterns (e.g., supervision trees, actor model, plugin architecture)
7. **Entry Points** — how to run, test, deploy
Rules:
- Use ```mermaid fenced code blocks for all diagrams
- Keep diagrams readable (max ~15 nodes)
- Be factual — only describe what you can see in the code
- Write concisely, no filler
"""
IO.puts("Sending to #{provider}... (this may take a minute)")

# Build the chat payload first, then make the (blocking) provider call.
request = %{
  provider: provider,
  messages: [
    %{role: "system", content: system_prompt},
    %{role: "user", content: context}
  ]
}

# Crash loudly on failure — there is nothing useful to do without a result.
{:ok, result} = ChatCompletion.run(request, %{})

architecture_md = result.content
IO.puts("Generated #{String.length(architecture_md)} chars")
# Preview the output.
# Guard the Kino call: this script also runs under plain `iex` (see line 1),
# where Kino (a Livebook dependency) is not loaded and Kino.Markdown.new/1
# would raise UndefinedFunctionError — aborting before the file is saved
# below. The original code crashed in exactly that case.
if Code.ensure_loaded?(Kino) do
  # Rendered markdown; Livebook displays the cell value.
  Kino.Markdown.new(architecture_md)
end

# Raw-text preview works in every environment.
IO.puts(architecture_md)
# ── Save to File ──
# Primary copy lives inside the analyzed repo checkout.
output_path = Path.join(repo_dir, "architecture.md")
File.write!(output_path, architecture_md)
IO.puts("Saved to: #{output_path}")

# Optionally also save to the notebooks directory (next to this script).
local_name = String.replace(repo_name, "/", "_") <> "_architecture.md"
local_copy = Path.join(__DIR__, local_name)
File.write!(local_copy, architecture_md)
IO.puts("Local copy: #{local_copy}")
# ── Bonus: Dependency Graph ──
# For Elixir projects, generate a dependency Mermaid diagram:
if "mix.exs" in lang_info.config_files do
  mix_content = File.read!(Path.join(repo_dir, "mix.exs"))

  # Prompt extracted to a named binding for readability; heredoc
  # de-indentation keeps the text identical.
  dep_prompt = """
  You are given an Elixir mix.exs file. Generate ONLY a Mermaid graph diagram
  showing the project's dependencies. Group them by category
  (web, database, testing, tools, etc). Output ONLY the mermaid code block, nothing else.
  """

  {:ok, dep_diagram} =
    ChatCompletion.run(
      %{
        provider: provider,
        messages: [
          %{role: "system", content: dep_prompt},
          %{role: "user", content: mix_content}
        ]
      },
      %{}
    )

  IO.puts(dep_diagram.content)
else
  IO.puts("Not an Elixir project — skipping dependency graph")
end
# ── Bonus: Compare Two Repos ──
# Analyze a second repo and compare architectures:
# Uncomment and set a second repo to compare:
# repo_url_2 = "https://github.com/someone/other-project"
#
# (clone and analyze repo_url_2 using the same steps above, then:)
#
# {:ok, comparison} =
# ChatCompletion.run(
# %{
# provider: provider,
# messages: [
# %{role: "system", content: "Compare these two project architectures. Highlight key differences in design decisions, tech stack, and structure. Use a comparison table."},
# %{role: "user", content: "## Repo A\n#{architecture_md}\n\n## Repo B\n#{architecture_md_2}"}
# ]
# },
# %{}
# )
#
# IO.puts(comparison.content)