Skip to content

Commit d9da39e

Browse files
author
Scott Arbeit
committed
Added --include-unreachable flag and related processing and output.
1 parent 5b50428 commit d9da39e

File tree

4 files changed

+134
-5
lines changed

4 files changed

+134
-5
lines changed

git-sizer.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/spf13/pflag"
1515

16+
"github.com/github/git-sizer/counts"
1617
"github.com/github/git-sizer/git"
1718
"github.com/github/git-sizer/internal/refopts"
1819
"github.com/github/git-sizer/isatty"
@@ -46,6 +47,7 @@ const usage = `usage: git-sizer [OPTS] [ROOT...]
4647
gitconfig: 'sizer.jsonVersion'.
4748
--[no-]progress report (don't report) progress to stderr. Can
4849
be set via gitconfig: 'sizer.progress'.
50+
--include-unreachable include unreachable objects
4951
--version only report the git-sizer version number
5052
5153
Object selection:
@@ -131,6 +133,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st
131133
var progress bool
132134
var version bool
133135
var showRefs bool
136+
var includeUnreachable bool
134137

135138
// Try to open the repository, but it's not an error yet if this
136139
// fails, because the user might only be asking for `--help`.
@@ -207,6 +210,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st
207210
rgb.AddRefopts(flags)
208211

209212
flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed")
213+
flags.BoolVar(&includeUnreachable, "include-unreachable", false, "include unreachable objects")
210214

211215
flags.SortFlags = false
212216

@@ -344,6 +348,16 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st
344348

345349
historySize.GitDirSize = gitDirSize
346350

351+
// Get unreachable object stats and add to output if requested
352+
if includeUnreachable {
353+
historySize.ShowUnreachable = true
354+
unreachableStats, err := repo.GetUnreachableStats()
355+
if err == nil {
356+
historySize.UnreachableObjectCount = counts.Count32(unreachableStats.Count)
357+
historySize.UnreachableObjectSize = counts.Count64(unreachableStats.Size)
358+
}
359+
}
360+
347361
if jsonOutput {
348362
var j []byte
349363
var err error

git/git.go

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
package git
22

33
import (
4+
"bufio"
45
"bytes"
56
"errors"
67
"fmt"
8+
"io"
79
"io/fs"
810
"os"
911
"os/exec"
1012
"path/filepath"
13+
"strings"
1114
)
1215

1316
// ObjectType represents the type of a Git object ("blob", "tree",
@@ -157,7 +160,7 @@ func (repo *Repository) GitDir() (string, error) {
157160
return repo.gitDir, nil
158161
}
159162

160-
// GitPath returns that path of a file within the git repository, by
163+
// GitPath returns the path of a file within the git repository, by
161164
// calling `git rev-parse --git-path $relPath`. The returned path is
162165
// relative to the current directory.
163166
func (repo *Repository) GitPath(relPath string) (string, error) {
@@ -173,3 +176,95 @@ func (repo *Repository) GitPath(relPath string) (string, error) {
173176
// current directory, we can use it as-is:
174177
return string(bytes.TrimSpace(out)), nil
175178
}
179+
180+
// UnreachableStats summarizes the unreachable objects found in a
// repository: how many there are and how large they are in total.
type UnreachableStats struct {
	// Count is the number of objects that `git fsck` reported as
	// unreachable.
	Count int64

	// Size is the sum of the sizes that `git cat-file --batch-check`
	// reported for those objects.
	Size int64
}
185+
186+
// GetUnreachableStats runs 'git fsck --unreachable --no-reflogs --full'
187+
// and returns the count and total size of unreachable objects.
188+
// This implementation collects all OIDs from fsck output and then uses
189+
// batch mode to efficiently retrieve their sizes.
190+
func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) {
191+
// Run git fsck. Using CombinedOutput captures both stdout and stderr.
192+
cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "fsck", "--unreachable", "--no-reflogs", "--full")
193+
cmd.Env = os.Environ()
194+
output, err := cmd.CombinedOutput()
195+
if err != nil {
196+
fmt.Fprintln(os.Stderr)
197+
fmt.Fprintln(os.Stderr, "An error occurred trying to process unreachable objects.")
198+
os.Stderr.Write(output)
199+
fmt.Fprintln(os.Stderr)
200+
return UnreachableStats{Count: 0, Size: 0}, err
201+
}
202+
203+
var oids []string
204+
count := int64(0)
205+
for _, line := range bytes.Split(output, []byte{'\n'}) {
206+
fields := bytes.Fields(line)
207+
// Expected line format: "unreachable <type> <oid> ..."
208+
if len(fields) >= 3 && string(fields[0]) == "unreachable" {
209+
count++
210+
oid := string(fields[2])
211+
oids = append(oids, oid)
212+
}
213+
}
214+
215+
// Retrieve the total size using batch mode.
216+
totalSize, err := repo.getTotalSizeFromOids(oids)
217+
if err != nil {
218+
return UnreachableStats{}, fmt.Errorf("failed to get sizes via batch mode: %w", err)
219+
}
220+
221+
return UnreachableStats{Count: count, Size: totalSize}, nil
222+
}
223+
224+
// getTotalSizeFromOids uses 'git cat-file --batch-check' to retrieve sizes for
225+
// the provided OIDs. It writes each OID to stdin and reads back lines in the
226+
// format: "<oid> <type> <size>".
227+
func (repo *Repository) getTotalSizeFromOids(oids []string) (int64, error) {
228+
cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "cat-file", "--batch-check")
229+
stdinPipe, err := cmd.StdinPipe()
230+
if err != nil {
231+
return 0, fmt.Errorf("failed to get stdin pipe: %w", err)
232+
}
233+
stdoutPipe, err := cmd.StdoutPipe()
234+
if err != nil {
235+
return 0, fmt.Errorf("failed to get stdout pipe: %w", err)
236+
}
237+
238+
if err := cmd.Start(); err != nil {
239+
return 0, fmt.Errorf("failed to start git cat-file batch: %w", err)
240+
}
241+
242+
// Write all OIDs to the batch process.
243+
go func() {
244+
defer stdinPipe.Close()
245+
for _, oid := range oids {
246+
io.WriteString(stdinPipe, oid+"\n")
247+
}
248+
}()
249+
250+
var totalSize int64
251+
scanner := bufio.NewScanner(stdoutPipe)
252+
// Each line is expected to be: "<oid> <type> <size>"
253+
for scanner.Scan() {
254+
parts := strings.Fields(scanner.Text())
255+
if len(parts) == 3 {
256+
var size int64
257+
fmt.Sscanf(parts[2], "%d", &size)
258+
totalSize += size
259+
} else {
260+
return 0, fmt.Errorf("unexpected output format: %s", scanner.Text())
261+
}
262+
}
263+
if err := scanner.Err(); err != nil {
264+
return 0, fmt.Errorf("error reading git cat-file output: %w", err)
265+
}
266+
if err := cmd.Wait(); err != nil {
267+
return 0, fmt.Errorf("git cat-file batch process error: %w", err)
268+
}
269+
return totalSize, nil
270+
}

sizes/output.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -489,8 +489,7 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents {
489489
rgis = append(rgis, rgi.Indented(indent))
490490
}
491491

492-
return S(
493-
"",
492+
sections := []tableContents{
494493
S(
495494
"Repository statistics",
496495
S(
@@ -532,7 +531,6 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents {
532531
"The actual on-disk size of the .git directory",
533532
nil, s.GitDirSize, binary, "B", 1e9),
534533
),
535-
536534
S(
537535
"Annotated tags",
538536
I("uniqueTagCount", "Count",
@@ -610,5 +608,19 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents {
610608
"The maximum number of submodules in any checkout",
611609
s.MaxExpandedSubmoduleCountTree, s.MaxExpandedSubmoduleCount, metric, "", 100),
612610
),
613-
)
611+
}
612+
613+
if s.ShowUnreachable {
614+
sections = append(sections, S(
615+
"Unreachable objects",
616+
I("unreachableObjectCount", "Count",
617+
"The total number of unreachable objects in the repository",
618+
nil, s.UnreachableObjectCount, metric, "", 1e7),
619+
I("unreachableObjectSize", "Uncompressed total size",
620+
"The total size of unreachable objects in the repository",
621+
nil, s.UnreachableObjectSize, binary, "B", 1e9),
622+
))
623+
}
624+
625+
return S("", sections...)
614626
}

sizes/sizes.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,14 @@ type HistorySize struct {
213213

214214
// The actual size of the .git directory on disk.
215215
GitDirSize counts.Count64 `json:"git_dir_size"`
216+
217+
// The total number of unreachable objects in the repository.
218+
UnreachableObjectCount counts.Count64 `json:"unreachable_object_count"`
219+
220+
// The total size of unreachable objects in the repository.
221+
UnreachableObjectSize counts.Count64 `json:"unreachable_object_size"`
222+
223+
ShowUnreachable bool `json:"-"`
216224
}
217225

218226
// Convenience function: forget `*path` if it is non-nil and overwrite

0 commit comments

Comments
 (0)