You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

242 lines
7.3 KiB

#!/usr/bin/perl -w
# This script can be used on a mirror (or e.g. merkel.debian.org) to
# produce an overview of the size of the archive. The numbers reflect
# the "raw" size of the archive: the size of packages and source files.
# It does not include the size of files containing meta data, nor of
# various separate directories.
# Copyright (c) 2009 Frans Pop <fjp@debian.org>
use strict;
use Cwd;
use Getopt::Long;
use File::Temp qw/ tempfile /;
use Compress::Zlib;
our @arches= qw(source all i386 amd64 alpha arm armel hppa ia64 m68k mips mipsel powerpc s390 sparc kfreebsd-i386 kfreebsd-amd64);
our @sects= qw(main contrib non-free main/debian-installer);
our @suites= qw(oldstable stable testing unstable);
our %dists;
our (@source_files, @package_files);
our (%files, %sizes, %total, %width);
our $root="/srv/ftp.debian.org/ftp/dists";
chdir($root) or die "chdir $root: $!";
my ($tfh, $tfile) = tempfile();
END { unlink $tfile if $tfile }
### Collect the data
foreach my $suite (@suites) {
next unless -f "$root/$suite/Release";
if (open RELEASE, "<$root/$suite/Release") {
while (<RELEASE>) {
if (/^Codename:/) {
($dists{$suite}) = m/^Codename:\s+(.*)/i;
last;
}
}
close RELEASE;
}
foreach my $sect (@sects) {
next unless -d "$root/$suite/$sect";
print(STDERR "Processing $suite $sect\n");
foreach my $arch (@arches) {
my $file;
if ($arch eq "source") {
$file = "source/Sources.gz";
next unless -f "$root/$suite/$sect/$file";
parse_sources($file, $suite, $sect);
} else {
$file = "binary-$arch/Packages.gz";
next unless -f "$root/$suite/$sect/$file";
parse_packages($file, $suite, $sect, $arch);
}
}
}
}
### Print the tables
foreach my $suite (@suites) {
next unless exists $dists{$suite};
$width{$suite} = exists $sizes{"d$suite"} ? 16 : 6;
}
print("Total archive size (binary + source) per section:\n\n");
printf("%10s ", "(in MiB)");
foreach my $suite (@suites) {
next unless exists $dists{$suite};
printf("| %$width{$suite}s ", $dists{$suite});
}
printf("| %6s\n", "all");
print_ruler();
foreach my $sect (@sects) {
next unless exists $sizes{all}{$sect};
if ($sect eq "main/debian-installer") {
printf("%-10s", "main/d-i");
} else {
printf("%-10s", $sect);
}
foreach my $suite (@suites) {
next unless exists $dists{$suite};
if (exists $sizes{$suite}{$sect}) {
$total{$suite} += $sizes{$suite}{$sect};
printf(" | %6i", int((1 + $sizes{$suite}{$sect}) /1024/1024));
if (exists $sizes{"d$suite"}{$sect}) {
$total{"d$suite"} += $sizes{"d$suite"}{$sect};
printf(" (+%6i)", int((1 + $sizes{"d$suite"}{$sect}) /1024/1024));
}
} else {
print(" | " . (" " x $width{$suite}));
}
}
printf(" | %6i\n", int((1 + $sizes{all}{$sect}) /1024/1024));
$total{all} += $sizes{all}{$sect};
}
print_ruler();
printf("%-9s ", "total");
foreach my $suite (@suites) {
next unless exists $dists{$suite};
printf(" | %6i", int((1 + $total{$suite}) /1024/1024));
printf(" (+%6i)", int((1 + $total{"d$suite"}) /1024/1024)) if exists $total{"d$suite"};
}
printf(" | %6i", int((1 + $total{all}) /1024/1024)."\n");
print("\n\n");
print("Archive size per architecture (source and arch=all packages are shown separately):\n\n");
printf("%10s ", "(in MiB)");
foreach my $suite (@suites) {
next unless exists $dists{$suite};
printf("| %$width{$suite}s ", $dists{$suite});
}
printf("| %6s\n", "all");
print_ruler();
foreach my $arch (@arches) {
next unless exists $sizes{all}{$arch};
my $parch = $arch;
$parch =~ s/kfree/k/;
printf("%-10s", $parch);
foreach my $suite (@suites) {
next unless exists $dists{$suite};
if (exists $sizes{$suite}{$arch}) {
printf(" | %6i", int((1 + $sizes{$suite}{$arch}) /1024/1024));
printf(" (+%6i)", int((1 + $sizes{"d$suite"}{$arch}) /1024/1024)) if exists $sizes{"d$suite"}{$arch};
} else {
printf(" | " . (" " x $width{$suite}));
}
}
printf(" | %6i\n", int((1 + $sizes{all}{$arch}) /1024/1024));
}
my @ts=gmtime(time());
printf("\nAll numbers reflect the state of the archive per %i %s %i.\n", $ts[3],
(qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$ts[4]], $ts[5] + 1900);
### Functions
sub print_ruler {
print("-" x 11);
foreach my $suite (@suites) {
next unless exists $dists{$suite};
print("|" . "-" x ($width{$suite} + 2));
}
print("|" . "-" x 7 . "\n");
}
sub parse_packages {
local $/ = "\n\n";
my ($file, $suite, $sect, $arch) = @_;
my ($line, $res, $size, $filename, $architecture);
system_redirect_io("gzip -d", "$root/$suite/$sect/$file", "$tfile");
open(TFILE, "<", $tfile) or die "$tfile: $!";
for (;;) {
my $buf;
unless (defined( $buf = <TFILE> )) {
last if eof;
die "$file: $!" if $!;
}
$_ = $buf;
($filename) = m/^Filename:\s+(.*)/im;
$filename =~ s:/+:/:; # remove redundant slashes in paths
($architecture) = m/^Architecture:\s+(.*)/im;
($size) = m/^Size:\s+(\d+)/im;
if (! exists $files{$filename}{$suite}) {
$sizes{$suite}{$sect} += $size;
$sizes{$suite}{$architecture} += $size;
if (($suite eq "stable" && exists $dists{oldstable} &&
! exists $files{$filename}{oldstable}) ||
($suite eq "testing" && exists $dists{stable} &&
! exists $files{$filename}{stable}) ||
($suite eq "unstable" && exists $dists{testing} &&
! exists $files{$filename}{testing})) {
$sizes{"d$suite"}{$sect} += $size;
$sizes{"d$suite"}{$architecture} += $size;
}
}
if (! exists $files{$filename}{x}) {
$sizes{all}{$sect} += $size;
$sizes{all}{$architecture} += $size;
}
$files{$filename}{x} = 1;
$files{$filename}{$suite} = 1;
}
close(TFILE);
}
sub parse_sources {
local $/ = "\n\n";
my ($file, $suite, $sect) = @_;
my ($line, $res, $size, $directory, $filename, $md5sum);
system_redirect_io("gzip -d", "$root/$suite/$sect/$file", "$tfile");
open(TFILE, "<", $tfile) or die "$tfile: $!";
for (;;) {
my $buf;
unless (defined( $buf = <TFILE> )) {
last if eof;
die "$file: $!" if $!;
}
$_ = $buf;
($directory) = m/^Directory:\s+(.*)/im;
while (m/^ ([A-Za-z0-9]{32} .*)/mg) {
($md5sum, $size, $filename)=split(' ', $1, 3);
$filename = "$directory/$filename";
$filename =~ s:/+:/:; # remove redundant slashes in paths
if (! exists $files{$filename}{$suite}) {
$sizes{$suite}{$sect} += $size;
$sizes{$suite}{source} += $size;
if (($suite eq "stable" && exists $dists{oldstable} &&
! exists $files{$filename}{oldstable}) ||
($suite eq "testing" && exists $dists{stable} &&
! exists $files{$filename}{stable}) ||
($suite eq "unstable" && exists $dists{testing} &&
! exists $files{$filename}{testing})) {
$sizes{"d$suite"}{$sect} += $size;
$sizes{"d$suite"}{source} += $size;
}
}
if (! exists $files{$filename}{x}) {
$sizes{all}{$sect} += $size;
$sizes{all}{source} += $size;
}
$files{$filename}{x} = 1;
$files{$filename}{$suite} = 1;
}
}
close(TFILE);
}
# run system() with stdin and stdout redirected to files
# unlinks stdout target file first to break hard links
sub system_redirect_io {
my ($command, $fromfile, $tofile) = @_;
if (-f $tofile) {
unlink($tofile) or die "unlink($tofile) failed: $!";
}
system("$command <$fromfile >$tofile");
}