#!/usr/bin/perl
#
# Copyright © 2012 Niels Thykier
# Copyright © 2020 Felix Lechner
#
# Based on coll/index which is: Copyright © 1998 Christian Schwarz
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::coll::src_orig_index;

no lib '.';

use strict;
use warnings;
use autodie;

use BerkeleyDB;
use Cwd();
use IO::Async::Loop;
use IO::Async::Process;
use MLDBM qw(BerkeleyDB::Btree Storable);
use Path::Tiny;

use lib "$ENV{'LINTIAN_ROOT'}/lib";

use Lintian::Collect::Dispatcher qw(create_info);
use Lintian::Processable::Source;
use Lintian::Util qw(internal_error);

use constant EMPTY => q{};
use constant SPACE => q{ };
use constant COLON => q{:};
use constant SLASH => q{/};
use constant NEWLINE => qq{\n};

sub collect {
    my ($pkg, $type, $dir) = @_;

    my $info = create_info($pkg, $type, $dir);

    my $dbpath = "$dir/src-orig-index.db";
    unlink($dbpath)
      if -f $dbpath;

    # do nothing for native packages
    return
      if $info->native;

    my $dsclink = "$dir/dsc";
    my $dscpath = Cwd::realpath($dsclink);
    die "Cannot resolve 'dsc' link for $pkg: $dsclink"
      unless $dscpath;
    die "The 'dsc' link for $pkg does not point to a file: $dscpath"
      unless -e $dscpath;

    # determine source and version; handles missing fields
    my $proc = Lintian::Processable::Source->new;
    $proc->init($dscpath);

    #  Version handling is based on Dpkg::Version::parseversion.
    my $version = $proc->source_version;
    if ($version =~ /:/) {
        $version =~ s/^(?:\d+):(.+)/$1/
          or die "Bad version number '$version'";
    }

    my $baserev = $proc->source . '_' . $version;

    # strip debian revision
    $version =~ s/(.+)-(?:.*)$/$1/;
    my $base = $proc->source . '_' . $version;

    my @files = split(/\n/, $info->field('files') // EMPTY);

    my %components;
    for my $line (@files) {

        # strip leading whitespace
        $line =~ s/^\s*//;

        next
          unless length $line;

        # get file name
        my (undef, undef, $name) = split(/\s+/, $line);

        next
          unless length $name;

        # skip if files in subdirs
        next
          if $name =~ m{/};

        # Look for $pkg_$version.orig(-$comp)?.tar.$ext (non-native)
        #       or $pkg_$version.tar.$ext (native)
        #  - This deliberately does not look for the debian packaging
        #    even when this would be a tarball.
        if ($name
            =~ /^(?:\Q$base\E\.orig(?:-(.*))?|\Q$baserev\E)\.tar\.(?:gz|bz2|lzma|xz)$/
        ) {
            $components{$name} = $1 // EMPTY;
        }
    }

    die "Could not find any source components for $pkg"
      unless %components;

    my %all;
    for my $tarball (sort keys %components) {

        my $component = $components{$tarball};

        my @tar_options= (
            '--list', '--verbose',
            '--utc', '--full-time',
            '--quoting-style=c','--file'
        );

        # may not be needed; modern tar recognizes lzma and xz
        if ($tarball =~ /\.(lzma|xz)\z/) {
            unshift @tar_options, "--$1";
        }

        my @tar = ('tar', @tar_options, "$dir/$tarball");

        my $loop = IO::Async::Loop->new;
        my $future = $loop->new_future;
        my $stdout;
        my $stderr;

        my $process = IO::Async::Process->new(
            command => [@tar],
            stdout => { into => \$stdout },
            stderr => { into => \$stderr },
            on_finish => sub {
                my ($self, $exitcode) = @_;
                my $status = ($exitcode >> 8);

                path("$dir/orig-index-errors")->append($stderr // EMPTY);

                if ($status) {
                    my $message
                      = "Non-zero status $status from dpkg-deb for control";
                    $message .= COLON . NEWLINE . $stderr
                      if length $stderr;
                    $future->fail($message);
                    return;
                }

                $future->done('Done with dpkg-deb');
                return;
            });

        $loop->add($process);

        $future->get;

        my @lines = split(/\n/, $stdout);

        my %single;
        for my $line (@lines) {

            my $entry = Lintian::File::Path->new;
            $entry->init_from_tar_output($line);

            $single{$entry->name} = $entry;
        }

        # remove base directory from output
        delete $single{''}
          if exists $single{''};

        my $unwanted = EMPTY;
        for my $name (keys %single) {

            my ($candidate) = ($name =~ m,^([^/]+),);

            unless (length $candidate) {
                $unwanted = EMPTY;
                next;
            }

            if ($candidate eq $name
                && !$single{$name}->perm =~ m/^d/) {
                $unwanted = EMPTY;
                next;
            }

            unless (length $unwanted) {
                $unwanted = $candidate;
                next;
            }

            next
              if $candidate eq $unwanted;

            $unwanted = EMPTY;
        }

        # If there is a common prefix and it is $component, then we use that
        # because that is where they will be extracted by unpacked.
        unless ($unwanted eq $component) {

            my %copy;
            for my $name (keys %single) {

                my $adjusted = $name;

                # strip common prefix
                $adjusted =~ s{^\Q$unwanted\E/+}{}
                  if length $unwanted;

                # add component name
                $adjusted = $component . SLASH . $adjusted
                  if length $component;

                # change name of entry
                $single{$name}->name($adjusted);

                # store entry under new name
                $copy{$adjusted} = $single{$name};
            }

            %single = %copy;
        }

        $all{$_} = $single{$_} for keys %single;
    }

    # treat hard links like regular files
    for my $name (keys %all) {
        my $perm = $all{$name}->perm;
        $perm =~ s/^h/-/;
        $all{$name}->perm($perm);
    }

    tie my %h, 'MLDBM',
      -Filename => $dbpath,
      -Flags    => DB_CREATE
      or die "Cannot open file $dbpath: $! $BerkeleyDB::Error\n";

    $h{$_} = $all{$_} for keys %all;

    untie %h;

    return;
}

collect(@ARGV) unless caller;

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
