#!/usr/bin/python3

# Copyright (c) 2015 The CoreOS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import fnmatch
import os
import stat
import sys

import portage
from portage import dep
from portage import output
from portage.dep.soname.SonameAtom import SonameAtom
from portage.dep.soname.parse import parse_soname_deps

# Portage's "vartree" dbapi for portage.root.  The checks below query it via
# cpv_all() and aux_get() for per-package PROVIDES, REQUIRES and CONTENTS
# metadata.
VARDB = portage.db[portage.root]["vartree"].dbapi

def _soname_atoms(arches, sonames):
    """Return a SonameAtom for each (arch, soname) pair, grouped by arch."""
    return [SonameAtom(arch, soname) for arch in arches for soname in sonames]


_ARM64_AND_X86_64 = ("arm_64", "x86_64")
_X86_64 = ("x86_64",)

# Maps a package name to the soname atoms that missing_sonames() must not
# report as missing for that package.
# TODO(marneam): possibly accept globs for arch and sonames
IGNORE_MISSING = {
    # /usr/lib/go/src/debug/elf/testdata/gcc-386-freebsd-exec
    # /usr/lib/go/src/debug/elf/testdata/gcc-amd64-linux-exec
    "dev-lang/go": _soname_atoms(("x86_32", "x86_64"), ("libc.so.6",)),
    "dev-lang/go-bootstrap": _soname_atoms(
        ("x86_32", "x86_64"), ("libc.so.6",)),

    # RPATHs and symlinks apparently confuse the perl-5.24 package
    "dev-lang/perl": _soname_atoms(_ARM64_AND_X86_64, ("libperl.so.5.26.2",)),
    "dev-perl/XML-Parser": _soname_atoms(
        _X86_64, ("libc.so.6", "libexpat.so.1")),
    "dev-perl/libintl-perl": _soname_atoms(_X86_64, ("libc.so.6",)),
    "dev-util/boost-build": _soname_atoms(_X86_64, ("libc.so.6",)),
    "net-dns/dnsmasq": _soname_atoms(_X86_64, ("libc.so.6",)),
    "sys-apps/texinfo": _soname_atoms(
        _X86_64, ("libc.so.6", "libperl.so.5.26")),

    # https://bugs.gentoo.org/show_bug.cgi?id=554582
    "net-firewall/ebtables": _soname_atoms(_ARM64_AND_X86_64, (
        "libebt_802_3.so",
        "libebt_among.so",
        "libebt_arp.so",
        "libebt_arpreply.so",
        "libebt_ip.so",
        "libebt_ip6.so",
        "libebt_limit.so",
        "libebt_log.so",
        "libebt_mark.so",
        "libebt_mark_m.so",
        "libebt_nat.so",
        "libebt_nflog.so",
        "libebt_pkttype.so",
        "libebt_redirect.so",
        "libebt_standard.so",
        "libebt_stp.so",
        "libebt_ulog.so",
        "libebt_vlan.so",
        "libebtable_broute.so",
        "libebtable_filter.so",
        "libebtable_nat.so",
    )),

    # Ignore the Rust libraries in their own libdir.
    "dev-libs/rustlib": _soname_atoms(_ARM64_AND_X86_64, (
        "librustc_data_structures.so",
        "librustc_errors.so",
        "libserialize.so",
        "libstd.so",
        "libsyntax.so",
        "libsyntax_pos.so",
        "libterm.so",
    )),

    "sys-kernel/coreos-modules": _soname_atoms(
        _X86_64, ("libc.so.6", "libcrypto.so.1.0.0")),
}

# Top-level directories (with leading and trailing slash) that usr_conflicts()
# compares against the same-named directories under /usr.
USR_LINKS = ("/bin/", "/sbin/", "/lib/", "/lib32/", "/lib64/")

# fnmatch-style patterns; check_shebang() skips every file whose walked path
# (which includes the ROOT prefix) matches one of them.
IGNORE_SHEBANG = (
    "*/python[0-9].[0-9]/cgi.py",
    "*/usr/lib64/modules/*/source/scripts/*",
    "*/usr/share/nova-agent/*/etc/gentoo/nova-agent",
    "*/tmp/*",
    "*/Documentation/*",
    "*/doc/*",
)

# fnmatch-style patterns; check_symlink() skips every path that matches one of
# them.  They are bytes because check_symlink() walks b"/" and therefore
# compares bytes paths.
IGNORE_SYMLINK = (
    # symlinks to sdk chroot
    b"/build/*",
    b"/var/tmp/portage/*",
    b"/etc/portage/*",

    # symlinks to /run
    b"/usr/share/baselayout/motd",
    b"/etc/issue",
    b"/etc/motd",

    # Other
    b"/etc/lsb-release",  # set later in the build process
    b"/usr/share/coreos", # set later in the build process
    b"/etc/coreos",       # set later in the build process
    b"/usr/src/linux-*/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S", # broken symlink in Kernel source tree
    b"/usr/lib*/python3*/site-packages/*egg-info", # broken symlink from dev-python/certifi that is not filtered by INSTALL_MASK
)


def provided_sonames():
    """Yield every soname atom that is considered available.

    First come the atoms parsed from the PROVIDES metadata of each package
    returned by VARDB.cpv_all(), then the entries of
    VARDB.settings.soname_provided.
    """
    for package in VARDB.cpv_all():
        yield from parse_soname_deps(VARDB.aux_get(package, ["PROVIDES"])[0])

    # soname.provided in PORTAGE_CONFIGROOT
    yield from VARDB.settings.soname_provided


def ignore_sonames(cpv):
    """Yield the soname atoms that may be missing for ``cpv`` without error.

    Every IGNORE_MISSING key that dep.match_to_list() matches against ``cpv``
    contributes its complete list of atoms.
    """
    for matched in dep.match_to_list(cpv, IGNORE_MISSING.keys()):
        yield from IGNORE_MISSING[matched]


def missing_sonames():
    """Yield ``(cpv, missing)`` for each package with unsatisfied sonames.

    ``missing`` is the frozenset of atoms from the package's REQUIRES metadata
    that are neither produced by provided_sonames() nor excused by
    ignore_sonames() for that package.  Packages with nothing missing are
    skipped.
    """
    available = frozenset(provided_sonames())
    for package in VARDB.cpv_all():
        needed = frozenset(
            parse_soname_deps(VARDB.aux_get(package, ["REQUIRES"])[0]))
        excused = frozenset(ignore_sonames(package))
        unresolved = needed.difference(available, excused)
        if unresolved:
            yield (package, unresolved)


def usr_conflicts():
    """Yield ``(cpv, conflicts)`` for packages owning a path twice.

    A conflict is a path below one of the USR_LINKS directories that the
    package's CONTENTS lists both directly under the root and under /usr
    (for example both /bin/foo and /usr/bin/foo).  ``conflicts`` is a
    frozenset of those paths without the /usr prefix.
    """
    for package in VARDB.cpv_all():
        contents = VARDB.aux_get(package, ["CONTENTS"])[0]
        under_usr = set()
        under_root = set()

        # CONTENTS entries look like:
        #  obj /path goo 123
        #  dir /path/foo
        #  sym /this -> that 123
        # Only "obj" and "sym" entries are of interest.
        for entry in contents.split("\n"):
            if entry[:4] not in ("obj ", "sym "):
                continue

            # The path starts at offset 4; fixed offsets and str.find() are
            # used instead of splitting the entry to keep this loop cheap.
            first_end = entry.find("/", 5)
            first_dir = entry[4:first_end + 1]
            if first_dir == "/usr/":
                # Offset 8 is the slash that ends "/usr".
                second_end = entry.find("/", 9)
                second_dir = entry[8:second_end + 1]
                if second_dir in USR_LINKS:
                    under_usr.add(entry[8:entry.find(" ", 8)])
            elif first_dir in USR_LINKS:
                under_root.add(entry[4:entry.find(" ", 4)])

        overlap = frozenset(under_root & under_usr)
        if overlap:
            yield (package, overlap)


def check_libs():
    """Report packages with missing libraries; return True if there are none.

    Each package yielded by missing_sonames() is reported through error(),
    followed by one tab-indented line per missing soname.
    """
    passed = True
    for package, unresolved in missing_sonames():
        passed = False
        error("%s is missing libraries:", package)
        for atom in unresolved:
            error("\t%s", atom)
    return passed


def check_usr():
    """Report packages with /usr path conflicts; return True if there are none.

    Each package yielded by usr_conflicts() is reported through error(),
    followed by one tab-indented line per conflicting path.
    """
    passed = True
    for package, clashing in usr_conflicts():
        passed = False
        error("%s has paths that conflict with /usr", package)
        for clash in clashing:
            error("\t%s", clash)
    return passed


def is_exe(path):
    """Return True if ``path`` is a regular file that others may read and run.

    The file is examined with os.lstat(), so a symlink itself is never
    considered executable.  Only the "other" permission bits are checked, on
    the assumption that commands restricted to root or a group are not
    scripts.
    """
    mode = os.lstat(path).st_mode
    if not stat.S_ISREG(mode):
        return False
    wanted = stat.S_IROTH | stat.S_IXOTH
    return mode & wanted == wanted


def _shebang_command(first_line):
    """Return the command named by a script's first line, or None.

    ``first_line`` is the raw bytes read from the start of the file.  None is
    returned when it does not start with ``#!`` or names no command at all.
    An ``env`` indirection is reported as ``(env)/<prog>``; a single leading
    option such as ``-S`` is skipped to find ``<prog>``.
    """
    if not first_line.startswith(b"#!"):
        return None
    # Undecodable bytes (binary data, or a multi-byte character cut off by the
    # read limit) must not abort the whole check.
    args = first_line[2:].decode("utf-8", errors="replace").split()
    if not args:
        return None
    cmd = args.pop(0)
    if cmd in ("/usr/bin/env", "/bin/env") and args:
        prog = args.pop(0)
        if prog.startswith("-") and args:
            prog = args.pop(0)
        cmd = "(env)/%s" % prog
    return cmd


def _shebang_target_exists(root, cmd):
    """Return True if ``cmd`` (from _shebang_command) exists below ``root``."""
    env_prefix = "(env)/"
    if cmd.startswith(env_prefix):
        prog = cmd[len(env_prefix):]
        if os.path.isabs(prog):
            # os.path.join() would discard root for an absolute component.
            return os.path.exists(os.path.join(root, prog.lstrip("/")))
        return any(
            os.path.exists(os.path.join(root, bindir, prog))
            for bindir in ("usr/bin", "usr/sbin"))
    return os.path.exists(os.path.join(root, cmd.lstrip("/")))


def check_shebang():
    """Verify that the interpreter of every executable script exists.

    Walks the tree named by the ROOT environment variable ("/" when unset or
    empty).  Files matching IGNORE_SHEBANG or failing is_exe() are skipped.
    For the rest, the ``#!`` line is parsed and the interpreter is looked up
    inside ROOT; programs run through ``env`` are searched for in usr/bin and
    usr/sbin.  Each missing interpreter is reported through error().

    Returns:
        True if no script refers to a missing interpreter, otherwise False.
    """
    ok = True
    found = {}  # command -> whether it exists below root
    # Keep root a str: IGNORE_SHEBANG holds str patterns, and fnmatch cannot
    # match those against the bytes paths a bytes root would produce.
    root = os.environ.get("ROOT") or "/"
    for parent, _, files in os.walk(root):
        for name in files:
            path = os.path.join(parent, name)
            if any(fnmatch.fnmatchcase(path, pat) for pat in IGNORE_SHEBANG):
                continue
            if not is_exe(path):
                continue
            with open(path, "rb") as fd:
                cmd = _shebang_command(fd.readline(80))
            if cmd is None:
                continue
            if cmd not in found:
                found[cmd] = _shebang_target_exists(root, cmd)
            if not found[cmd]:
                # Report the path as seen from inside ROOT, always with a
                # leading slash regardless of how ROOT itself is spelled.
                relpath = "/" + os.path.relpath(path, root)
                error("%s: %s does not exist", relpath, cmd)
                ok = False
    return ok


class chrooted():
    """
    chrooted provides a context so that it can be used via with.
    For example:

    with chrooted("/some/rootfs"):
       do_operations_in_rootfs()
    do_operations_not_in_rootfs()

    Entering opens a descriptor on the current root directory, records the
    working directory and calls os.chroot(path).  Leaving uses the descriptor
    to chroot back to the original root and restores the working directory.
    The working directory is not changed on entry, so code inside the block
    should use absolute paths.
    """

    def __init__(self, path):
        # Directory that becomes the root inside the with-block.
        self.path = path

    def __enter__(self):
        self.restore_fd = os.open(b"/", os.O_RDONLY)
        try:
            self.working_dir = os.getcwd()
            os.chroot(self.path)
        except Exception:
            # __exit__ is not called when __enter__ raises, so the descriptor
            # has to be released here.
            os.close(self.restore_fd)
            raise

    def __exit__(self, type, value, traceback):
        try:
            os.fchdir(self.restore_fd)
            os.chroot(b".")
            os.chdir(self.working_dir)
        finally:
            # Release the descriptor even if restoring the root failed.
            os.close(self.restore_fd)


def check_symlink():
    """Report broken symlinks inside ROOT; return True if there are none.

    The check chroots into the directory named by the ROOT environment
    variable ("/" when unset or empty) and therefore refuses to run unless
    the effective user is root.  Inside the chroot every file and directory
    entry is examined; paths matching IGNORE_SYMLINK are skipped, and each
    remaining symlink whose target does not exist is reported through
    error().

    Returns:
        True if no broken symlink was found, otherwise False (also when not
        running as root).
    """
    if os.getuid() != 0:
        error("symlink check must be run as root (chroot)")
        return False

    ok = True
    root = os.environ.get("ROOT") or b"/"

    with chrooted(root):
        # Walk with bytes paths to match the bytes patterns in IGNORE_SYMLINK.
        for parent, dirs, files in os.walk(b"/"):
            for name in files + dirs:
                path = os.path.join(parent, name)
                if any(fnmatch.fnmatchcase(path, pat)
                       for pat in IGNORE_SYMLINK):
                    continue

                if os.path.islink(path) and not os.path.exists(path):
                    ok = False
                    # Decode so the message shows the path itself instead of
                    # the b'...' repr of a bytes object.
                    error("broken link: %s", os.fsdecode(path))

    return ok


def error(fmt, *args):
    """Print ``fmt % args`` to stderr, passed through output.red()."""
    message = output.red(fmt % args)
    print(message, file=sys.stderr)


def main():
    """Run the checks named on the command line and return an exit status.

    With no arguments every known check runs.  An unknown name is reported
    together with the list of valid names and counts as a failure.  Colour
    output is disabled when stderr is not a terminal.

    Returns:
        0 if every requested check passed, otherwise 1.
    """
    check_funcs = {
            "libs": check_libs,
            "usr": check_usr,
            "shebang": check_shebang,
            "symlink": check_symlink,
    }

    if not sys.stderr.isatty():
        output.nocolor()

    requested = sys.argv[1:] or check_funcs.keys()

    failed = False
    for check in requested:
        func = check_funcs.get(check)
        if not func:
            error("Unknown test name '%s'", check)
            error("Valid tests: %s", " ".join(check_funcs))
            failed = True
            continue
        if not func():
            failed = True

    return 1 if failed else 0

# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
