#!/bin/bash
# Filename:      grml-hwinfo
# Purpose:       get hardware information
# Authors:       grml-team (grml.org), (c) Michael Prokop <mika@grml.org>
# Bug-Reports:   see http://grml.org/bugs/
# License:       This file is licensed under the GPL v2.
################################################################################
# Notice: Some ideas have been taken from
# http://club.black.co.at/david/hwdb/infodump
# by David Schmitt <david@schmitt.edv-bus.at>
################################################################################

# --- global settings ---------------------------------------------------------
# UNAME:       release of the running kernel
# PN:          name this script was invoked as
# WORKING_DIR: taken from the environment when it names an existing directory,
#              otherwise the current directory is used
# VERSION:     version string shown in the banner
UNAME="$(uname -r)"
PN="$(basename "$0")"
if [ -z "$WORKING_DIR" ] || [ ! -d "$WORKING_DIR" ] ; then
  WORKING_DIR=$(pwd)
fi
VERSION='***UNRELEASED***'

# collected data must not depend on the locale of the calling user
export LANG=C
export LC_ALL=C

# date(1) format used for the timestamp part of the output name
TIMESTAMP='+%F--%H-%M-%S-%Z'
DATE="$(date "$TIMESTAMP")"

# defaults: write a tarball, do not keep the directory
GENERATE_FILE='1'
GENERATE_DIRECTORY=''
# option flags; they hold the command names "true"/"false" and are executed
# directly in conditions further down
_opt_output_directory=false
_opt_output_file=false
_opt_quiet=false
_opt_force=false

# Print the help text to stdout.
usage() {
  cat <<'EOF'

  This tool collects information of the hardware it is being executed on.
  It can be executed as normal user to collect some basic information or
  (recommended) with root permissions to collect more system information.
  If executed without any options a file named grml-hwinfo-TIMESTAMP.tar.bz2
  storing all collected information is created in the current working directory.

  Options:

  -b, --both                 Create directory + file grml-hwinfo-TIMESTAMP.tar.bz2
  -d, --directory            Create grml-hwinfo-TIMESTAMP as a directory (no file)
  -f, --file                 Create grml-hwinfo-TIMESTAMP.tar.bz2 [default action]
  -h, --help                 Display this help message
  -q, --quiet                Don't display informational text (useful for cron usage)
  --force                    Don't abort but overwrite possibly existing output file
  --output-directory <dir>   Store output files in specified directory
  --output-file <file>       Store output in specified filename (tar.XX format)
EOF
  # the help text ends with a line that holds just two blanks
  echo '  '
}

# Command line handling: let getopt normalise the arguments, then walk
# through them one by one until the "--" end marker is reached.
CMDLINE_OPTS=output-directory:,output-file:,both,directory,file,help,quiet,force
if ! _opt_temp=$(getopt --name grml-hwinfo -o +bdfhq --long "$CMDLINE_OPTS" -- "$@") ; then
  echo "Try 'grml-hwinfo --help' for more information." >&2
  exit 1
fi
eval set -- "$_opt_temp"

while true ; do
  case "$1" in
  -h|--help)
    usage
    exit 0
    ;;
  -q|--quiet)
    _opt_quiet=true
    ;;
  --force)
    _opt_force=true
    ;;
  -b|--both)
    GENERATE_FILE='1'
    GENERATE_DIRECTORY='1'
    ;;
  -f|--file)
    GENERATE_FILE='1'
    GENERATE_DIRECTORY=''
    ;;
  -d|--directory)
    GENERATE_FILE=''
    GENERATE_DIRECTORY='1'
    ;;
  --output-directory)
    # the directory name is the next argument
    OUTDIRNAME="$2"
    shift
    GENERATE_DIRECTORY='1'
    _opt_output_directory=true
    # a tarball is only written as well if --output-file was seen before
    if $_opt_output_file ; then
      GENERATE_FILE='1'
    else
      GENERATE_FILE=''
    fi
    ;;
  --output-file)
    # the file name is the next argument
    OUTFILE="$2"
    shift
    GENERATE_FILE='1'
    _opt_output_file=true
    # the directory is only kept as well if --output-directory was seen before
    if $_opt_output_directory ; then
      GENERATE_DIRECTORY='1'
    else
      GENERATE_DIRECTORY=''
    fi
    ;;
  --)
    shift
    break
    ;;
  *)
    echo "Internal getopt error!" >&2
    exit 1
    ;;
  esac
  shift
done

if ! $_opt_quiet ; then
  echo "$PN ${VERSION} - collect hardware information"
fi

# Generate output/temporary directory name & path, and output file path
[ -n "$OUTDIRNAME" ] || OUTDIRNAME="grml-hwinfo-${DATE}"
if $_opt_output_directory ; then
  # a directory given via --output-directory is used as is
  OUTDIR="${OUTDIRNAME}"
else
  OUTDIR="${WORKING_DIR}/${OUTDIRNAME}"
fi

# Create the directory the data is collected in. Without --force an already
# existing directory is an error; any other failure to create it is reported
# as such instead of being mislabelled as "already exists".
if $_opt_force ; then
  mkdir -p "${OUTDIR}" || { echo "Could not create directory '${OUTDIR}', aborting." >&2 ; exit 1; }
elif [ -e "${OUTDIR}" ] ; then
  echo "Directory '${OUTDIR}' already exists, aborting." >&2
  exit 1
else
  mkdir "${OUTDIR}" || { echo "Could not create directory '${OUTDIR}', aborting." >&2 ; exit 1; }
fi

if [ -n "$GENERATE_FILE" ] ; then
  # default to a tarball named after the output directory
  [ -n "$OUTFILE" ] || OUTFILE="${OUTDIR}.tar.bz2"
  if ! $_opt_force && [ -e "${OUTFILE}" ] ; then
    echo "File '${OUTFILE}' already exists, aborting." >&2
    # the directory was created just above, do not leave it behind
    rm -r "${OUTDIR}"
    exit 1
  fi
  # Create the output file right away; failing here is cheaper than finding
  # out that it cannot be written after all data has been collected.
  if ! touch "${OUTFILE}" ; then
    echo "Could not create file '${OUTFILE}', aborting." >&2
    # with --force the directory may have existed before, so keep it then
    $_opt_force || rm -r "${OUTDIR}"
    exit 1
  fi
fi

# NOTROOT is non-empty when running without root permissions; the root-only
# part of the data collection is skipped in that case.
if [ "$(id -u)" != "0" ] ; then
  NOTROOT=1
  $_opt_quiet || echo "W: Running without root permissions. Not all data will be collected."
fi

# Check whether a binary is available and executable.
#
# Arguments: $1 - name of the binary to look up in PATH
# Globals:   _opt_quiet (read)
# Returns:   0 if the binary was found, 1 if it is missing or no name was given
#
# Unless running in quiet mode, the name of a missing binary is appended to
# the file "missing_tools" in the current directory, once per name.
exectest() {
  if [ -z "$1" ] ; then
    echo 'Usage: exectest <binary>'>&2
    return 1
  fi

  # "type -P" is a bash builtin that searches PATH only, so no external
  # which(1) is needed and functions/aliases/builtins do not count as found.
  if type -P -- "$1" >/dev/null 2>&1 ; then
    return 0
  fi

  # Compare the whole line as a fixed string; the name must not be treated
  # as a regular expression (e.g. "foo.bar" would match "fooXbar").
  if ! grep -qxF -e "$1" missing_tools 2>/dev/null ; then
    $_opt_quiet || echo "$1" >> missing_tools
  fi
  return 1
}

# Store the names of all disk block devices in the global variable
# "disklist", one name per line. Devices with major number 7 (loopback)
# and 11 (CD-ROM) are left out.
disk_info() {
  disklist="$(lsblk --noheadings --nodeps --output NAME --exclude 7,11)"
}

# Fill the global array "${niclist[@]}" with the names of all network
# devices found below /sys/class/net. The loopback device is not filtered
# out here.
get_network_devices() {
  local sysfs_entry
  niclist=()
  for sysfs_entry in /sys/class/net/* ; do
    # an unmatched glob is left as the literal pattern, ignore that
    if [ -e "${sysfs_entry}" ] ; then
      niclist+=("${sysfs_entry##*/}")
    fi
  done
}

# Collect NVMe information using the nvme command line tool.
#
# Plain text output is written to ./nvme_* in the current directory, JSON
# output to ./json/nvme_* (the json directory has to exist already).
# Returns 0 without collecting anything if the nvme tool is missing or if
# `nvme list` fails.
nvme_info() {
  # keep loop variables out of the global scope, "disk" is also used by
  # the caller
  local disk disk_name log
  local -a nvme_disks=()

  if ! exectest nvme ; then
    return 0
  fi

  # `nvme list` returns with `Failed to scan topology` and exit code 1,
  # if there's no NVMe device present. There's no point in running any
  # further commands then, so let's avoid collecting empty files and
  # error messages
  if ! nvme list &>/dev/null ; then
    return 0
  fi

  # list
  nvme list > ./nvme_list 2>./nvme_list.error
  nvme list --output-format=json > ./json/nvme_list

  # list-subsys
  nvme list-subsys > ./nvme_list-subsys 2>./nvme_list-subsys.error
  nvme list-subsys --output-format=json > ./json/nvme_list-subsys

  # get list of available disks, turning e.g. nvme0n1 into /dev/nvme0n1
  mapfile -t nvme_disks < <(lsblk -nd -o NAME -e 7,11 | sed -n 's/^nvme/\/dev\/&/p')

  for disk in "${nvme_disks[@]}" ; do
    disk_name="${disk##*/}" # /dev/nvme0n1 -> nvme0n1

    # each log page is stored as plain text and as JSON
    for log in smart-log fw-log error-log effects-log ; do
      nvme "${log}" "$disk" > "./nvme_${log}_${disk_name}"
      nvme "${log}" --output-format=json "$disk" > "./json/nvme_${log}_${disk_name}"
    done
  done
}

# Find out whether an X server can be used.
#
# With xset available the server is queried (giving up after one second);
# without xset an empty $DISPLAY variable is taken as "no X server".
# NO_DISPLAY ends up as 1 if no X server is usable, 0 otherwise.
NO_DISPLAY=0
if exectest xset ; then
  timeout 1s xset q &>/dev/null || NO_DISPLAY=1
else
  [ -n "${DISPLAY}" ] || NO_DISPLAY=1
fi

if [ "${NO_DISPLAY}" -eq 1 ] && ! $_opt_quiet ; then
  echo "W: Running without X server. Not all data will be collected."
fi

cd "${OUTDIR}" || exit 1
# The data collection runs in a subshell. All relative paths below refer to
# the output directory entered above; every tool writes its output into a
# file named after it, stderr usually goes to a matching *.error file.
(
  if ! $_opt_quiet ; then
    [ -n "$GENERATE_FILE" ]      && echo "Output file:      $OUTFILE"
    [ -n "$GENERATE_DIRECTORY" ] && echo "Output directory: $OUTDIR"
    echo
    echo "This might take a few seconds/minutes. Please be patient..."
  fi

  # prepare sub directory for json files
  mkdir -p json

  # some sysinfo
  date > ./date
  if [ -r /etc/grml_version ] ; then
     cat /etc/grml_version > grml_version
  fi
  if [ -r /etc/debian_version ] ; then
     cat /etc/debian_version > debian_version
  fi
  uname -a > ./uname

  # inxi
  exectest inxi  && inxi -FJm > ./inxi_standard 2>./inxi_standard.error
  exectest inxi  && inxi -FJmz > ./inxi_standard_filter 2>./inxi_standard_filter.error
  exectest inxi  && inxi -FJm -xx > ./inxi_extra 2>./inxi_extra.error
  exectest inxi  && inxi -FJmz -xx > ./inxi_extra_filter 2>./inxi_extra_filter.error
  exectest inxi  && inxi -FJm --admin > ./inxi_admin 2>./inxi_admin.error
  exectest inxi  && inxi -FJmz --admin > ./inxi_admin_filter 2>./inxi_admin_filter.error

  # disks / devices
  [ -f /proc/scsi/scsi ] && cat /proc/scsi/scsi > scsi
  exectest lspci && lspci -nn >./lspci 2>./lspci.error
  exectest lspci && lspci -vvnn >./lspci_verbose 2>./lspci_verbose.error
  cat /proc/partitions > partitions
  find /proc/ide/ -name geometry -exec grep . {} \; > proc_ide 2>/dev/null
  df -h > ./df 2>/dev/null
  for i in free lsmod mount lsdev lspnp ; do
    exectest "$i" && "$i" > "./$i"
  done

  if exectest findmnt ; then
    findmnt > ./findmnt 2>./findmnt.error
    # json output is supported since util-linux v2.29.2:
    if findmnt --help | grep -q 'json' ; then
      findmnt --json > ./json/findmnt 2>./json/findmnt.error
    fi
  fi

  if exectest losetup ; then
    losetup -a > ./losetup 2>./losetup.error
    # json output is supported since util-linux v2.29.2:
    if losetup --help | grep -q 'json' ; then
      losetup --json > ./json/losetup 2>./json/losetup.error
    fi
  fi

  if exectest lsusb ; then
    lsusb    > ./lsusb
    lsusb -v > ./lsusb_verbose 2>./lsusb_verbose.error
  fi

  swapon -s > ./swapon 2>./swapon.error

  # proc stuff
  for i in cpuinfo interrupts cmdline devices dma fb iomem ioports \
    mdstat meminfo modules mtrr pci uptime version ; do
    [ -r "/proc/$i" ] && cat "/proc/$i" > "proc_$i"
  done

  if ! $_opt_quiet ; then
    echo "Starting sysdump..."
    echo "  NOTE: if it seems to be hanging at this stage file a bug report with output of:"
    echo "        lsof -p \$(pgrep -f "\$\(which sysdump\)")"
  fi
  exectest sysdump  && sysdump > ./sysdump 2>./sysdump.error
  if ! $_opt_quiet ; then
    echo "Execution of sysdump finished."
  fi

  exectest cpuid && cpuid > ./cpuid 2>./cpuid.error

  exectest uptime && uptime > ./uptime 2>./uptime.error

  # log
  dmesg > dmesg.cur

  # hwinfo
  exectest discover && discover -v --type-summary --enable-bus all > ./discover 2>./discover.error
  exectest hwinfo   && hwinfo log=hwinfo
  exectest numactl  && numactl --hardware > ./numactl
  exectest x86info  && x86info > ./x86info 2>./x86info.error
  if exectest lscpu ; then
    lscpu > ./lscpu
    lscpu -e > ./lscpu_extended
    # json output is supported since util-linux v2.33.1:
    if lscpu --help | grep -q 'json' ; then
      lscpu --json > ./json/lscpu
    fi
  fi

  # EFI
  exectest efibootmgr && efibootmgr -v >efibootmgr 2>efibootmgr.error

  # net stuff, net-tools:
  exectest ifconfig && ifconfig -v -a > ./ifconfig
  exectest route    && route -n       > ./route

  # net stuff, ethtool
  if exectest ethtool ; then
    get_network_devices
    for dev in "${niclist[@]}" ; do
      ethtool          "${dev}" > ethtool_"${dev}"
      case "${dev}" in
        "lo")
          # skip the loopback device, `ethtool --driver lo` fails with:
          # "Cannot get driver information: Operation not supported"
          ;;
        *)
          ethtool --driver "${dev}" > ethtool_"${dev}_driver"
          ;;
      esac
    done
  fi

  # net stuff, iproute:
  exectest ip && ip addrlabel list > ip_addrlabel
  exectest ip && ip addr show      > ip_addr
  exectest ip && ip link show      > ip_link
  exectest ip && ip maddr show     > ip_maddr
  exectest ip && ip mroute show    > ip_mroute
  exectest ip && ip mrule show     > ip_mrule 2>ip_mrule.error
  exectest ip && ip neigh show     > ip_neigh
  exectest ip && ip netconf        > ip_netconf
  exectest ip && ip netns list     > ip_netns
  exectest ip && ip ntable show    > ip_ntable
  exectest ip && ip route show     > ip_route
  # one file per routing table listed in rt_tables (first column of each
  # non-comment line)
  exectest ip && if [ -r /etc/iproute2/rt_tables ] ; then
                   grep -v '^#' /etc/iproute2/rt_tables | while read -r table _ ; do
                     # blank lines do not name a table
                     [ -n "${table}" ] || continue
                     ip route show table "${table}" > "ip_route_table_${table}" 2> "ip_route_table_${table}".error
                   done
                 fi
  exectest ip && ip rule show      > ip_rule
  exectest ip && ip tunnel show    > ip_tunnel
  exectest ip && ip tuntap show    > ip_tuntap

  # software
  if exectest dpkg ; then
    dpkg --get-selections   > dpkg_get_selections
    COLUMNS=300 dpkg --list > dpkg_list
  fi

  # power management
  # the file laptop_detected is only created if laptop-detect exits with 0
  exectest laptop-detect  && laptop-detect >/dev/null 2>/dev/null && echo "0" > laptop_detected
  if [ -r /proc/acpi/info ] ; then
    cat /proc/acpi/info > acpi_info
  fi

  if exectest acpi ; then
    acpi > ./acpi 2>acpi.error
    acpi --everything > ./acpi.everything 2>./acpi.everything.error
    acpi -v > ./acpi.version
  fi
  [ -r /proc/apm/ ] && exectest apm && apm > ./apm

  # kernel stuff
  if [ -r /proc/config.gz ] ; then
    zcat /proc/config.gz > kernelconfig
  else
    [ -r "/boot/config-${UNAME}" ] && cat "/boot/config-${UNAME}" > kernelconfig
  fi

  exectest dpkg && COLUMNS=1000 dpkg -l "linux-image-${UNAME}" 2>running_kernel.error \
           | grep "linux-image-${UNAME}" | tr -s ' ' > running_kernel 2>>running_kernel.error

  # X stuff
  if [ "${NO_DISPLAY}" -eq 0 ] ; then
    exectest xviddetect  && xviddetect         > ./xviddetect
    # NOTE(review): the output file is named "xdivtune" (sic), presumably a
    # typo for xvidtune; the name is kept so existing consumers keep working
    exectest xvidtune    && xvidtune -show     > ./xdivtune
    exectest xrandr      && xrandr             > ./xrandr
    exectest xdpyinfo    && xdpyinfo           > ./xdpyinfo
    # stderr is captured as well, so x_version exists even if X is missing
    X -version > x_version 2>&1
  fi

  for i in Xorg.0.log Xorg.7.log Xorg.8.log XFree86.0.log XFree86.7.log XFree86.8.log dmesg ; do
    cp "/var/log/$i" "log_$i" 2>/dev/null
  done

  if [ -r "$HOME"/.local/share/xorg/Xorg.0.log ] ; then
    cp "$HOME"/.local/share/xorg/Xorg.0.log user_Xorg.0.log
  fi

  cp /etc/X11/xorg.conf    xorg.conf    2>/dev/null
  cp /etc/modules          modules      2>/dev/null
  cp /etc/X11/XF86Config-4 XF86Config-4 2>/dev/null

  # as root:
  if [ -n "$NOTROOT" ] ; then
    echo "not running as root" > root
  else
    echo "running as root" > root
    # sets $disklist, used by the per-disk loop at the end
    disk_info

    exectest dmidecode  && dmidecode > ./dmidecode

    if exectest edac-util ; then
      edac-util > ./edac-util 2>./edac-util.error
      edac-util --report=full > edac-util_report 2>edac-util_report.error
    fi

    if exectest decode-dimms ; then
      decode-dimms > ./decode-dimms 2>./decode-dimms.error
    elif [ -x /usr/share/doc/lm-sensors/examples/eeprom/decode-dimms.pl ] ; then
      /usr/share/doc/lm-sensors/examples/eeprom/decode-dimms.pl > decode-dimms 2>decode-dimms.error
    fi

    if exectest acpidump ; then
      acpidump > ./acpidump 2>./acpidump.error
    fi

    if exectest mokutil ; then
      mokutil --sb-state > ./mokutil_state 2>./mokutil_state.error
    fi

    # proxmox
    exectest qm && qm list > ./qm 2>./qm.error
    # libvirt
    exectest virsh && virsh list >./virsh 2>./virsh.error
    # openvz
    exectest vzlist && vzlist >./vzlist 2>./vzlist.error
    # vserver
    exectest vserver-stat && vserver-stat >./vserver-stat 2>./vserver-stat.error

    exectest mdadm && mdadm --detail /dev/md[0-9]* >> ./mdadm 2>./mdadm.error

    # LVM
    exectest pvs && pvs > ./pvs 2>./pvs.error
    exectest vgs && vgs > ./vgs 2>./vgs.error
    exectest lvs && lvs > ./lvs 2>./lvs.error
    exectest lvdisplay && lvdisplay > ./lvdisplay 2>./lvdisplay.error

    exectest dmsetup && dmsetup ls > dmsetup_ls 2>dmsetup_ls.error
    exectest dmsetup && dmsetup ls --tree > dmsetup_ls_tree 2>dmsetup_ls_tree.error
    if exectest lsblk ; then
      lsblk -o +LABEL,PARTLABEL,UUID,FSTYPE,SERIAL > ./lsblk 2>./lsblk.error
      # json output is supported since util-linux v2.33.1:
      if lsblk --help | grep -q 'json' ; then
        lsblk -o +LABEL,PARTLABEL,UUID,FSTYPE,SERIAL --json > ./json/lsblk 2>./json/lsblk.error
      fi
    fi

    # ZFS
    if exectest zpool; then
      if [ -n "$(zpool list -H -o name)" ]; then
        if ! $_opt_quiet ; then
          echo "Reading ZFS data... this might take a while."
        fi
        mkdir -p ./zfs
        # all queries are started in the background and collected by the
        # "wait" below
        for pool in $(zpool list -H -o name); do
          # for a quick overview
          zpool list -vPL "$pool" > ./zfs/"$pool".list 2> ./zfs/"$pool".list.error &
          zpool status -vtiP "$pool" > ./zfs/"$pool".status 2> ./zfs/"$pool".status.error &
          zpool iostat -rp "$pool" > ./zfs/"$pool".histogram 2> ./zfs/"$pool".histogram.error &
          zfs list -o space,referenced,logicalreferenced,quota,refquota \
            -t filesystem,volume -r "$pool" > ./zfs/"$pool".zfslist 2> ./zfs/"$pool".zfslist.error &
          # machine parsable data
          zpool get -p all "$pool" > ./zfs/"$pool".props 2> ./zfs/"$pool".props.error &
          zfs get -p all -t filesystem,volume -r "$pool" > ./zfs/"$pool".zfsprops 2> ./zfs/"$pool".zfsprops.error &
          # additional json dumps
          zpool get all --json --json-int "$pool" > ./json/zpool."$pool" 2> ./json/zpool."$pool".error &
          zfs get all --json --json-int -t filesystem,volume -r "$pool" > ./json/zfs."$pool" 2> ./json/zfs."$pool".error &
        done
        arc_summary --raw --alternate > ./zfs/arc_summary 2> ./zfs/arc_summary.error &
        wait
        if ! $_opt_quiet ; then
          echo "Finished reading ZFS data."
        fi
      fi
    fi

    # iSCSI
    if exectest iscsiadm ; then
      iscsiadm -m session > iscsiadm_session 2>iscsiadm_session.error
      iscsiadm -m fw > iscsiadm_fw 2>iscsiadm_fw.error
      iscsiadm -m host > iscsiadm_host 2>iscsiadm_host.error
      iscsiadm -m iface > iscsiadm_iface 2>iscsiadm_iface.error
      iscsiadm -m node > iscsiadm_node 2>iscsiadm_node.error
      iscsiadm -m discovery > iscsiadm_discovery 2>iscsiadm_discovery.error
    fi

    if exectest lsscsi ; then
      lsscsi    > ./lsscsi 2>./lsscsi.error
      lsscsi -t > ./lsscsi_transport 2>./lsscsi_transport.error
    fi

    # NVMe
    nvme_info

    # Per-disk information. The output of all disks is appended to one file
    # per tool, each section preceded by a line naming the command.
    # $disklist is a newline separated list and split on purpose.
    smartctl_json_open=false
    for disk in $disklist; do
      if exectest sfdisk && [[ -b "/dev/${disk}" ]] ; then
        sfdisk -d "/dev/${disk}" > "./sfdisk_${disk}" 2>"./sfdisk_${disk}.error"
        # json output is supported since util-linux v2.29.2:
        if sfdisk --help | grep -q 'json' ; then
          sfdisk -d "/dev/${disk}" --json > ./json/sfdisk_"${disk}" 2>./json/sfdisk_"${disk}".error
        fi
      fi

      if exectest smartctl ; then
        echo -e "smartctl -a /dev/${disk}:\n" >> smartctl
        smartctl -a "/dev/$disk" >> ./smartctl
        echo -e "\n\n" >> ./smartctl
        # The JSON output of all disks forms one array: open it before the
        # first element and separate further elements with a comma, so the
        # file stays valid JSON with more than one disk.
        if $smartctl_json_open ; then
          echo "," >> ./json/smartctl
        else
          echo "[" >> ./json/smartctl
          smartctl_json_open=true
        fi
        smartctl -x -j "/dev/$disk" >> ./json/smartctl
      fi

      if exectest hdparm ; then
        echo -e "hdparm -iv /dev/${disk}:\n" >> hdparm
        hdparm -iv "/dev/$disk" >> ./hdparm 2>> ./hdparm.error
        echo -e "\n\n" >> hdparm
      fi

      if exectest fdisk ; then
        echo -e "fdisk -lu /dev/${disk}:\n" >> fdisk
        fdisk -lu "/dev/$disk" >> ./fdisk 2>> ./fdisk.error
        echo -e "\n\n" >> fdisk
      fi

      if exectest parted ; then
        echo -e "parted -s /dev/${disk}:\n" >> parted
        parted -s "/dev/$disk" print >> ./parted 2>> ./parted.error
        echo -e "\n\n" >> parted
      fi

      if exectest sdparm ; then
        echo -e "sdparm --all --long /dev/${disk}:\n" >> sdparm
        echo -e "stderr for sdparm --all --long /dev/${disk}:\n" >> sdparm.error
        sdparm --all --long "/dev/$disk" >> ./sdparm 2>> ./sdparm.error
        echo -e "\n\n" >> sdparm
        echo -e "\n\n" >> sdparm.error
      fi

      if exectest sg_inq ; then
        echo -e "sg_inq /dev/${disk}:\n" >> sg_inq
        sg_inq "/dev/$disk" >> ./sg_inq 2>> ./sg_inq.error
        echo -e "\n\n" >> sg_inq
      fi

      # the glob matches device nodes whose name starts with the disk name
      # followed by at least one more character
      file -s "/dev/${disk}"?* | grep -v ": empty" >> file_disk
    done
    # close the JSON array if any smartctl output was written
    if $smartctl_json_open ; then
      echo "]" >> ./json/smartctl
    fi
  fi
)

# get rid of empty error files, including the ones written to the json and
# zfs sub directories (the current directory still is the output directory)
for file in *.error json/*.error zfs/*.error ; do
  # a glob without any match is left as the literal pattern, skip it
  [ -e "$file" ] || continue
  test -s "$file" || rm -- "$file"
done

$_opt_quiet || echo

cd "${WORKING_DIR}" || { echo "Could not change to directory '${WORKING_DIR}', aborting." >&2 ; exit 1; }

# create tarball
if [ -n "$GENERATE_FILE" ] ; then
  # The collected data must not be removed below if it could not be archived,
  # so stop here and leave the directory in place when tar fails.
  if ! tar acf "${OUTFILE}" "${OUTDIRNAME}" ; then
    echo "Could not create '${OUTFILE}', collected data is kept in '${OUTDIR}'." >&2
    exit 1
  fi
  if ! $_opt_quiet ; then
    # shellcheck disable=SC2012
    [ -r "$OUTFILE" ] && echo "$OUTFILE ($(ls -ahl -- "$OUTFILE" | awk '{print $5}')) has been generated."
  fi
fi

# remove (temporary) output directory if needed, else keep it, as it doubles
# as the real output directory.
if [ -z "$GENERATE_DIRECTORY" ] ; then
  rm -r "${OUTDIR}"
else
  if ! $_opt_quiet ; then
    [ -r "${OUTDIR}" ] && echo "${OUTDIR} has been generated."
  fi
fi

exit 0

## END OF FILE##################################################################
