#!/bin/bash

version=2.02
# name under which the script was called, without its directory part
# (parameter expansion instead of an unquoted "basename $0", which fails
# when the path to the script contains whitespace)
myname=${0##*/}

<<'DOC'
= ltxfileinfo - print version information for a LaTeX file

= Synopsis
ltxfileinfo [options] filename

Options:
-h,--help	print this help and exit
-H,--Help	print full documentation via less and exit
-V		print this script's version and exit
-d,--date	print file's date only
-v,--version	print file's version only
-i,--info	print file's description text only
-l,--location	print file's full path only (same output as kpsewhich)
-f,--flat	output as 1 line with 4 tab-separated file, date, version and info fields
-s,--star	mark mal-formatted data with a star
-c,--color	mark mal-formatted data with red color (ANSI coloring)

= Description
ltxfileinfo displays version information for LaTeX files. If no path
information is given, the file is searched using kpsewhich. As an extra,
for developers, the script will (use the |--star| or |--color| options)
check the validity of the |\Provides...| statement in the files.
The script uses code from Uwe Lück's |readprov.sty|.

Without an option, the output will be of the form:
  $ ltxfileinfo ctable.sty
  file: ctable.sty
  date: 2012/08/23
  vers: v1.24
  info: Easy, key=value directed, option-rich, typesetting of floats
  loca: /usr/local/texlive/2012/../texmf-local/tex/latex/ctable/ctable.sty

Missing information is represented by |--|:
  $ ltxfileinfo yhmath.sty
  file: yhmath.sty
  date: --
  vers: --
  info: --
  loca: /usr/local/texlive/2012/texmf-dist/tex/latex/yhmath/yhmath.sty

ltxfileinfo tries to detect (and report) mal-formatted dates and versions; 
use the |--star| option to mark such fields with a star, or use the |--color|
option to color invalid fields red, if you work in a terminal with ANSI-coloring
capabilities.
These options are useful for developers
who want to check the correctness of their |\Provides...| statements.
For example:

  $ ltxfileinfo -s arfonts.sty
  file: arfonts.sty (ARfonts.sty*)
  date: 2006/01/01
  vers: --*
  info: Part of the Arabi package
  loca: /usr/local/texlive/2012/texmf-dist/tex/latex/arabi/arfonts.sty

We see here, that the |\ProvidesPackage| statement has an incorrect first
argument and has no version information.

= Other output formats
With the |--date| option, only the file's date will be shown, unlabeled.
The |--version|, |--location|, |--info| options are treated analogously.

  $ ltxfileinfo -v chronology.sty
  v1.0

The |--flat| option prints the fields (except loca:, the last field) on one
line, unlabeled and tab-separated:
 
  $ ltxfileinfo --star --flat chronology.sty
  chronology.sty        2010/6/12*      v1.0    Horizontal timeline

The date is starred, because its format is not yyyy/mm/dd (and the |--star|
option is given.)

= Bugs

On my system, I have a total of 8699 kpsewhich-detectable files that contain a |\Provides...| statement. 
I ran them all through |ltxfileinfo| and made the following summary of detected errors:

  8695  Total \Provides... containing files tested 
  
   660  \Provides... could not be interpreted; reason:
        160  Argument of \Provides... not equal to file's name
          8  Latex3 package (not handled yet)
        480  Unidentified problem with \ProvidesPackage statement
         12  \Provides... used in file without a ... extension
  
  8035  Files could be evaluated
        482  had no date
         95  had a mal-formatted date 
       2666  had no version
        357  had a mal-formatted version
        765  had a \Provides... first argument different from the filename

The .dtx files have more problems than other files:

  1345  .dtx files:
  
   320   \Provides... could not be interpreted; reason:
         81  Argument of \Provides... not equal to...
          6  Latex3 package (not handled yet)
        233  Unidentified problem with \ProvidesPackage statement
  
  1025  Files could be evaluated
         49  had no date
         15  had a mal-formatted date 
         99  had no version
         52  had a mal-formatted version
        468  had a \Provides... first argument different from the filename

= Changes
Changes with respect to version 2.00:
- Documentation adapted to gendoc
- Syntactically beautified

= Author and copyright
Author	Wybo Dekker
Email	U{Wybo@dekkerdocumenten.nl}{wybo@dekkerdocumenten.nl}
License	Released under the U{www.gnu.org/copyleft/gpl.html}{GNU General Public License}
DOC

    # die MESSAGE... - print "<script name>: MESSAGE" on stderr, wrapped in the
    # $Err/$Nor color sequences, and exit with status 1. Only the two color
    # sequences are escape-expanded (%b); the message itself is printed
    # verbatim, so a backslash in it (\ProvidesFile, a file name, ...) can
    # never turn into a control character.
    die() { printf '%s: %b%s%b\n' "$myname" "$Err" "$*" "$Nor" 1>&2; exit 1; }
   # help - print the Synopsis section of the documentation embedded in this
   # script (from the "= Synopsis" line up to the next "= " heading), with
   # its heading replaced by "Usage:", and exit
   help() { sed -n '/^= Synopsis/,/^= /p' "$0"|sed '1s/.*/Usage:/;/^= /d'; exit; }
# helpall - show the complete documentation (the text between the <<'DOC'
# line and the closing DOC line of this script) via less, and exit
helpall() { sed -n '/^<<.DOC.$/,/^DOC$/p' "$0"|sed -n '1d;$d;p'|less; exit; }
# version - print the script's version ($version) verbatim and exit
version() { printf '%s\n' "$version"; exit; }
# install - if an instscript command is available, run it on this script
# with the --zip --pdf --markdown options; exit in either case (with a
# non-zero status when instscript is missing or fails)
install() { command -v instscript >/dev/null 2>&1 && instscript --zip --pdf --markdown "$myname"; exit; }

# color sequences for error messages, kept in their unexpanded backslash form
# (the trailing "]" in the comments presumably just balances the "[" for
# bracket-matching editors)
Err='\e[31;1m' # light red ]
Nor='\e[0m'    # reset color ]

# the script relies on features of bash 4 and later
if (( BASH_VERSINFO[0] < 4 )); then
   die "Need bash version >= 4 (you have $BASH_VERSION)"
fi

# trim STRING... - print the arguments (joined as "$*") without their leading
# and trailing whitespace and without a terminating newline
function trim {
   local text="$*"
   text="${text#"${text%%[![:space:]]*}"}"   # drop the leading run of whitespace
   text="${text%"${text##*[![:space:]]}"}"   # drop the trailing run of whitespace
   # printf rather than echo -n: echo would take a result such as "-n" or
   # "-e" for an option and print nothing
   printf '%s' "$text"
}

# mark STRING... - print the arguments (joined as "$*") between $mark1 and
# $mark2; these are set by the --star option (a trailing *) and the --color
# option (color sequences) and are empty when neither option is given
function mark {
   # printf rather than echo: with empty marks a value like "-n" would be
   # taken for an echo option and vanish
   printf '%s\n' "$mark1$*$mark2"
}

# special treatment for .mbs and .bst files:
# dombsbst - set $out to a "File: name date version info" line built from
# the first \ProvidesFile{name}[date version info] line in file $loca
# (leading blanks and % signs before \ProvidesFile are ignored)
function dombsbst {
   # look at the first \ProvidesFile only; the backslash is matched
   # literally (\\) and $loca is quoted so paths with blanks work
   out="File: "$(grep -- '\\ProvidesFile{' "$loca" | head -n 1 |
                 sed 's/^[ %]*\\ProvidesFile{//;s/}\[/ /;s/\]$//')
}

# unpack the fields
# extract File: NAME [DATE] [VERSION] [INFO...] - takes the words of a
# "File: ..." line and stores them in the globals file, date, vers and info;
# a field that is absent or not recognized is set to "--"
function extract {
  # helper variables are local, so that the caller's namespace is left alone
  local prefix version_re matched=

  shift # remove File:
  file=$1
  shift # remove filename

  # nothing follows the file name: no information at all
  if [[ -z $1 ]]; then
     date=--; vers=--; info=--
     return
  fi

  # the date: any word starting with digits/digits/digits
  if [[ $1 =~ ^[0-9]+/[0-9]+/[0-9]+ ]]; then
     date=$1
     shift
  else
     date=--
  fi

  if [[ -z $1 ]]; then
     vers=--; info=--
     return
  fi

  # the regular case: a version like 1.3, v1.3 or v1.3b
  if [[ $1 =~ ^v?[[:digit:]][.[:digit:]]+[a-z]* ]]; then
     vers=$1
     shift
     info="${*:---}"
     return
  fi

  # sometimes the version is of the form v.1.3 or ver:1.3 or such:
  for prefix in version ver. ver: ver v. V v; do
     version_re="${prefix}[[:digit:]][.[:digit:]]*[a-z]*"
     if [[ $1 =~ $version_re ]]; then
        vers=$1
        shift
        matched=1
        break
     fi
  done

  if [[ -z $matched ]]; then
     # sometimes the version is reported as "v 1.3" or "ver: 1.3" or such;
     # [[ ]] with a quoted right-hand side compares literally, so words
     # containing glob characters cannot upset the test
     for prefix in version ver: ver v. V v; do
        if [[ $1 == "$prefix" && $2 =~ [[:digit:]][.[:digit:]]+[a-z]* ]]; then
           vers="$1 $2"
           shift 2
           matched=1
           break
        fi
     done
  fi

  [[ -n $matched ]] || vers=--
  # whatever is left is the description
  info="${*:---}"
}

# display the results
# display - check the globals file, date and vers, flag the ones that are
# not well-formed with mark(), and print the result: only the field named
# by $key when that is set, else one tab-separated line when $flat is set,
# else a labeled list that also shows $loca
function display {
   # version should be 1.2 or 1.2.3, maybe prefixed with v and suffixed with
   # zero or more lower case letters (the dots are escaped: literal dots only)
   local version_re='^v?[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?[[:lower:]]*$'

   # file should be equal to arg (the argument)
   if [[ $file != "$arg" ]]; then
      file="$arg ($(mark "$file"))"
   fi

   # to be correct, date must be of the form yyyy/mm/dd
   if [[ ! $date =~ ^[[:digit:]]{4}/[[:digit:]]{2}/[[:digit:]]{2}$ ]]; then
      date=$(mark "$date")
   # also, the numbers must form a valid date; LC_ALL=C makes sure that the
   # "invalid date" message of date(1) is not translated
   elif LC_ALL=C date -d "$date" +%s 2>&1 | grep -q invalid; then
      date=$(mark "$date")
   fi

   if [[ ! $vers =~ $version_re ]]; then
      vers=$(mark "$vers")
   fi

   # printf '%s' prints every field verbatim: no escape processing of
   # backslashes in the description, no word splitting, no globbing
   if [[ -n $key ]]; then
      printf '%s\n' "${!key}"
   elif [[ -n $flat ]]; then
      printf '%s\t%s\t%s\t%s\n' "$file" "$date" "$vers" "$info"
   else
      printf '%s\n' "file: $file" "date: $date" "vers: $vers" "info: $info" "loca: $loca"
   fi
}

# Parse the command line. getopt(1) normalizes the options (short and long
# ones) into $options, which then replaces the positional parameters; after
# the loop the first positional parameter is the file to report on.
# Options -I (install) and -D/--debug are accepted but not listed in the
# help text.
if ! options=$(getopt \
   -n "$myname" \
   -o hHVIdvlifcsD \
   -l help,Help,date,version,location,info,flat,star,color,debug -- "$@"
); then exit 1; fi
eval set -- "$options"
debug=false
while [ $# -gt 0 ]; do
   case $1 in
   (-h|--help)     help;;
   (-H|--Help)     helpall;;
   (-V)            version;;
   (-I)            install;;
   (-d|--date)     key=date;;
   (-v|--version)  key=vers;;
   (-i|--info)     key=info;;
   (-l|--location) key=loca;;
   (-f|--flat)     flat=1;;
   (-s|--star)     mark1='';          mark2='*';;
   # ANSI sequences written as $'\e[...' so that a real ESC character is
   # stored: bold red on, all attributes off
   (-c|--color)    mark1=$'\e[1;31m'; mark2=$'\e[0m';;
   (-D|--debug)    debug=true;;
   (--)            shift; break;;
   (*)             break;;
   esac
   shift
done

# The first remaining argument is the file to report on. With a directory
# part it is taken as given, without one kpsewhich has to find it; with no
# argument at all the usage message is shown.
arg=$1
case $arg in
(*/*)   test -e "$arg" || die "$arg: file does not exist"
        # store an absolute path: the script changes to a temporary directory
        # before TeX reads the file, and a relative path would no longer
        # lead to it from there (CDPATH is emptied so cd prints nothing)
        loca=$(CDPATH= cd -- "$(dirname -- "$arg")" && pwd)/${arg##*/} ||
           die "$arg: cannot determine the file's directory"
        ;;
('')    help;;
(*)     loca=$(kpsewhich "$arg") || die "$arg: not found by kpsewhich"
esac
# $loca is final here; running kpsewhich once more would throw away both the
# path found above and the error handling
prov=$(grep '\\Provides' "$loca") || die "no \\Provides..."
[[ $prov =~ ExplPackage ]] && die 'Latex3 package (not handled yet)'

# the file must contain a \ProvidesXXX statement, where XXX is Class, Package, or File:
[[ $prov =~ \\Provides(Class|Package|File)[[:space:]]*\{ ]] ||
   die "Found no \\ProvidesClass/Package/File statement"

# readprov.sty does not work on .mbs and .bst files: special treatment;
# \ProvideFile statements in them mostly refer to merlin.mbs, or other names
[[ $arg =~ \.(mbs|bst)$ ]] && dombsbst

# Work in a private temporary directory. It is removed on exit and on the
# signals HUP, INT and TERM, unless --debug was given: then its name is
# reported and it is kept for inspection.
dir=$(mktemp -d -t "$myname.XXXXXXXXXX") || die "cannot create a temporary directory"
if $debug; then
   echo "Running in $dir"
else
   # single quotes: "$dir" is expanded, properly quoted, when the trap runs
   trap 'rm -rf -- "$dir"' 0 1 2 15
fi
cp -- "$loca" "$dir" || die "cannot copy $loca to $dir"
cd -- "$dir" || die "cannot change to directory $dir"

# Write a small LaTeX driver, ltxfileinfo.tex, into the current directory and
# run pdflatex on it in batch mode, discarding all terminal output. The
# driver \input's the file $loca with \@providesfile redefined to end the
# input (\endinput) right after the \Provides... statement; the resulting
# ltxfileinfo.log is what the script evaluates afterwards.
# $loca is inserted quoted, so that a path containing blanks or glob
# characters arrives in the driver unchanged.
# The following code is mostly from Uwe Lueck's readprov.sty:
echo '
\makeatletter
\def\GetFileInfo#1{%
  \def\filename{#1}%
  \def\@tempb##1 ##2 ##3\relax##4\relax{%
    \def\filedate{##1}%
    \def\fileversion{##2}%
    \def\fileinfo{##3}}%
    \read@file@info\@tempb{#1}} 
\newcommand*{\read@file@info}[2]{%
  \expandafter \expandafter \expandafter
    #1\csname ver@#2\endcsname \relax? ? \relax\relax}
\newcommand*{\ReadFileInfos}[1]{%
  \begingroup
    \let\RP@@provfile\@providesfile
    \def\@providesfile##1[##2]{\RP@@provfile{##1}[{##2}]\endinput}%
    \def\ProvidesClass  ##1{\ProvidesFile{##1.\@clsextension}}%
    \def\ProvidesPackage##1{\ProvidesFile{##1.\@pkgextension}}%
    \@for\@tempa:=#1\do{%
      \edef\@tempa{\expandafter\read@no@spaces\@tempa\@nil}%
      \input{\@tempa}%
      \global\let\@gtempa\@tempa}%
  \endgroup
  \GetFileInfo\@gtempa%
}
\def\read@no@spaces#1#2\@nil{#1#2}%
\def\NeedsTeXFormat#1{\expandafter\@needsformat}
\ReadFileInfos{'"$loca"'}
\endinput
' > ltxfileinfo.tex

pdflatex -interaction=batchmode ltxfileinfo.tex >& /dev/null

# _provides_entry LOGFILE BASE - print the last "File: BASE..." entry found
# in the TeX log LOGFILE (an empty line if there is none). BASE is compared
# literally (not as a regular expression) and without regard to case.
_provides_entry() {
   local logfile=$1 base=$2 line entry=
   shopt -s nocasematch # MS people don't pay attention to case differences in file names...
   # IFS= and -r: every line is taken verbatim, including its whitespace
   # and backslashes
   while IFS= read -r line; do
      if [[ $line =~ ^File:."$base" ]]; then
         entry=$line
         # a line of exactly 79 characters is taken to be continued on the
         # next one (presumably the width at which TeX breaks log lines):
         # gather continuation lines
         while [[ ${#line} -eq 79 ]]; do
            IFS= read -r line
            entry+=$line
         done
      fi
   done <"$logfile"
   shopt -u nocasematch
   printf '%s\n' "$entry"
}

base=${arg##*/} # strip the path
base=${base%.*} # strip the extension
# $out is only replaced when the log has an entry for the file, so a value
# set earlier survives otherwise
found=$(_provides_entry ltxfileinfo.log "$base")
[[ -n $found ]] && out=$found
IFS=' '			# from here on, words are separated by spaces only

# _provides_problem LOCA ARG BASE - print the most likely reason why the
# \Provides... statement of file LOCA could not be interpreted. ARG is the
# file as named on the command line, BASE its name without directory and
# extension.
_provides_problem() {
   local loca=$1 arg=$2 base=$3 provtype provarg ext name
   # type (Class, Package or File) and argument of the first line in LOCA
   # with a \Provides...{argument} statement; the here-string is quoted so
   # that further matching lines cannot end up in provarg
   IFS=' ' read -r provtype provarg <<<"$(sed -n '/\\Provides\(Class\|Package\|File\)\s*{[^}]*}/s/.*\(Class\|Package\|File\)\s*{\([^}]*\)}.*/\1 \2/p' "$loca")"

   # no argument found, or an argument containing a backslash (a macro)
   if [[ -z $provarg || $provarg == *\\* ]]; then
      printf '%s\n' 'Unidentified problem with \ProvidesPackage statement'
      return
   fi

   case $provtype in
   (Package|Class)
      # packages live in .sty files, classes in .cls files, both in .dtx
      ext=sty
      [[ $provtype == Class ]] && ext=cls
      if [[ ! $arg =~ \.($ext|dtx)$ ]]; then
         printf '%s\n' "\\Provides$provtype used in file without a .$ext extension"
      elif [[ $provarg == *."$ext" ]]; then
         printf '%s\n' "Argument of \\Provides$provtype ($provarg) should not contain .$ext extension"
      elif [[ $provarg != "$base" ]]; then
         printf '%s\n' "Argument of \\Provides$provtype ($provarg) not equal to file's base name ($base)"
      else
         printf '%s\n' "Unidentified problem with \\Provides$provtype statement"
      fi
      ;;
   (File)
      # compare with the file's name, not with the path it was called by
      name=${arg##*/}
      if [[ $provarg != "$name" ]]; then
         printf '%s\n' "Argument of \\ProvidesFile ($provarg) not equal to file's name ($name)"
      else
         printf '%s\n' 'Unidentified problem with \ProvidesFile statement'
      fi
      ;;
   (*)
      printf '%s\n' "Found \\Provides$provtype statement - cannot handle it"
      ;;
   esac
}

# if the \Provides statement is not recognized, try to find out why, and die:
if [ "$out" = "" ]; then
   die "$(_provides_problem "$loca" "$arg" "$base")"
fi

# split $out into its words and hand them to extract; reading them into an
# array splits on $IFS just like an unquoted $out would, but without
# pathname expansion (a "*" in the description stays a "*")
read -r -a fields <<<"$out"
extract "${fields[@]}"
display
exit 0

# template for README file:
<<'README'
This is release %version% of the ltxfileinfo script
License: GPL

Short description:
ltxfileinfo is a bash script; it prints information about a latex class,
style and other files to standard output.
The script is based on Uwe Lueck's readprov.sty, so it prints information
only for files that contain a \ProvidesFile, \ProvidesClass or
\ProvidesPackage statement.
The script tries to correct errors in these \Provides... statements and it
has options, useful for developers, to make errors in those statements visible.

Usage example:

$ ltxfileinfo article.cls
file: article.cls
date: 2007/10/19
vers: v1.4h
info: Standard LaTeX document class
loca: /usr/local/texlive/2009/texmf-dist/tex/latex/base/article.cls

major changes of release %version% relative to 1.1
- completely rewritten as a bash script, using code from readprov.sty
- now also can evaluate xelatex oriented files
- can now handle any LaTeX file containing a \Provides... statement
- options for \Provides... statement validation
README
