#!/bin/bash

# Public domain notice for all NCBI EDirect scripts is located at:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice

# Epoch seconds at launch; the closing "TOT" line is measured from here.
total_start=$(date "+%s")

# Processing modes. Everything is off until a command-line keyword enables it.
pma2pme=false
e2index=false e2invert=false fullIndex=false
bioconcepts=false nihocc=false datafiles=false
internal=false
clean=false scrub=false

# Download transport selection (-ftp / -http / -https switch these).
useFtp=true useHttps=false

# Consume the leading mode keywords. Parsing stops at the first word that is
# not listed below; that word stays in "$1" for the stages that follow.
while [ "$#" -gt 0 ]; do
  case "$1" in
    pma2pme | -pma2pme | asn | -asn | asn1 | -asn1 | asn.1 | -asn.1 )
      pma2pme=true
      ;;
    daily | -daily )
      e2index=true
      e2invert=true
      datafiles=true
      ;;
    index | -index | reindex | -reindex )
      e2index=true
      e2invert=true
      fullIndex=true
      datafiles=true
      ;;
    clean | -clean | clear | -clear )
      # delete Indices contents and Increment files
      clean=true
      ;;
    scrub | -scrub )
      # everything "clean" does, and delete Postings directories as well
      clean=true
      scrub=true
      ;;
    biocoll | -biocoll | extras | -extras | bioconcepts | -bioconcepts | generifs | -generifs )
      bioconcepts=true
      datafiles=true
      ;;
    nihocc | -nihocc )
      nihocc=true
      datafiles=true
      ;;
    -internal | -int )
      # populate from files on internal network
      internal=true
      ;;
    -ftp )
      useFtp=true
      useHttps=false
      ;;
    -http | -https )
      useFtp=false
      useHttps=true
      ;;
    * )
      break
      ;;
  esac
  # Every recognised keyword is a single word, so one shift serves all arms.
  shift
done

# An optional "-path" keyword may precede the archive directory. Any other
# word starting with a dash is rejected; the first plain word ends the loop.
while [ "$#" -gt 0 ]; do
  if [ "$1" = "-path" ]; then
    shift
    continue
  fi
  case "$1" in
    -* )
      echo "$0: Unrecognized option $1" >&2
      exit 1
      ;;
  esac
  break
done

# Resolve MASTER, the root of the permanent archive.
#   - If an argument remains, it is the directory; it is canonicalised to an
#     absolute path and remembered in CONFIG (CONFIG triggers the shell
#     profile hint printed at the end of the run).
#   - Otherwise EDIRECT_PUBMED_MASTER is used, minus one trailing slash.
# All failures are reported on stderr and end the script with status 1.
if [ "$#" -gt 0 ]; then
  target="$1"
  if [ ! -d "$target" ]; then
    echo "Unable to find '$target' path" >&2
    exit 1
  fi
  MASTER=$(cd "$target" && pwd)
  # An existing directory can still be impossible to enter (permissions).
  # Stop here, because an empty MASTER would make later steps operate on
  # "/Archive", "/Data" and "/Postings".
  if [ -z "$MASTER" ]; then
    echo "Unable to access '$target' path" >&2
    exit 1
  fi
  CONFIG=${MASTER}
  shift
else
  if [ -z "${EDIRECT_PUBMED_MASTER}" ]; then
    echo "Must supply path to master archive area or set EDIRECT_PUBMED_MASTER environment variable" >&2
    exit 1
  fi
  MASTER="${EDIRECT_PUBMED_MASTER}"
  MASTER=${MASTER%/}
  if [ ! -d "${MASTER}" ]; then
    echo "Unable to find '$MASTER' path" >&2
    exit 1
  fi
fi

# An optional "-temp", "-work" or "-working" keyword may precede the working
# directory. Any other dash word is an error; a plain word ends the loop.
while [ "$#" -gt 0 ]; do
  if [ "$1" = "-temp" ] || [ "$1" = "-work" ] || [ "$1" = "-working" ]; then
    shift
    continue
  fi
  case "$1" in
    -* )
      echo "$0: Unrecognized option $1" >&2
      exit 1
      ;;
  esac
  break
done

# Resolve WORKING, the area for downloads and intermediate index files.
#   - If an argument remains, it is the directory (canonicalised).
#   - Otherwise EDIRECT_PUBMED_WORKING is used, minus one trailing slash.
#   - With neither, WORKING is the same directory as MASTER.
# All failures are reported on stderr and end the script with status 1.
if [ "$#" -gt 0 ]; then
  working="$1"
  if [ ! -d "$working" ]; then
    echo "Unable to find '$working' path" >&2
    exit 1
  fi
  WORKING=$(cd "$working" && pwd)
  # Guard against a directory that exists but cannot be entered; an empty
  # WORKING would redirect every later path to the filesystem root.
  if [ -z "$WORKING" ]; then
    echo "Unable to access '$working' path" >&2
    exit 1
  fi
  shift
else
  if [ -z "${EDIRECT_PUBMED_WORKING}" ]; then
    WORKING=${MASTER}
  else
    WORKING="${EDIRECT_PUBMED_WORKING}"
    WORKING=${WORKING%/}
  fi
  if [ ! -d "${WORKING}" ]; then
    echo "Unable to find '$WORKING' path" >&2
    exit 1
  fi
fi

echo "MASTER $MASTER"

echo "WORKING $WORKING"

# Permanent areas under MASTER.
mkdir -p "$MASTER/Archive" "$MASTER/Data" "$MASTER/Postings"

# A Sentinels folder found directly under MASTER is moved into Archive;
# otherwise Archive/Sentinels is created.
if [ -d "$MASTER/Sentinels" ]; then
  mv "$MASTER/Sentinels" "$MASTER/Archive"
else
  mkdir -p "$MASTER/Archive/Sentinels"
fi

# A folder named Pubmed under WORKING is renamed to Source; otherwise Source
# is created.
if [ -d "$WORKING/Pubmed" ]; then
  mv "$WORKING/Pubmed" "$WORKING/Source"
else
  mkdir -p "$WORKING/Source"
fi

# Intermediate areas under WORKING, then the three Scratch sub-areas.
mkdir -p "$WORKING/Extras" "$WORKING/Index" "$WORKING/Invert" \
  "$WORKING/Merged" "$WORKING/Scratch"

mkdir -p "$WORKING/Scratch/Current" "$WORKING/Scratch/Indexed" \
  "$WORKING/Scratch/Inverted"

pm-prepare "$MASTER" "$WORKING"

date

# Elapsed seconds per phase, filled in as each phase finishes and printed in
# the closing summary.
DWN=0 DEL=0 SCB=0 POP=0
IDX=0 INV=0 MRG=0 PST=0

# The download phase is timed from here; DWN is set once the phase ends.
seconds_start=$(date "+%s")
cd "$WORKING/Source"
case "$internal" in
  true )
    # Nothing to fetch now: InternalStash copies files one at a time later.
    echo "Will Use Direct Access To PubMed Files On FTP Site"
    ;;
  * )
    echo "Downloading PubMed Files"
    pubmed_args=(baseline updatefiles)
    if [ "$useHttps" = true ]; then
      pubmed_args=(-https "${pubmed_args[@]}")
    fi
    download-pubmed "${pubmed_args[@]}"
    ;;
esac
# RemoveIfOutdated LISTING FILE
# Deletes FILE (a name relative to the current directory) when LISTING
# reports a byte size for it that differs from the local copy.
#   LISTING - text captured from "nquire -dir"; treated as tab-separated with
#             the size in column 1 and the file name in a later column
#             (assumption - TODO confirm against the nquire version in use)
#   FILE    - local file to check
# A file that is missing or empty locally is ignored. A file with no entry in
# LISTING (for example because the listing request failed) is kept, since
# there is nothing to compare it against.
RemoveIfOutdated() {
  local listing="$1" file="$2"
  local remote_size local_size

  [ -s "$file" ] || return 0

  # Match the name exactly. A regex or substring match could also hit other
  # entries that merely contain the name and return several sizes at once.
  remote_size=$(printf '%s\n' "$listing" |
    awk -F '\t' -v name="$file" '
      { sub(/\r$/, "") }
      { for (i = 2; i <= NF; i++) if ($i == name) { print $1; exit } }')
  [ -n "$remote_size" ] || return 0

  local_size=$(wc -c < "$file" | tr -d ' ')
  if [ "$remote_size" != "$local_size" ]; then
    echo "Removing outdated $file"
    rm -- "$file"
  fi
}

# The following download steps also run from the Extras directory.
cd "$WORKING/Extras"
if [ "$bioconcepts" = true ]; then
  echo "Checking for BioConcept and GeneRIF Updates"
  fst=$( nquire -dir ftp.ncbi.nlm.nih.gov "pub/lu/PubTatorCentral" )
  scd=$( nquire -dir ftp.ncbi.nlm.nih.gov "gene/GeneRIF" )
  for fl in chemical2pubtatorcentral.gz disease2pubtatorcentral.gz gene2pubtatorcentral.gz; do
    RemoveIfOutdated "$fst" "$fl"
  done
  RemoveIfOutdated "$scd" "generifs_basic.gz"
fi
# Fetch the auxiliary NCBI tables into the current directory. The "-https"
# flag is passed through only when HTTPS transport was requested.
if [ "$datafiles" = true ]; then
  ncbi_flag=()
  if [ "$useHttps" = true ]; then
    ncbi_flag=(-https)
  fi
  for item in "MeSH Tree|meshtree" "TaxNames|taxnames" "Journals|journals" "GeneRIFs|generifs"; do
    echo "Downloading ${item%%|*}"
    download-ncbi-data "${ncbi_flag[@]}" "${item##*|}"
  done
fi
# NOTE(review): the BioConcepts download never receives "-https", unlike the
# tables above - confirm whether that is intentional.
if [ "$bioconcepts" = true ]; then
  echo "Downloading BioConcepts Tables"
  download-ncbi-data bioconcepts
fi
# Seed MASTER/Data with conversion and lookup tables from WORKING/Extras.
if [ "$datafiles" = true ]; then
  echo "Copying to Data Directory"
  for fl in \
    chemconv.xml diszconv.xml geneconv.xml jourmaps.xml meshconv.xml \
    genename.txt genesyns.txt jourabrv.txt jourindx.txt journame.txt \
    joursets.txt lineages.txt meshname.txt meshtree.txt taxnames.txt
  do
    # A copy already present in Data is never overwritten.
    [ -f "$MASTER/Data/$fl" ] && continue
    [ -f "$WORKING/Extras/$fl" ] || continue
    cp "$WORKING/Extras/$fl" "$MASTER/Data/$fl"
  done
fi
# Close out the download phase timer.
seconds_end=$(date "+%s")
DWN=$((seconds_end - seconds_start))
seconds=$DWN
echo "$DWN seconds"
sleep 1

# A full reindex, a BioConcepts pass and an NIH OCC pass all begin by
# emptying the scratch and merged areas. Each entry below is
# "<directory under WORKING>:<file suffix>"; both the plain and the gzipped
# form of the suffix are deleted.
if [ "$fullIndex" = true ] || [ "$bioconcepts" = true ] || [ "$nihocc" = true ]; then
  echo "Removing Previous Indices"
  for spec in "Scratch/Current:xml" "Scratch/Indexed:e2x" \
              "Scratch/Inverted:inv" "Merged:mrg"
  do
    target="$WORKING/${spec%%:*}"
    sfx=${spec##*:}
    cd "$target"
    find "$target" -name "*.$sfx" -delete
    find "$target" -name "*.$sfx.gz" -delete
  done
  echo ""
  sleep 1
fi

# "clean" (and "scrub", which sets clean as well) deletes the incremental
# index files under WORKING/Index and the inverted files under WORKING/Invert.
if [ "$clean" = true ]; then
  seconds_start=$(date "+%s")
  echo "Deleting Incremental Indices"
  for spec in "Index:e2x" "Invert:inv"; do
    target="$WORKING/${spec%%:*}"
    sfx=${spec##*:}
    cd "$target"
    find "$target" -name "*.$sfx" -delete
    find "$target" -name "*.$sfx.gz" -delete
  done
  seconds_end=$(date "+%s")
  DEL=$((seconds_end - seconds_start))
  seconds=$DEL
  echo "$DEL seconds"
  echo ""
  sleep 1
fi

# RemovePosts
# Deletes the postings folders found in the CURRENT directory; the caller
# must already have changed into the Postings directory.
# Pass 1 removes the immediate children of the listed folders, one
# background "rm" per child, so the work is spread over parallel jobs.
# Pass 2 removes every postings folder itself, again in parallel.
# Folders that do not exist are skipped silently (rm -f semantics).
RemovePosts() {
  local dir sub

  for dir in AUTH CSRT FAUT INVR JOUR LAUT \
             MESH PAIR SUBH TIAB TITL GENE \
             GRIF GSYN PREF CHEM DISZ UID
  do
    for sub in "$dir"/*
    do
      # With no match the pattern is left as a literal string; skip it
      # rather than fork a pointless rm.
      [ -e "$sub" ] || [ -L "$sub" ] || continue
      rm -rf -- "$sub" &
    done
  done

  wait

  for dir in ANUM AUTH CODE CSRT FAUT INVR \
             INUM ISS JOUR LANG LAUT MESH \
             PAGE PAIR PDAT PROP PTYP RDAT \
             SIZE SUBH TIAB TITL TREE VOL \
             YEAR GENE GRIF GSYN PREF CHEM \
             DISZ CITED CITES UID
  do
    rm -rf -- "$dir" &
  done

  wait
}

# "scrub" wipes the postings folders under MASTER/Postings.
if [ "$scrub" = true ]; then
  seconds_start=$(date "+%s")
  echo "Clearing Postings Folders"
  # RemovePosts runs "rm -rf" on relative names, so it must never run from
  # the wrong directory. Stop if the Postings directory cannot be entered.
  if ! cd "$MASTER/Postings"; then
    echo "Unable to enter '$MASTER/Postings' directory - postings not cleared" >&2
    exit 1
  fi
  RemovePosts
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  SCB=$seconds
  echo "$SCB seconds"
  echo ""
  sleep 1
fi

# ReportVersioned XMLFILE
# Appends to MASTER/Archive/versioned.uid the sorted, de-duplicated PMID
# values that xtract emits for PMID elements whose Version attribute is
# greater than 1. The list is staged in ".TO-REPORT" in the current
# directory and removed afterwards; nothing is appended when it is empty.
ReportVersioned() {
  inp="$1"
  pmidlist=.TO-REPORT
  {
    xtract -input "$inp" -pattern PubmedArticle \
      -block MedlineCitation/PMID -if "@Version" -gt 1 -element "PMID" < /dev/null |
    sort -n |
    uniq
  } > "$pmidlist"
  [ ! -s "$pmidlist" ] || cat "$pmidlist" >> "$MASTER/Archive/versioned.uid"
  rm "$pmidlist"
}

# PMStash FILE
# Archives one gzipped PubMed release file located in the current directory.
#   FILE - name ending in ".xml.gz"
# Steps: decompress and normalise into "<base>.xml", store the records with
# rchive, optionally store an ASN.1 rendition (pma2pme mode), apply the
# DeleteCitation entries, record versioned PMIDs, then create the sentinel
# MASTER/Archive/Sentinels/<base>.snt and remove "<base>.xml".
# Globals read: MASTER, WORKING, pma2pme.
# Global written: needToReport (keeps the slow-archive warning to one
#   printing per shell).
# Returns 1, without writing a sentinel, when FILE cannot be decompressed, so
# the file is picked up again on the next run.
PMStash() {

  local fl="$1"
  local base timeout gz_status
  local secnds_start secnds_end secnds

  # Clear leftovers with these names from the current directory.
  rm -f "versioned.xml.gz"
  rm -f "versioned.snt"

  # Initialise only once. Resetting this on every call would repeat the
  # warning below for every slow file.
  : "${needToReport:=true}"

  # Seconds allowed per file before the warning; doubled in pma2pme mode.
  timeout=100
  if [ "$pma2pme" = true ]
  then
    timeout=200
  fi

  base=${fl%.xml.gz}
  secnds_start=$(date "+%s")
  echo "$base.xml"

  gunzip -c "$fl" |
  transmute -strict -normalize pubmed |
  transmute -compress -strict -wrp PubmedArticleSet \
    -pattern "PubmedArticleSet/*" -format flush > "$base.xml"
  gz_status=${PIPESTATUS[0]}
  # gzip reports warnings with status 2; anything else non-zero is a failure.
  case "$gz_status" in
    0 | 2 )
      ;;
    * )
      echo "ERROR: Unable to decompress $fl - file skipped, no sentinel written" >&2
      rm -f "$base.xml"
      return 1
      ;;
  esac

  # stdin is /dev/null so rchive cannot consume a caller's input stream.
  rchive -gzip -db pubmed -input "$base.xml" \
    -archive "$MASTER/Archive" "$WORKING/Index" "$WORKING/Invert" \
    -index MedlineCitation/PMID^Version -pattern PubmedArticle < /dev/null

  if [ "$pma2pme" = true ]
  then
    pma2pme -xml < "$base.xml" > "$base.asn"
    rchive -asn -gzip -input "$base.asn" \
      -archive "$MASTER/Archive" "$WORKING/Index" "$WORKING/Invert" \
      -index Pubmed-entry/pmid_ -pattern Pubmed-entry < /dev/null
    rm "$base.asn"
  fi

  # Remove the records named in DeleteCitation blocks.
  xtract -pattern DeleteCitation -block PMID -tab "\n" -sep "." -element "PMID" < "$base.xml" |
  sort -n | uniq |
  rchive -gzip -delete "$MASTER/Archive" "$WORKING/Index" "$WORKING/Invert"

  ReportVersioned "$base.xml"

  touch "$MASTER/Archive/Sentinels/$base.snt"
  rm "$base.xml"

  secnds_end=$(date "+%s")
  secnds=$((secnds_end - secnds_start))
  if [ "$needToReport" = true ]
  then
    if [ "$secnds" -gt "$timeout" ]
    then
      echo ""
      echo "ARCHIVING IS SLOWER THAN EXPECTED."
      echo ""
      echo "PLEASE ENSURE THAT ANTIVIRUS SCANNING AND CONTENT INDEXING ARE DISABLED,"
      echo "AND THAT TRIM SUPPORT IS ENABLED FOR THE SOLID STATE DRIVE."
      echo ""
      # Ask the OS directly rather than rely on a variable that is only set
      # much later in the script. The commands refer to the MASTER volume.
      if [ "$(uname -s)" = "Darwin" ]
      then
        echo "  sudo mdutil -i off ${MASTER}"
        echo "  sudo mdutil -E ${MASTER}"
        echo "  sudo touch ${MASTER}/.fseventsd/no_log"
        echo ""
      fi
      needToReport=false
    fi
  fi
}

# InternalStash SUBDIR
# Archives the release files found in /am/ftp-pubmed/SUBDIR one at a time:
# each ".xml.gz" file without a sentinel is copied into the current
# directory, passed to PMStash, and the local copy is removed again. The
# time spent on each file is printed.
#   SUBDIR - folder under /am/ftp-pubmed (the script passes "baseline" and
#            "updatefiles")
# A file that cannot be copied is reported on stderr and skipped WITHOUT
# calling PMStash, so no sentinel is written and it is retried next run.
InternalStash() {

  local dir="$1"
  local src="/am/ftp-pubmed/${dir}"
  local fl base single_start single_end single

  # Only names ending in ".xml.gz" qualify; this also drops ".md5" files.
  ls -1 "$src" |
  grep '\.xml\.gz$' |
  while IFS= read -r fl
  do
    base=${fl%.xml.gz}
    # skip if sentinel is present
    if [ -f "$MASTER/Archive/Sentinels/$base.snt" ]
    then
      continue
    fi
    single_start=$(date "+%s")
    # copy one file at a time from ftp site
    if ! cp "$src/$fl" .
    then
      echo "ERROR: Unable to copy $src/$fl - skipping" >&2
      # drop a possibly partial copy
      rm -f -- "$fl"
      continue
    fi
    # process local copy, creating sentinel file
    PMStash "$fl"
    # remove local copy of release file
    rm "$fl"
    single_end=$(date "+%s")
    single=$((single_end - single_start))
    # print time to process individual file
    echo "$single seconds"
  done
}

# Populate the archive from every release file that has no sentinel yet.
seconds_start=$(date "+%s")
echo "Populating PubMed Archive"
# PMStash reads and writes files relative to the current directory, and the
# glob below is relative too, so a failed cd must stop the run.
if ! cd "$WORKING/Source"; then
  echo "Unable to enter '$WORKING/Source' directory" >&2
  exit 1
fi
if [ "$internal" = true ]; then
  InternalStash "baseline"
  InternalStash "updatefiles"
else
  for fl in *.xml.gz; do
    # With no release files present the pattern stays a literal "*.xml.gz".
    # Processing that would create a bogus "*.snt" sentinel.
    [ -f "$fl" ] || continue
    base=${fl%.xml.gz}
    if [ -f "$MASTER/Archive/Sentinels/$base.snt" ]; then
      continue
    fi
    PMStash "$fl"
  done
fi
echo "Refreshing Versioned Records"
pm-refresh "$MASTER/Archive"
seconds_end=$(date "+%s")
seconds=$((seconds_end - seconds_start))
POP=$seconds
echo "$POP seconds"
sleep 1

echo ""

# Smoke test: fetch record 18810966 from the local archive and print
# "Archive is <Initials>" for the author whose affiliation contains
# "Medicine". No output presumably means the archive lookup failed.
printf '%s\n' 18810966 |
fetch-pubmed -path "$MASTER/Archive" |
xtract -pattern Author -if Affiliation -contains Medicine \
  -pfx "Archive is " -element Initials

echo ""

# Incremental indexing: rchive builds index files under WORKING/Index from
# the archive, using the MeSH tree table in Extras as its transform file.
if [ "$e2index" = true ]; then
  seconds_start=$(date "+%s")
  echo "Incremental Indexing"
  rchive -e2incIndex "$MASTER/Archive" "$WORKING/Index" \
    -transform "$WORKING/Extras/meshtree.txt" -e2index
  seconds_end=$(date "+%s")
  IDX=$((seconds_end - seconds_start))
  seconds=$IDX
  echo "IDX $IDX seconds"
  echo ""
  sleep 1
fi

# Incremental inversion: rchive turns the files under WORKING/Index into
# inverted files under WORKING/Invert.
if [ "$e2invert" = true ]; then
  seconds_start=$(date "+%s")
  echo "Incremental Inversion"
  rchive -e2incInvert "$WORKING/Index" "$WORKING/Invert"
  seconds_end=$(date "+%s")
  INV=$((seconds_end - seconds_start))
  seconds=$INV
  echo "INV $INV seconds"
  echo ""
  sleep 1
fi

# Full indexing only: merge every inverted file in WORKING/Invert into
# WORKING/Merged.
if [ "$fullIndex" = true ]; then
  seconds_start=$(date "+%s")
  echo "Merging Inverted Indices"
  cd "$WORKING/Invert"
  rchive -gzip -merge "$WORKING/Merged" *.inv.gz
  seconds_end=$(date "+%s")
  MRG=$((seconds_end - seconds_start))
  seconds=$MRG
  echo "MRG $MRG seconds"
  echo ""
  sleep 1
  # The merge counts as complete only if zz.mrg.gz exists. On failure the
  # script does not exit; it clears fullIndex so the postings and index
  # check steps are skipped.
  [ -f "$WORKING/Merged/zz.mrg.gz" ] || {
    echo "ERROR: Merge failed to complete - missing zz.mrg.gz file"
    echo ""
    echo "EXITING DUE TO BUILD FAILURE"
    echo ""
    fullIndex=false
  }
fi

# Full indexing only: promote the merged files into postings under
# MASTER/Postings, handing rchive up to 100 file names per invocation.
if [ "$fullIndex" = true ]; then
  seconds_start=$(date "+%s")
  echo "Producing Postings Files"
  cd "$WORKING/Merged"
  # Field groups that make up the promoted term list.
  idnt="UID"
  word="TITL TIAB PAIR"
  mesh="CODE TREE MESH SUBH"
  jour="YEAR PDAT RDAT JOUR VOL ISS PAGE LANG PROP PTYP"
  auth="ANUM AUTH FAUT LAUT CSRT INVR"
  # NOTE(review): misc is defined but not part of the term list - confirm
  # whether INUM and SIZE are meant to be promoted.
  misc="INUM SIZE"
  term="$idnt $word $mesh $jour $auth"
  printf '%s\n' *.mrg.gz |
  sort |
  xargs -n 100 echo |
  while read files
  do
    # $files is left unquoted on purpose: it holds one batch of names.
    rchive -promote "$MASTER/Postings" "$term" $files
  done
  seconds_end=$(date "+%s")
  PST=$((seconds_end - seconds_start))
  seconds=$PST
  echo "PST $PST seconds"
  echo ""
  sleep 1
fi

# Smoke test for the index: run one phrase query against the postings,
# fetch the hits from the archive and print "Archive and Index are
# <Initials>" for the author whose affiliation contains "Medicine".
if [ "$fullIndex" = true ]; then
  phrase-search -path "$MASTER/Postings" \
    -query "mapping of spatio-temporal pollution status [TIAB] AND 2008 [YEAR]" |
  fetch-pubmed -path "$MASTER/Archive" |
  xtract -pattern Author -if Affiliation -contains Medicine \
    -pfx "Archive and Index are " -element Initials
  echo ""
fi

# Summary of the archive/index run: one line per phase that was requested.
echo "ARCHIVE-PUBMED"

echo ""

echo "DWN $DWN seconds"
[ "$clean" = true ] && echo "DEL $DEL seconds"
[ "$scrub" = true ] && echo "SCB $SCB seconds"
echo "POP $POP seconds"
[ "$e2index" = true ] && echo "IDX $IDX seconds"
[ "$e2invert" = true ] && echo "INV $INV seconds"
if [ "$fullIndex" = true ]; then
  echo "MRG $MRG seconds"
  echo "PST $PST seconds"
fi

echo ""

# Reset the counters that the extra-index passes below reuse.
IDX=0 INV=0
MRG=0 PST=0

# BioConcepts / GeneRIF pass: index the PubTator and GeneRIF tables, invert
# and merge the result, then promote it into MASTER/Postings.
# Intermediate files live in WORKING/Scratch/Indexed, WORKING/Scratch/Inverted
# and WORKING/Merged, which are emptied first.
if [ "$bioconcepts" = true ]; then
  # Start from empty areas so files from an earlier pass cannot leak in.
  # Entries are "<directory under WORKING>:<suffix>".
  for spec in "Scratch/Indexed:e2x" "Scratch/Inverted:inv" "Merged:mrg"; do
    target="$WORKING/${spec%%:*}"
    find "$target" -name "*.${spec##*:}" -delete
    find "$target" -name "*.${spec##*:}.gz" -delete
  done
  sleep 1

  # Normalised OS name; under Cygwin/MinGW the rchive target directories
  # are converted to Windows form with cygpath.
  osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')

  # --- index ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Scratch/Indexed" || exit 1
  target="$WORKING/Scratch/Indexed"
  echo "Indexing BioConcepts"
  # Each entry: "<PubTator domain> <transform table> <output prefix>".
  for spec in "chemical meshname biocchem" \
              "disease meshname biocdisz" \
              "gene genename biocgene"
  do
    read -r domain transform prefix <<< "$spec"
    src="$WORKING/Extras/${domain}2pubtatorcentral.gz"
    if [ ! -s "$src" ]; then
      echo "WARNING: $src is missing or empty - skipping" >&2
      continue
    fi
    gunzip -c "$src" |
    rchive -bioconcepts "$WORKING/Extras/$transform.txt" |
    rchive -gzip -thesis 5000000 "$target" "$prefix"
  done
  echo "Indexing GeneRIFs"
  if [ -s "$WORKING/Extras/generifs_basic.gz" ]; then
    gunzip -c "$WORKING/Extras/generifs_basic.gz" |
    rchive -generif "$WORKING/Extras/genename.txt" "$WORKING/Extras/genesyns.txt" |
    rchive -gzip -thesis 5000000 "$target" "generifs"
  else
    echo "WARNING: $WORKING/Extras/generifs_basic.gz is missing or empty - skipping" >&2
  fi
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  IDX=$seconds
  echo "IDX $IDX seconds"
  echo ""
  sleep 1

  # --- invert ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Scratch/Indexed" || exit 1
  echo "Inverting Extra Indices"
  target="$WORKING/Scratch/Inverted"
  for fl in *.e2x.gz
  do
    # With no index files the pattern stays literal. Processing it would
    # leave a file named "*.inv.gz" for the merge step to pick up.
    [ -f "$fl" ] || continue
    base=${fl%.e2x.gz}
    echo "$base.inv"
    gunzip -c "$fl" |
    rchive -e2invert |
    gzip -1 > "$target/$base.inv.gz"
    sleep 1
  done
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  INV=$seconds
  echo "INV $INV seconds"
  echo ""
  sleep 1

  # --- merge ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Scratch/Inverted" || exit 1
  echo "Merging Extra Indices"
  target="$WORKING/Merged"
  if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]; then
    target=$(cygpath -w "$target")
  fi
  target=${target%/}
  inv_files=( *.inv.gz )
  if [ -f "${inv_files[0]}" ]; then
    rchive -gzip -merge "$target" "${inv_files[@]}"
  else
    echo "ERROR: No inverted files found in $WORKING/Scratch/Inverted - nothing to merge" >&2
  fi
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  MRG=$seconds
  echo "MRG $MRG seconds"
  echo ""
  sleep 1

  # --- promote ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Merged" || exit 1
  echo "Producing Extra Postings"
  target="$MASTER/Postings"
  if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]; then
    target=$(cygpath -w "$target")
  fi
  target=${target%/}
  for fl in *.mrg.gz
  do
    [ -f "$fl" ] && echo "$fl"
  done |
  sort |
  xargs -n 100 echo |
  while read -r files
  do
    # Some xargs versions run echo once even with no input.
    [ -n "$files" ] || continue
    # $files is left unquoted on purpose: one batch of up to 100 names.
    rchive -promote "$target" "CHEM DISZ GENE GRIF GSYN PREF" $files
  done
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  PST=$seconds
  echo "PST $PST seconds"
  echo ""
  sleep 1

  echo "INDEX-EXTRAS"

  echo ""

  echo "IDX $IDX seconds"
  echo "INV $INV seconds"
  echo "MRG $MRG seconds"
  echo "PST $PST seconds"

  echo ""
fi

# Make sure the NIH Open Citation Collection archive is present in Extras,
# downloading it when absent. If it is still missing afterwards, the NIH OCC
# pass is disabled; the script itself carries on.
if [ "$nihocc" = true ]; then
  cd "$WORKING/Extras"
  occ_zip="open_citation_collection.zip"
  if [ ! -f "$occ_zip" ]; then
    echo "Downloading NIH Open Citation Collection"
    download-ncbi-data nihocc
    [ -f "$occ_zip" ] || {
      echo "ERROR: Unable to download open_citation_collection.zip file to Extras directory"
      echo ""
      echo "EXITING DUE TO MISSING NCBI OCC DATA FILE"
      echo ""
      nihocc=false
    }
  fi
fi

# NIH Open Citation Collection pass: index the citation links, invert and
# merge them, then promote them into MASTER/Postings as CITED / CITES.
# Intermediate files live in WORKING/Scratch/Indexed, WORKING/Scratch/Inverted
# and WORKING/Merged, which are emptied first.
if [ "$nihocc" = true ]; then
  # Start from empty areas so files from an earlier pass cannot leak in.
  # Entries are "<directory under WORKING>:<suffix>".
  for spec in "Scratch/Indexed:e2x" "Scratch/Inverted:inv" "Merged:mrg"; do
    target="$WORKING/${spec%%:*}"
    find "$target" -name "*.${spec##*:}" -delete
    find "$target" -name "*.${spec##*:}.gz" -delete
  done
  sleep 1

  # Normalised OS name; under Cygwin/MinGW the rchive target directories
  # are converted to Windows form with cygpath.
  osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')

  # --- index ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Scratch/Indexed" || exit 1
  target="$WORKING/Scratch/Indexed"
  echo "Indexing NIH Open Citation Collection"
  unzip -cq "$WORKING/Extras/open_citation_collection.zip" |
  rchive -nihocc |
  rchive -gzip -thesis 20000000 "$target" "nihocc"
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  IDX=$seconds
  echo "IDX $IDX seconds"
  echo ""
  sleep 1

  # --- invert ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Scratch/Indexed" || exit 1
  echo "Inverting Extra Indices"
  target="$WORKING/Scratch/Inverted"
  for fl in *.e2x.gz
  do
    # With no index files the pattern stays literal. Processing it would
    # leave a file named "*.inv.gz" for the merge step to pick up.
    [ -f "$fl" ] || continue
    base=${fl%.e2x.gz}
    echo "$base.inv"
    gunzip -c "$fl" |
    rchive -e2invert |
    gzip -1 > "$target/$base.inv.gz"
    sleep 1
  done
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  INV=$seconds
  echo "INV $INV seconds"
  echo ""
  sleep 1

  # --- merge ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Scratch/Inverted" || exit 1
  echo "Merging Extra Indices"
  target="$WORKING/Merged"
  if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]; then
    target=$(cygpath -w "$target")
  fi
  target=${target%/}
  inv_files=( *.inv.gz )
  if [ -f "${inv_files[0]}" ]; then
    rchive -gzip -mergelink "$target" "${inv_files[@]}"
  else
    echo "ERROR: No inverted files found in $WORKING/Scratch/Inverted - nothing to merge" >&2
  fi
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  MRG=$seconds
  echo "MRG $MRG seconds"
  echo ""
  sleep 1

  # --- promote ---
  seconds_start=$(date "+%s")
  cd "$WORKING/Merged" || exit 1
  echo "Producing Extra Postings"
  target="$MASTER/Postings"
  if [ "$osname" = "CYGWIN_NT" ] && [ -x /bin/cygpath ]; then
    target=$(cygpath -w "$target")
  fi
  target=${target%/}
  for fl in *.mrg.gz
  do
    [ -f "$fl" ] && echo "$fl"
  done |
  sort |
  xargs -n 100 echo |
  while read -r files
  do
    # Some xargs versions run echo once even with no input.
    [ -n "$files" ] || continue
    # $files is left unquoted on purpose: one batch of up to 100 names.
    rchive -promotelink "$target" "CITED CITES" $files
  done
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  PST=$seconds
  echo "PST $PST seconds"
  echo ""
  sleep 1

  echo "INDEX-EXTRAS"

  echo ""

  echo "IDX $IDX seconds"
  echo "INV $INV seconds"
  echo "MRG $MRG seconds"
  echo "PST $PST seconds"

  echo ""
fi

# Total wall-clock time since the script started, followed by the current
# date.
total_end=$(date "+%s")
TOT=$((total_end - total_start))
total=$TOT
printf 'TOT %s seconds\n\n' "$TOT"

date

# ProfileFileName
# Prints the name, without the leading dot, of the startup file the user
# should add the export line to: "bash_profile" or "bashrc".
#   - "bash_profile" when ~/.bashrc mentions bash_profile
#   - otherwise "bashrc" when ~/.bash_profile is missing or mentions bashrc
#   - otherwise "bash_profile"
# Every path is quoted so a HOME containing spaces is handled.
ProfileFileName() {
  local name=bash_profile
  if ! grep "$name" "$HOME/.bashrc" >/dev/null 2>&1; then
    if [ ! -f "$HOME/.$name" ] || grep 'bashrc' "$HOME/.$name" >/dev/null 2>&1; then
      name=bashrc
    fi
  fi
  echo "$name"
}

# CONFIG is set only when the archive path was given on the command line;
# in that case suggest saving it in the environment for future runs.
if [ -n "$CONFIG" ]; then
  target=$(ProfileFileName)
  echo ""
  echo "For convenience, please execute the following to save the archive path to a variable:"
  echo ""
  echo "  echo \"export EDIRECT_PUBMED_MASTER='${CONFIG}'\" >>" "\$HOME/.$target"
  echo ""
fi
