#!/bin/sh

# Public domain notice for all NCBI EDirect scripts is located at:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice

# Converts PubmedArticle XML to Pubmed-entry text ASN.1

# efetch -db pubmed -id 2539356,1937004 -format xml | pma2pme -std

# Output mode: "true" stops at the XML intermediate (handy for archiving),
# "false" carries on to ASN.1 text
xml=false

# Author package to emit; the original Medline-style (ml) layout is the default
AUTHTYPE="AUTHML"

# Walk the command line one word at a time; a later flag overrides an earlier one
while [ "$#" -ne 0 ]
do
  case "$1" in
    -std | -STD | std | STD )
      # separate last name and initials fields
      AUTHTYPE="AUTHSTD"
      ;;
    -ml | -ML | ml | ML )
      # traditional Medline author format (default)
      AUTHTYPE="AUTHML"
      ;;
    -xml | xml )
      # stop before the ASN.1 conversion
      xml=true
      ;;
    -asn | asn )
      # convert to ASN.1 (default)
      xml=false
      ;;
    * )
      # anything else is fatal; the complaint goes to stderr
      echo "$0: Unrecognized argument $1" >&2
      exit 1
      ;;
  esac
  # every accepted flag is exactly one word
  shift
done

# conversion function
#
# ConvertXMLtoASN reads PubmedArticle XML on stdin and writes Pubmed-entry
# XML records on stdout.  Despite its name it does not emit ASN.1 text itself:
# the code at the bottom of this script decides whether to print this XML as
# is or to pipe it through a final xtract step.
#
# Globals:
#   AUTHTYPE (read) - name of the author package (AUTHSTD or AUTHML) that the
#                     last stage copies into the output
#
# Pipeline stages:
#   1. transmute -mixed -normalize pubmed
#        preprocessing pass over the incoming XML (see the EDirect
#        documentation for its exact effects)
#   2. first xtract (-rec PM -pattern PubmedArticle)
#        builds an intermediate PM record holding PMID plus the packages
#        EMDATE, TITL, AUTHSTD, AUTHML, JOUR (JTAS and IMP), IDS and ABST.
#        Both author layouts are always generated here.  PublicationStatus is
#        held in the PS variable and written out as pubstatus_ inside IMP/imp.
#   3. second xtract (-rec Pubmed-entry -pattern PM)
#        reassembles the PM pieces as pmid_ followed by medent, which contains
#        em_std, cit (title, authors, from_journal, ids) and abstract.
#
# The xtract argument lists are position-sensitive and are kept exactly as
# written; do not reorder them.
ConvertXMLtoASN() {

  transmute -mixed -normalize pubmed |
  xtract -strict -rec PM -pattern PubmedArticle -PS "()" \
    -wrp PMID -element MedlineCitation/PMID \
    -group PubmedData -PS PublicationStatus \
    -group PubmedData/History/PubMedPubDate \
      -if "@PubStatus" -equals pubmed -pkg EMDATE/em_std \
        -wrp year_ -element Year \
        -wrp month_ -element Month \
        -wrp day_ -element Day \
    -group MedlineCitation -pkg TITL \
      -wrp name -prose ArticleTitle \
    -group AuthorList -pkg AUTHSTD/authors/names_std \
      -branch Author \
        -block Author -pkg "_" \
          -subset Author -pkg name_name \
            -wrp last -author LastName \
            -wrp initials -author Initials \
          -subset Affiliation \
            -wrp affil_str -prose Affiliation \
    -group AuthorList -pkg AUTHML/authors/names_std \
      -branch Author \
        -block Author -pkg "_" \
          -subset Author \
            -wrp name_ml -sep " " -author LastName,Initials \
          -subset Affiliation \
            -wrp affil_str -prose Affiliation \
    -group Article -pkg JOUR \
      -block Journal -pkg JTAS/title \
        -wrp iso-jta -element ISOAbbreviation \
        -wrp ml-jta -element ISOAbbreviation \
        -wrp issn -element ISSN \
        -wrp name -element Title \
      -branch Article -pkg IMP/imp \
        -block PubDate -pkg date_std \
          -wrp year_ -year "PubDate/*" \
          -wrp month_ -month "PubDate/*" \
          -wrp day_ -element PubDate/Day \
        -block JournalIssue \
          -wrp volume -element Volume \
          -wrp issue -element Issue \
        -block Article \
          -wrp pages -element MedlinePgn \
          -wrp language -element Language \
          -wrp pubstatus_ -element "&PS" \
    -group PubmedData/ArticleIdList -pkg IDS/ids \
      -block ArticleId -if "@IdType" -equals pubmed \
        -wrp pubmed_ -element ArticleId \
      -block ArticleId -if "@IdType" -equals pmc \
        -pfx "<other><db>pmc</db><tag_str>" \
        -sfx "</tag_str></other>" -element ArticleId \
      -block ArticleId -if "@IdType" -equals doi \
        -wrp doi -element ArticleId \
    -group MedlineCitation -pkg ABST/abstract \
      -block Abstract/AbstractText \
        -wrp " " -pfx "" -sfx "" -sep ": " -tab " " \
          -prose "AbstractText@Label,AbstractText" |
  xtract -rec Pubmed-entry -pattern PM \
    -PS PS/pubstatus_ \
    -wrp pmid_ -element PMID \
    -division PM -pkg medent \
      -group EMDATE/em_std -element "*" \
      -group PM -pkg cit \
        -branch TITL/name -pkg title -element "*" \
        -branch "${AUTHTYPE}/authors" -element "*" \
        -branch JOUR -pkg from_journal \
          -block JTAS/title -element "*" \
          -block IMP/imp -element "*" \
        -branch IDS/ids -element "*" \
      -group ABST/abstract -element "*"
}

# Emit the result: the XML intermediate as is when -xml was requested,
# otherwise run it through the final xtract step for ASN.1 output (default)
case "$xml" in
  true )
    ConvertXMLtoASN
    ;;
  * )
    ConvertXMLtoASN | xtract -pattern Pubmed-entry -element "."
    ;;
esac
