/***************************************************************************
 *   Copyright (C) 2004-2025 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 2.0.3. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "pubmedXml.h"

#include "cb2bib_utilities.h"


// Constructs the reader from the XML text in xml and parses it right away:
// the stored reference is cleared, the text is handed to the stream reader,
// and readReference() fills the reference fields.
pubmedXml::pubmedXml(const QString& xml)
{
    _reference.clearReference();
    addData(xml);
    readReference();
}


void pubmedXml::readReference()
{
    if (hasError())
        return;

    while (reading("PubmedArticle"))
        if (name().toLatin1() == "Journal")
            readJournal();
        else if (name().toLatin1() == "AuthorList")
            readAuthors();
        else if (name().toLatin1() == "MeshHeadingList")
            readKeywords();
        else if (name().toLatin1() == "ArticleTitle")
            readField("title");
        else if (name().toLatin1() == "AbstractText")
            readField("abstract");
        else if (name().toLatin1() == "MedlinePgn")
        {
            readNext();
            _reference["pages"] = c2bUtils::removed(text().toString(), QRegExp("\\,[\\s\\d]+")); // Cases "11-5, 25"
        }
        else if (name().toLatin1() == "PMID")
        {
            readNext();
            _reference["url"] = c2bUtils::pubmedUrl.arg(text().toString());
        }
        else if (name().toLatin1() == "ArticleId")
        {
            if (attributes().count() > 0)
                if (attributes().at(0).value().toLatin1() == "doi")
                    readField("doi");
        }
}

void pubmedXml::readField(const QString& field)
{
    _reference[field] = readElementText(QXmlStreamReader::IncludeChildElements);
}

void pubmedXml::readAuthors()
{
    QStringList authors;
    while (reading("AuthorList"))
    {
        if (name().toLatin1() == "Author" && attributes().value("ValidYN").toLatin1() != "N")
        {
            QString a, i;
            bool abbreviated(true);
            while (reading("Author"))
                if (name().toLatin1() == "LastName")
                {
                    readNext();
                    a = a + ' ' + text().toString();
                }
                else if (name().toLatin1() == "ForeName" || name().toLatin1() == "FirstName")
                {
                    readNext();
                    a = text().toString() + ' ' + a;
                    abbreviated = false;
                }
                else if (name().toLatin1() == "Suffix")
                {
                    readNext();
                    if (text().toLatin1() == "2nd")
                        a = a + " II";
                    else if (text().toLatin1() == "3rd")
                        a = a + " III";
                    else
                        a = a + ' ' + text().toString();
                }
                else if (name().toLatin1() == "Initials")
                {
                    readNext();
                    i = text().toString();
                }
            if (abbreviated)
                authors.append(i + ' ' + a);
            else
                authors.append(a);
        }
    }
    _reference["author"] = authors.join(", ");
}

void pubmedXml::readJournal()
{
    QString ja, jf, mldate;
    while (reading("Journal"))
        if (name().toLatin1() == "ISOAbbreviation")
        {
            readNext();
            ja = text().toString();
        }
        else if (name().toLatin1() == "Title")
        {
            readNext();
            jf = text().toString();
        }
        else if (name().toLatin1() == "Volume")
            readField("volume");
        else if (name().toLatin1() == "Issue")
            readField("number");
        else if (name().toLatin1() == "Year")
            readField("year");
        else if (name().toLatin1() == "MedlineDate")
        {
            readNext();
            mldate = text().toString();
        }
    if (jf.isEmpty())
        _reference["journal"] = ja;
    else
        _reference["journal"] = jf;
    if (_reference.value("year").isEmpty())
    {
        c2bUtils::remove(mldate, QRegExp("[^\\d\\s-]"));
        c2bUtils::remove(mldate, QRegExp("\\b\\d{1,3}\\b"));
        _reference["year"] = mldate;
    }
}

void pubmedXml::readKeywords()
{
    QStringList k;
    while (reading("MeshHeadingList"))
        if (name().toLatin1() == "DescriptorName")
        {
            readNext();
            k.append(text().toString());
        }
    _reference["keywords"] = k.join(", ");
}

bool pubmedXml::reading(const QString& tag)
{
    while (!atEnd())
    {
        TokenType t = readNext();
        if (t == StartElement)
            return true;
        else if (t == EndElement)
            if (name() == tag)
                return false;
    }
    return false;
}
