/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include "constants.h"


int main (int argc, char *argv[])
{
  // Program information
  if (argc == 1) {
    cout << "sc-count-usfm reads all the USFM files given as arguments on the commandline." << endl;
    cout << "It counts the USFMs and dumps the results to stdout." << endl;
    cout << "Normally it shows the results in the order it encounters the USFMs in the files." << endl;
    cout << "Sorting is possible too." << endl;
    cout << "--sort-on-usfm    Sorts the USFMs alphabetically." << endl;
    cout << "--sort-on-count   Sorts on USFM count." << endl;
    return 0;
  }
  
  // Get all input files to read.
  // Note that usually a check reads one file only, but as we are counting 
  // USFMs here, we cannot do anything else but read all the files.
  vector<ustring> input_files;
  
  // Process command line arguments.
  bool sort_on_usfm = false;
  bool sort_on_count = false;
  for (int i = 1; i < argc; i++) {
    ustring argument;
    argument = argv[i];
    if (argument == "--sort-on-usfm") {
      sort_on_usfm = true;
    } else if (argument == "--sort-on-count") {
      sort_on_count = true;
    } else {
      input_files.push_back (argument);
    }
  }
  
  // Storage for the markers and their count.
  vector<string> markers;
  vector<int> count;
  
  // Read the USFM files, and check them.
  for (unsigned int i = 0; i < input_files.size(); i++) {
    // Read filenames and go through them all.
    ReadText rt (input_files[i], false, false);
    for (unsigned int i2 = 0; i2 < rt.lines.size(); i2++) {
      // Extract the markers, and deal with them.
      string marker = usfm_extract_marker_within_line (rt.lines[i2]);
      // Discard lines without a marker and get more markers per line.
      while (!marker.empty()) {
        // Look whether this particular USFM is already in the list.
        int found_position = -1;
        for (unsigned int i3 = 0; i3 < markers.size(); i3++) {
          if (marker == markers[i3]) {
            found_position = i3;
            break;
          }
        }
        if (found_position >= 0) {
          // This USfM has been encountered before: increase its count.
          ++count[found_position];
        } else {
          // This is a new USFM: add it and set its count to one.
          markers.push_back (marker);
          count.push_back (1);
        }
        marker = usfm_extract_marker_within_line (rt.lines[i2]);
      }
    }
  }

  // Sort, if need be.
  if (sort_on_usfm ) {
    vector<string> sort_me;
    for (unsigned int i = 0; i < markers.size(); i++) {
      string combination;
      combination = markers[i] + " " + convert_to_string (count[i]);
      sort_me.push_back (combination);
    }
    sort (sort_me.begin(), sort_me.end());
    markers.clear();
    count.clear();
    for (unsigned int i = 0; i < sort_me.size(); i++) {
      Parse parse (sort_me[i]);
      markers.push_back (parse.words[0]);
      count.push_back (convert_to_int (parse.words[1]));
    }
  }
  if (sort_on_count) {
    vector<string> sort_me;
    for (unsigned int i = 0; i < markers.size(); i++) {
      string combination;
      count[i] += 100000;
      combination = convert_to_string (count[i]) + " " + markers[i];
      sort_me.push_back (combination);
    }
    sort (sort_me.begin(), sort_me.end());
    markers.clear();
    count.clear();
    for (unsigned int i = 0; i < sort_me.size(); i++) {
      Parse parse (sort_me[i]);
      markers.push_back (parse.words[1]);
      count.push_back (convert_to_int (parse.words[0]) - 100000);
    }
  }
  
  // Output the results.
  for (unsigned int i = 0; i < markers.size(); i++) {
    cout << xml_tag (0, MESSAGE_TAG, false) << endl;
    cout << xml_text_embed_in_tags (1, USFM_TAG, markers[i]) << endl;
    cout << xml_text_embed_in_tags (1, COUNT_TAG, convert_to_string (count[i])) << endl;
    cout << xml_tag (0, MESSAGE_TAG, true) << endl;
  }
  
  return 0;
}
