#!/exp/rcf/share/bin/gawk -f

# BEGIN: load the relevance file into memory before any input record is read.
#
# Globals set here and used by the other rules:
#   outputdir                directory prefix for every file this script writes
#   relfile                  name of the relevance file read below
#   docidcount[docid]        number of relevance lines whose field 3 is docid
#   docid2topics[docid, n]   field 1 of the n-th relevance line for docid
#   docid2ranks[docid, n]    field 4 of the n-th relevance line for docid
#   exitflag                 set to 1 when start-up fails, so END can bail out
#
# On failure the script exits with status 1 so a calling shell can detect it.
BEGIN{
  outputdir = "temp/";
  relfile = "new_topics.res";
  if ( system( "test -r " relfile )) {
    print "Error: " relfile " not found... Quiting!";
    exitflag = 1;
    exit 1;
  }
  print "Reading relevance file!";
  # getline returns 1 per record, 0 at end of file and -1 on a read error.
  # Testing "> 0" matters: -1 is true, so a bare `while (getline < file)`
  # would spin forever on an unreadable file (e.g. a directory).
  while ( (relstatus = (getline < relfile)) > 0 ){
    docidcount[$3]++;
    docid2topics[$3,docidcount[$3]] = $1;
    docid2ranks[$3,docidcount[$3]] = $4;
  }
  # Release the descriptor; the file is not read again.
  close(relfile);
  if ( relstatus < 0 ) {
    print "Error: could not read " relfile "... Quiting!";
    exitflag = 1;
    exit 1;
  }
  print "Looking for documents...";
}

# Rule for the line carrying a document's <DOCNO> tag.
#
# The tags and all space characters are removed from the line, leaving the
# bare document id in currentDocID. When that id is present in the relevance
# table, output is switched on and, for each of its relevance entries:
#   - the entry index is echoed to stdout (no newline) as a progress mark,
#   - outputfile[i] is set to
#       <outputdir><first two chars of id>_<topic>_<rank>.dat
#   - docidcountV[id] is incremented (END compares it with docidcount),
#   - a "<DOC>" / "<DOCNO> id </DOCNO>" header is appended to that file.
# The line itself is never passed on to later rules.
/<DOCNO>/{
  gsub(/<DOCNO>|<\/DOCNO>| /,"");
  currentDocID = $0;

  # Ids that are absent from the relevance table need no further work.
  if (!(currentDocID in docidcount))
    next;

  output = 1;
  i = 0;
  while (++i <= docidcount[currentDocID]) {
    printf "%d",i;
    outputfile[i] = sprintf("%s%s_%s_%s.dat", outputdir, substr(currentDocID,1,2), docid2topics[currentDocID,i], docid2ranks[currentDocID,i]);
    docidcountV[currentDocID] += 1;
    printf("<DOC>\n<DOCNO> %s </DOCNO>\n", currentDocID) >> outputfile[i];
  }
  next;
}

# Rule for the <DOC> line that opens a new document.
#
# Closes the output files of the previous document if it was being copied,
# then resets the per-document state (currentDocID, outputfile[1], output)
# until the next <DOCNO> line is seen. The <DOC> line itself is not copied
# here; the <DOCNO> rule writes its own "<DOC>" header line.
/<DOC>/{
  if (exitflag == 1)
    exit;

  if (output == 1){
    # One file was in use per relevance entry of the previous document.
    i = 1;
    while (i <= docidcount[currentDocID]){
      close(outputfile[i]);
      i++;
    }
  }

  output = 0;
  currentDocID = "unknown";
  outputfile[1] = outputdir "none";
  next;
}

# Every remaining line: while a selected document is being copied
# (output == 1), append the line to each of that document's output files.
output == 1 {
  for (i = 1; i <= docidcount[currentDocID]; ++i)
    print >> outputfile[i];
}


# END: write the warnings file and print the closing messages.
#
# Does nothing when BEGIN flagged a start-up failure through exitflag.
# Otherwise appends a two-line header to <outputdir>warning.txt, followed by
# every document id whose number of relevance entries (docidcount) differs
# from the number of files started for it (docidcountV). An id that was never
# seen in the input differs, and so does one seen more than once, because
# docidcountV grows on every occurrence.
END{
  if (exitflag == 1)
    exit;

  print " Done!";
  print "Creating warnings file...";

  outputfile[1] = outputdir "warning.txt";
  print "Warnings:  Files not found" >> outputfile[1];
  print "DOCID" >> outputfile[1];

  for ( docid in docidcount ){
    # Matching counts mean the document was handled as expected.
    if (docidcount[docid] == docidcountV[docid])
      continue;
    print docid >> outputfile[1];
  }
  close(outputfile[1]);

  print "Script ran successfully, check for warnings, Good bye!";
}


