/***************************************************************************
                          analyze.h  -  description
                             -------------------
    begin                : Sun Jan 7 2001
    copyright            : (C) 2001 by Jan Mueller
    email                : janmueller7@hotmail.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
/***************************************************************************
                          analyze.h  -  description                              
                             -------------------                                         
    begin                : Thu Jun 15 2000                                           
    copyright            : (C) 2000 by Jan Mueller                         
    email                : janmueller7@hotmail.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   * 
 *                                                                         *
 ***************************************************************************/


#ifndef ANALYZE_H
#define ANALYZE_H

// #include <sql.h>
#include <qlist.h>
#include <qdict.h>
#include <qstring.h>
#include <qregexp.h>
#include "kaspasql.h"

/*
 * Integer type tags, numbered 0..4.
 * NOTE(review): judging by the names they stand for author, publication,
 * part, note and file records, and are presumably the values stored in
 * oidsrec::type and passed as the `type` argument of Analyze::getOIDs() /
 * Analyze::getFileOIDs() -- confirm against the implementation file.
 */
#define TYPEAUTHOR 0
#define TYPEPUBL   1
#define TYPEPART   2
#define TYPENOTE   3
#define TYPEFILE   4

/**
  * Plain record made of two Oid values and an integer type tag.
  * Analyze keeps a QList of these records (its `oids` member) and
  * getOIDs()/getFileOIDs() take a pointer to such a list.
  */
struct oidsrec {
	// NOTE(review): presumably the Oid of the object itself -- confirm.
	Oid obj;
	// NOTE(review): presumably the Oid of the table the object lives in -- confirm.
	Oid table;
	// NOTE(review): presumably one of the TYPE* constants defined above -- confirm.
	int type;
};

/**
  * Plain record made of two Oid values.
  * Analyze stores these records in a QDict (its `tokens` member).
  */
struct dictrec {
	// NOTE(review): presumably the Oid of a word entry -- confirm.
	Oid w;
	// NOTE(review): presumably the Oid of an object related to that word -- confirm.
	Oid o;
};



/**
  * Analyzer class built on top of Sql (inherited as a protected base).
  *
  * Callers configure an instance with setLimit(), setRebuild() and
  * addSuffix(), start it with work() and may stop it with cancel().
  * Instances cannot be copied by client code.
  *
  * @author Jan Mueller
  */
class Analyze : protected Sql {
public:
	Analyze();
	~Analyze();

	/** Entry point that runs the analysis; implemented outside this header. */
	void work();

	/** Words referencing more than l% of the objects are to be deleted. */
	void setLimit(int l) { max = l; }

	/** Chooses whether the index gets rebuilt. */
	void setRebuild(bool r) { rebuild = r; }

	/** Adds a mask for the files to be analyzed, e.g. '*.txt' and '*.html'. */
	void addSuffix(const char *s);

	/** Cancels parsing by raising the termination flag. */
	void cancel() { term = true; }

private:
	// Copy construction and assignment are declared private so that code
	// outside the class cannot copy an Analyze object.
	Analyze(const Analyze&);
	Analyze &operator=(const Analyze&);

	// Settings and state; the order of these members is kept as it was.
	bool rebuild;        // written by setRebuild()
	int max;             // written by setLimit()
	volatile bool term;  // set to true by cancel()

	QList<QRegExp> masks;
	QList<oidsrec> oids;
	QDict<dictrec> tokens;

	// Internal helpers; their definitions are not part of this header.
	void readOIDs();
	void readDict();
	void getOIDs(int type, QString rel, QString field, QList<oidsrec> *l);
	void getFileOIDs(int type, QList<oidsrec> *l);
	void analyzeObj(uint i);
	char *lo2buf(Oid i, long *len);
	void deleteIndex();
	void createIndexes();
	void skipFrequentWords();
	bool fileMatches(const char *s);
};

#endif






