System documentation of the GNU Image-Finding Tool

CAcSQLInvertedFile.h
1 /* -*- mode: c++ -*-
2 */
3 /*
4 
5  GIFT, a flexible content based image retrieval system.
6  Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
7 
8  Copyright (C) 2003, 2004 Bayreuth University
9  2005 Bamberg University
10  This program is free software; you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation; either version 2 of the License, or
13  (at your option) any later version.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 
24 */
25 // -*- mode: c++ -*-
26 
27 
28 class CXMLElement;
29 
49 #ifndef _CINVERTEDFILEACCESSOR
50 #define _CINVERTEDFILEACCESSOR
51 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
52 #include <string>
53 #include "libMRML/include/TID.h"
54 #include "libMRML/include/CSelfDestroyPointer.h"
55 #include "libMRML/include/CArraySelfDestroyPointer.h"
56 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
57 #include "CCollectionFrequencyList.h"
58 #include "libGIFTAcInvertedFile/include/CADIHash.h"
59 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
60 #include <iostream>
61 #include <fstream>
62 #include <map>
63 #include <vector>
64 #ifdef HAS_HASH_MAP
65 #include <hash_map>
66 #else
67 #define hash_map map
68 #endif
69 #include <functional>
70 #include <algorithm>
71 
72 #include "libMRML/include/CMagic.h"
73 
74 
// Feature identifiers use the same underlying type as TID (plain alias, no distinct type).
75 typedef TID TFeatureID ;
76 
// An accessor to an inverted file.
// Derives from CAcURL2FTS, which this page describes as a base class for accessors that use
// an URL2FTS file.
// NOTE(review): the leading numbers are the line numbers of the generated listing; the gaps
// mark lines the generator omitted (presumably the original doc comments).
// NOTE(review): ifstream, string, ostream and list are used unqualified and no <list> include
// is visible in this header -- presumably provided by uses-declarations.h / transitive
// includes; confirm.
// NOTE(review): when HAS_HASH_MAP is not defined, the preprocessor block above this class
// replaces hash_map with map, so the "hash" members below become map instances.
83 class CAcInvertedFile:public CAcURL2FTS{
84 
85 protected:
93 
    // Feature -> offset in inverted file.
    // Declared mutable, so it can be modified from const member functions.
95  mutable ifstream mOffsetFile;
96 
99 
102 
105 
108 
    // Map type from feature id to the offset for this feature.
110  typedef hash_map<TID,unsigned int> CIDToOffset;//new hash
    // Map from feature id to the offset for this feature.
112  CIDToOffset mIDToOffset;
113 
    // Map from feature to the collection frequency.
    // Declared mutable, so it can be modified from const member functions.
115  mutable hash_map<TID,double> mFeatureToCollectionFrequency;//new hash
116 
    // Map from the feature ID to the feature description.
120  hash_map<TID,unsigned int> mFeatureDescription;//new hash_
121 
127 
    // Adds a pair (feature ID, offset) to the open offset file; helper for inverted file
    // construction.
    //   inFeatureID      - feature whose offset is recorded
    //   inPosition       - the offset to record (an int; units not visible here)
    //   inOpenOffsetFile - already opened output stream receiving the pair
130  void writeOffsetFileElement(TID inFeatureID,
131  int inPosition,
132  ostream& inOpenOffsetFile);
    // Loads a *.fts file named inFileName.
    // NOTE(review): returns a raw pointer; ownership is not visible here -- confirm who deletes.
134  CDocumentFrequencyList* getFeatureFile(string inFileName)const;
135 public:
    // For testing if the inverted file is correctly constructed.
137  bool operator()()const;
138 
    // Opens an existing inverted file described by inCollectionElement, then initializes this
    // structure.
153  CAcInvertedFile(const CXMLElement& inCollectionElement);
    // Called by constructors.
    // NOTE(review): the meaning of the bool parameter and of the return value is not visible
    // here.
155  bool init(bool);
156 
159 
    // Presumably translates a document ID to its URL (the counterpart of URLToID) -- TODO
    // confirm; the generated description for this member is only a placeholder.
161  string IDToURL(TID inID)const;
162 
    // Translates an URL to its document ID.
    // NOTE(review): this page's cross-reference lists a URLToID returning pair<bool, TID>;
    // this declaration returns a plain TID, so the result for an unknown URL is not
    // visible here.
164  TID URLToID(const string& inURL)const;
165 
    // Gives the list of documents containing the given feature.
    // NOTE(review): raw pointer return; ownership not visible here.
169  CDocumentFrequencyList* FeatureToList(TFeatureID)const;
170 
    // List of features contained by a document with URL inURL.
    // NOTE(review): raw pointer return; ownership not visible here.
172  CDocumentFrequencyList* URLToFeatureList(string inURL)const;
173 
    // List of features contained by a document with ID inDID.
    // NOTE(review): raw pointer return; ownership not visible here.
175  CDocumentFrequencyList* DIDToFeatureList(TID inDID)const;
176 
178 
179 
    // Collection frequency for a given feature.
183  double FeatureToCollectionFrequency(TFeatureID)const;
184 
    // What kind of feature is the feature with ID inFeatureID?
186  unsigned int getFeatureDescription(TID inFeatureID)const;
188 
    // Returns the maximum document frequency for one document ID.
192  double DIDToMaxDocumentFrequency(TID)const;
193 
    // Returns the document-frequency square sum for a given document ID.
195  double DIDToDFSquareSum(TID)const;
196 
    // Returns the "square DF log ICF sum" for a given document ID.
    // NOTE(review): the exact formula is not visible here; the name is the only hint.
198  double DIDToSquareDFLogICFSum(TID)const;
200 
201  /*@name Inverted File Generation and Consistency Checking*/
203 
    // Generates an inverted file, if there is none.
211  bool generateInvertedFile();
212 
221 
    // Checks the consistency of the inverted file system accessed by this accessor.
224  bool checkConsistency();
225 
    // Tests whether the document inDocumentID is contained in the document frequency list of
    // the feature inFeatureID.
    // NOTE(review): the role of inDocumentFrequency in the test is not visible here (the
    // generated description is truncated).
229  bool findWithinStream(TID inFeatureID,
230  TID inDocumentID,
231  double inDocumentFrequency)const;
232 
234 
    // Described on this page as "interesting for browsing"; presumably returns the maximum
    // feature ID arising in this file -- TODO confirm.
236  TID getMaximumFeatureID()const;
    // Gets a list of all features contained in this accessor.
    // NOTE(review): raw pointer return; ownership not visible here.
244  list<TID>* getAllFeatureIDs()const;
245 };
246 
247 #endif
virtual double DIDToMaxDocumentFrequency(TID) const =0
returns the maximum document frequency for one document ID
virtual pair< bool, TID > URLToID(const string &inURL) const =0
Translate a URL to its document ID.
CIDToOffset mIDToOffset
map from feature id to the offset for this feature
Definition: CAcSQLInvertedFile.h:112
bool init(bool)
called by constructors
This class captures the structure of an XML element.
Definition: CXMLElement.h:51
A list of Document Frequency Elements (the main part of an inverted file)
Definition: CDocumentFrequencyList.h:58
string mFeatureDescriptionFileName
Name for the file with the feature description.
Definition: CAcSQLInvertedFile.h:107
virtual list< TID > * getAllFeatureIDs() const =0
Getting a list of all features contained in this.
virtual bool checkConsistency()=0
Check the consistency of the inverted file system accessed by this accessor.
virtual double DIDToSquareDFLogICFSum(TID) const =0
Returns this function for a given document ID.
virtual unsigned int getFeatureDescription(TID inFeatureID) const =0
What kind of feature is the feature with ID inFeatureID?
~CAcInvertedFile()
Destructor.
bool findWithinStream(TID inFeatureID, TID inDocumentID, double inDocumentFrequency) const
Is the Document with inDocumentID contained in the document frequency list of the feature inFeatureID...
TID mMaximumFeatureID
the maximum feature ID arising in this file
Definition: CAcSQLInvertedFile.h:87
CADIHash.
Definition: CADIHash.h:53
An accessor to an inverted file.
Definition: CAcInvertedFile.h:83
CDocumentFrequencyList * getFeatureFile(string inFileName) const
loads a *.fts file.
virtual double DIDToDFSquareSum(TID) const =0
Returns the document-frequency square sum for a given document ID.
CAcInvertedFile(const CXMLElement &inCollectionElement)
This opens an existing inverted file and then initializes this structure.
virtual double FeatureToCollectionFrequency(TFeatureID) const =0
Collection frequency for a given feature.
virtual CDocumentFrequencyList * FeatureToList(TFeatureID inFID) const =0
Give the List of documents containing the feature inFID.
virtual bool operator()() const =0
for testing if the inverted file is correctly constructed
virtual bool generateInvertedFile()=0
Generating an inverted File, if there is none.
hash_map< TID, double > mFeatureToCollectionFrequency
map from feature to the collection frequency
Definition: CAcSQLInvertedFile.h:115
CSelfDestroyPointer< istream > mInvertedFile
The inverted file.
Definition: CAcSQLInvertedFile.h:92
hash_map< TID, unsigned int > CIDToOffset
map from feature id to the offset for this feature
Definition: CAcSQLInvertedFile.h:110
ifstream mOffsetFile
Feature -> Offset in inverted file.
Definition: CAcSQLInvertedFile.h:95
TID getMaximumFeatureID() const
This is interesting for browsing.
hash_map< TID, unsigned int > mFeatureDescription
map from the feature ID to the feature description
Definition: CAcSQLInvertedFile.h:120
CADIHash mDocumentInformation
additional information about the document like, e.g.
Definition: CAcSQLInvertedFile.h:125
virtual CDocumentFrequencyList * DIDToFeatureList(TID inDID) const =0
List of features contained by a document with ID inDID.
string mInvertedFileName
Name of the inverted file.
Definition: CAcSQLInvertedFile.h:101
ifstream mFeatureDescriptionFile
File of feature descriptions.
Definition: CAcSQLInvertedFile.h:98
CArraySelfDestroyPointer< char > mInvertedFileBuffer
A buffer, if the inverted file is to be held in ram.
Definition: CAcSQLInvertedFile.h:90
This accessor is a base class for accessors which use an URL2FTS file to implement the interface of t...
Definition: CAcURL2FTS.h:66
virtual CDocumentFrequencyList * URLToFeatureList(string inURL) const =0
List of features contained by a document with URL inURL.
void writeOffsetFileElement(TID inFeatureID, int inPosition, ostream &inOpenOffsetFile)
add a pair of FeatureID,Offset to the open offset file (helper function for inverted file constructio...
string mOffsetFileName
Name of the Offset file.
Definition: CAcSQLInvertedFile.h:104
virtual string IDToURL(TID inID) const =0
Translate a document ID to its URL.
bool newGenerateInvertedFile()
Generating an inverted File, if there is none.

Need for discussion? Want to contribute? Contact help-gift@gnu.org.
Generated using Doxygen.