kdecore Library API Documentation

ksycocadict.cpp

00001 /*  This file is part of the KDE libraries
00002  *  Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
00003  *
00004  *  This library is free software; you can redistribute it and/or
00005  *  modify it under the terms of the GNU Library General Public
00006  *  License version 2 as published by the Free Software Foundation;
00007  *
00008  *  This library is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011  *  Library General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU Library General Public License
00014  *  along with this library; see the file COPYING.LIB.  If not, write to
00015  *  the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00016  *  Boston, MA 02111-1307, USA.
00017  **/
00018 
00019 #include "ksycocadict.h"
00020 #include "ksycocaentry.h"
00021 #include "ksycoca.h"
00022 
00023 #include <qptrlist.h>
00024 #include <qvaluelist.h>
00025 #include <kdebug.h>
00026 #include <stdlib.h>
00027 
00028 struct string_entry {
00029   string_entry(QString _key, KSycocaEntry *_payload) 
00030   { keyStr = _key; key = keyStr.unicode(); length = keyStr.length(); payload = _payload; hash = 0; }
00031   uint hash;
00032   int length;
00033   const QChar *key;
00034   QString keyStr;
00035   KSycocaEntry *payload;
00036 };
00037 
00038 template class QPtrList<string_entry>;
00039 
00040 class KSycocaDictStringList : public QPtrList<string_entry>
00041 {
00042 public:
00043    KSycocaDictStringList();
00044 };
00045 
00046 KSycocaDictStringList::KSycocaDictStringList()
00047 {
00048    setAutoDelete(true);
00049 }
00050 
00051 KSycocaDict::KSycocaDict()
00052   : d(0), mStr(0), mOffset(0)
00053 {
00054 }
00055 
00056 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00057   : d(0), mStr(str), mOffset(offset)
00058 {
00059    Q_UINT32 test1, test2;
00060    str->device()->at(offset);
00061    (*str) >> test1 >> test2;
00062    if ((test1 > 0x000fffff) || (test2 > 1024))
00063    {
00064        KSycoca::flagError();
00065        mHashTableSize = 0;
00066        mOffset = 0;
00067        return;
00068    }
00069 
00070    str->device()->at(offset);
00071    (*str) >> mHashTableSize;
00072    (*str) >> mHashList;
00073    mOffset = str->device()->at(); // Start of hashtable
00074 }
00075 
00076 KSycocaDict::~KSycocaDict()
00077 {
00078    delete d;
00079 }
00080 
00081 void 
00082 KSycocaDict::add(const QString &key, KSycocaEntry *payload)
00083 {
00084    if (key.isEmpty()) return; // Not allowed (should never happen)
00085    if (!payload) return; // Not allowed!
00086    if (!d)
00087    {
00088        d = new KSycocaDictStringList();
00089    }
00090 
00091    string_entry *entry= new string_entry(key, payload);
00092    d->append(entry);
00093 }
00094    
00095 int 
00096 KSycocaDict::find_string(const QString &key )
00097 {
00098    //kdDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key) << endl;
00099 
00100    if (!mStr || !mOffset)
00101    {
00102       kdError(7011) << "No database available!" << endl;
00103       return 0;
00104    }
00105 
00106    if (mHashTableSize == 0)
00107       return 0; // Unlikely to find anything :-]
00108 
00109    // Read hash-table data 
00110    uint hash = hashKey(key) % mHashTableSize;
00111    //kdDebug(7011) << QString("hash is %1").arg(hash) << endl;
00112 
00113    uint off = mOffset+sizeof(Q_INT32)*hash;
00114    //kdDebug(7011) << QString("off is %1").arg(off,8,16) << endl;
00115    mStr->device()->at( off );
00116 
00117    Q_INT32 offset;
00118    (*mStr) >> offset;
00119 
00120    //kdDebug(7011) << QString("offset is %1").arg(offset,8,16) << endl;
00121    if (offset == 0)
00122       return 0;
00123 
00124    if (offset > 0)
00125       return offset; // Positive ID
00126 
00127    // Lookup duplicate list.
00128    offset = -offset;
00129 
00130    mStr->device()->at(offset);
00131    //kdDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16) << endl;
00132    
00133    while(true)
00134    {
00135        (*mStr) >> offset;
00136        if (offset == 0) break;
00137        QString dupkey;
00138        (*mStr) >> dupkey;
00139        //kdDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey) << endl;
00140        if (dupkey == key) return offset;
00141    }
00142    //kdWarning(7011) << "Not found!" << endl;
00143 
00144    return 0;
00145 }
00146    
00147 uint 
00148 KSycocaDict::count()
00149 {
00150    if (!d) return 0;
00151 
00152    return d->count();
00153 }
00154 
00155 void 
00156 KSycocaDict::clear()
00157 {
00158    delete d;
00159    d = 0;
00160 }
00161 
00162 uint
00163 KSycocaDict::hashKey( const QString &key)
00164 {
00165    int l = key.length();
00166    register uint h = 0;
00167   
00168    for(uint i = 0; i < mHashList.count(); i++)
00169    {
00170       int pos = mHashList[i];
00171       if (pos < 0)
00172       {
00173          pos = -pos-1;
00174          if (pos < l)
00175             h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
00176       } 
00177       else
00178       {
00179          pos = pos-1;
00180          if (pos < l)
00181             h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00182       }
00183    }
00184    return h;
00185 }
00186 
00187 //
00188 // Calculate the diversity of the strings at position 'pos'
00189 int 
00190 calcDiversity(KSycocaDictStringList *d, int pos, int sz)
00191 {
00192    if (pos == 0) return 0;
00193    bool *matrix = (bool *) calloc(sz, sizeof(bool));
00194    uint usz = sz;
00195 
00196    if (pos < 0)
00197    {
00198       pos = -pos-1;
00199       for(string_entry *entry=d->first(); entry; entry = d->next())
00200       {
00201     register int l = entry->length;
00202          if (pos < l && pos != 0)
00203          {
00204            register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
00205        matrix[ hash % usz ] = true;
00206          }
00207       }
00208    }
00209    else
00210    {
00211       pos = pos-1;
00212       for(string_entry *entry=d->first(); entry; entry = d->next())
00213       {
00214          if (pos < entry->length)
00215          {
00216             register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00217             matrix[ hash % usz ] = true;
00218          }
00219       }
00220    }
00221    int diversity = 0;
00222    for(int i=0;i< sz;i++)
00223       if (matrix[i]) diversity++;
00224    
00225    free((void *) matrix);
00226 
00227    return diversity;
00228 }
00229 
00230 //
00231 // Add the diversity of the strings at position 'pos'
00232 void 
00233 addDiversity(KSycocaDictStringList *d, int pos)
00234 {
00235    if (pos == 0) return;
00236    if (pos < 0)
00237    {
00238       pos = -pos-1;
00239       for(string_entry *entry=d->first(); entry; entry = d->next())
00240       {
00241          register int l = entry->length;
00242          if (pos < l)
00243             entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00244       }
00245    }
00246    else
00247    {
00248       pos = pos - 1;
00249       for(string_entry *entry=d->first(); entry; entry = d->next())
00250       {
00251          if (pos < entry->length)
00252             entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00253       }
00254    }
00255 }
00256 
00257 
00258 void 
00259 KSycocaDict::save(QDataStream &str)
00260 {
00261    if (count() == 0)
00262    {
00263       mHashTableSize = 0;
00264       mHashList.clear();
00265       str << mHashTableSize;
00266       str << mHashList;
00267       return;
00268    }
00269 
00270    mOffset = str.device()->at();
00271 
00272    //kdDebug(7011) << QString("KSycocaDict: %1 entries.").arg(count()) << endl;
00273 
00274    //kdDebug(7011) << "Calculating hash keys.." << endl;
00275 
00276    int maxLength = 0;
00277    //kdDebug(7011) << "Finding maximum string length" << endl;
00278    for(string_entry *entry=d->first(); entry; entry = d->next())
00279    {
00280       entry->hash = 0;
00281       if (entry->length > maxLength)
00282          maxLength = entry->length;
00283    }
00284 
00285    //kdDebug(7011) << QString("Max string length = %1").arg(maxLength) << endl;
00286 
00287    // use "almost prime" number for sz (to calculate diversity) and later
00288    // for the table size of big tables
00289    // int sz = d->count()*5-1;
00290    register unsigned int sz = count()*4 + 1;
00291    while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) sz+=2;
00292 
00293    int maxDiv = 0;
00294    int maxPos = 0;
00295    int lastDiv = 0;
00296 
00297    mHashList.clear();
00298 
00299    // try to limit diversity scan by "predicting" positions
00300    // with high diversity
00301    int *oldvec=new int[maxLength*2+1];
00302    for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0;
00303    int mindiv=0;
00304 
00305    while(true)
00306    {
00307       int divsum=0,divnum=0;
00308 
00309       maxDiv = 0;
00310       maxPos = 0;
00311       for(int pos=-maxLength; pos <= maxLength; pos++)
00312       {
00313          // cut off
00314          if (oldvec[pos+maxLength]<mindiv)
00315          { oldvec[pos+maxLength]=0; continue; }
00316 
00317          int diversity = calcDiversity(d, pos, sz);
00318          if (diversity > maxDiv)
00319          {
00320             maxDiv = diversity;
00321             maxPos = pos;
00322          }
00323          oldvec[pos+maxLength]=diversity;
00324          divsum+=diversity; divnum++;
00325       }
00326       // arbitrary cut-off value 3/4 of average seems to work
00327       if (divnum)
00328          mindiv=(3*divsum)/(4*divnum);
00329 
00330       if (maxDiv <= lastDiv)
00331          break;
00332       // qWarning("Max Div = %d at pos %d", maxDiv, maxPos);
00333       lastDiv = maxDiv;
00334       addDiversity(d, maxPos);
00335       mHashList.append(maxPos);
00336    }
00337 
00338    delete [] oldvec;
00339 
00340    for(string_entry *entry=d->first(); entry; entry = d->next())
00341    {
00342       entry->hash = hashKey(entry->keyStr);
00343    }
00344 // fprintf(stderr, "Calculating minimum table size..\n");
00345 
00346    mHashTableSize = sz;
00347 
00348    struct hashtable_entry {
00349       string_entry *entry;
00350       QPtrList<string_entry> *duplicates;
00351       int duplicate_offset;
00352    };
00353 
00354    hashtable_entry *hashTable = new hashtable_entry[ sz ];
00355 
00356    //kdDebug(7011) << "Clearing hashtable..." << endl;
00357    for (unsigned int i=0; i < sz; i++)
00358    {
00359       hashTable[i].entry = 0;
00360       hashTable[i].duplicates = 0;
00361    }
00362 
00363    //kdDebug(7011) << "Filling hashtable..." << endl;
00364    for(string_entry *entry=d->first(); entry; entry = d->next())
00365    {
00366       int hash = entry->hash % sz;
00367       if (!hashTable[hash].entry)
00368       { // First entry
00369          hashTable[hash].entry = entry;
00370       }
00371       else 
00372       {
00373          if (!hashTable[hash].duplicates)
00374          { // Second entry, build duplicate list.
00375             hashTable[hash].duplicates = new QPtrList<string_entry>();
00376             hashTable[hash].duplicates->append(hashTable[hash].entry);
00377             hashTable[hash].duplicate_offset = 0;
00378          }
00379          hashTable[hash].duplicates->append(entry);
00380       }
00381    }
00382 
00383    str << mHashTableSize;
00384    str << mHashList;
00385 
00386    mOffset = str.device()->at(); // mOffset points to start of hashTable
00387    //kdDebug(7011) << QString("Start of Hash Table, offset = %1").arg(mOffset,8,16) << endl;
00388 
00389    for(int pass = 1; pass <= 2; pass++)
00390    {
00391       str.device()->at(mOffset);
00392       //kdDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass) << endl;
00393       for(uint i=0; i < mHashTableSize; i++)
00394       {
00395          Q_INT32 tmpid;
00396          if (!hashTable[i].entry)
00397             tmpid = (Q_INT32) 0;
00398          else if (!hashTable[i].duplicates)
00399             tmpid = (Q_INT32) hashTable[i].entry->payload->offset(); // Positive ID
00400          else
00401             tmpid = (Q_INT32) -hashTable[i].duplicate_offset; // Negative ID
00402          str << tmpid;
00403          //kdDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16) << endl;
00404       }
00405       //kdDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16) << endl;
00406 
00407       //kdDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass) << endl;
00408       for(uint i=0; i < mHashTableSize; i++)
00409       {
00410          if (hashTable[i].duplicates)
00411          {
00412             QPtrList<string_entry> *dups = hashTable[i].duplicates;
00413             hashTable[i].duplicate_offset = str.device()->at();
00414 
00415             /*kdDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2")                           .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()) << endl;
00416 */
00417             for(string_entry *dup = dups->first(); dup; dup=dups->next())
00418             {
00419                str << (Q_INT32) dup->payload->offset(); // Positive ID
00420                str << dup->keyStr;                      // Key (QString)
00421             }
00422             str << (Q_INT32) 0;               // End of list marker (0)
00423          }
00424       }
00425       //kdDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16) << endl;
00426    }
00427 
00428    //kdDebug(7011) << "Cleaning up hash table." << endl;
00429    for(uint i=0; i < mHashTableSize; i++)
00430    {
00431       delete hashTable[i].duplicates;
00432    }
00433    delete [] hashTable;
00434 }
00435 
KDE Logo
This file is part of the documentation for kdelibs Version 3.1.0.
Documentation copyright © 1996-2002 the KDE developers.
Generated on Wed Oct 8 12:20:42 2003 by doxygen 1.2.18 written by Dimitri van Heesch, © 1997-2001