Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members

rel_count.cc

Go to the documentation of this file.
00001 /* ===========================================================================
00002  *        Filename:  rel_count.cc
00003  *     Description:  Relation count class
00004  * 
00005  *         Version:  $Rev: 3 $
00006  *         Changed:  $Date: 2005-03-02 14:30:37 -0800 (Mi, 02 Mär 2005) $
00007  *         Licence:  GPL (read COPYING file for details)
00008  * 
00009  *          Author:  Erich Schubert (eS), erich@debian.org
00010  *                   Institut für Informatik, LMU München
00011  * ======================================================================== */
00012 #include "rel_count.h"
00013 #include <utility> /* for make_pair */
00014 
00015 namespace SSD {
00016 
00017 /* for static lookup hashmap */
00018 unsigned int RelCount::len = 0;
00019 hash_map<RelEqClass, unsigned int, hash_releqc> RelCount::cmap;
00020 
00021 RelCount::RelCount(hash_map<RelEqClass, int, hash_releqc>& map1, hash_map<RelEqClass, int, hash_releqc>& map2) : data(NULL) {
00022         hash_map<RelEqClass, int, hash_releqc>::iterator i1, i2;
00023         vector<short> tmpdata;
00024         
00025         /* initialize the first RelCount from the tables of two documents */
00026         if (len != 0) throw "RelCount has been initialized before";
00027 
00028         for (i1 = map1.begin(); i1 != map1.end(); i1++) {
00029                 i2 = map2.find(i1->first);
00030 
00031                 if (i2 == map2.end()) {
00032                         cmap[i1->first] = RELCOUNT_UNIQUE;
00033                 } else {
00034                         if (i1->second == i2->second && i1->second == 1) {
00035                                 cmap[i1->first] = RELCOUNT_ONCE;
00036                         } else {
00037                                 cmap[i1->first] = len;
00038                                 tmpdata.push_back(i1->second - i2->second);
00039                                 len++;
00040                         }
00041                 }
00042         }
00043         for (i2 = map2.begin(); i2 != map2.end(); i2++) {
00044                 i1 = map1.find(i2->first);
00045 
00046                 if (i1 == map1.end()) {
00047                         cmap[i2->first] = RELCOUNT_UNIQUE;
00048                 }
00049         }
00050         /* copy data to the minimal data structure */
00051         data = (short*) calloc(len, sizeof(short));
00052         if (tmpdata.size() != len) throw "Inconsitency detected!";
00053         for (unsigned int i=0; i<tmpdata.size(); i++)
00054                 /* we multiply by 2, so we can subtract twice in diff.c */
00055                 data[i] = tmpdata[i] * 2;
00056 }
00057 
00058 RelCount::RelCount(RelCount& rc) : data(NULL) {
00059         if (len > 0 && rc.data) {
00060                 data = (short*) calloc(len, sizeof(short));
00061                 memcpy(data, rc.data, sizeof(short)*len);
00062         }
00063 }
00064 
00065 RelCount::RelCount() : data(NULL) {
00066         if (len > 0) {
00067                 data = (short*) calloc(len, sizeof(short));
00068         }
00069 }
00070 
00071 RelCount::~RelCount() {
00072         if (data) { free(data); data=NULL; }
00073 }
00074 
00075 int RelCount::modify(RelEqClass key, short val) {
00076         int cost=0;
00077 
00078         /* find the key in lookup table */
00079         hash_map<RelEqClass, unsigned int, hash_releqc>::iterator pos = cmap.find(key);
00080         if (pos != cmap.end()) {
00081                 /* unique keys don't generate costs */
00082                 if (pos->second == RELCOUNT_UNIQUE) return 0;
00083                 /* key occuring once each have a cost of 1 if dropped from the first document */
00084                 if (pos->second == RELCOUNT_ONCE)   return (val > 0) ? 1 : 0;
00085 #ifdef CAREFUL
00086                 if (pos->second >= len)
00087                         throw "pos->second out of range in RelCount::modify";
00088 #endif
00089 
00090                 int oldval = data[pos->second];
00091                 int newval = oldval - val;
00092                 data[pos->second] = newval;
00093 
00094                 /* calculate costs */
00095                 if (oldval >= 0) {
00096                         if (   val < 0) cost += abs(val);
00097                         if (newval < 0) cost += abs(newval);
00098                 }
00099                 if (oldval <= 0) {
00100                         if (   val > 0) cost += val;
00101                         if (newval > 0) cost += newval;
00102                 }
00103         } else
00104                 throw "New key encountered during 'modify'.";
00105         return cost;
00106 }
00107 
00108 void RelCount::operator+=(const RelCount& other) {
00109         for (unsigned int i=0; i<len; i++)
00110                 data[i] += other.data[i];
00111 }
00112 
00113 void RelCount::operator-=(const RelCount& other) {
00114         for (unsigned int i=0; i<len; i++)
00115                 data[i] -= other.data[i];
00116 }
00117 
00118 std::ostream &operator<<(std::ostream &out, const RelCount& rc) {
00119         out << "[RC:";
00120         for (unsigned int i = 0; i < rc.len; i++) {
00121                 out << rc.data[i] << ",";
00122         }
00123         out << "]";
00124         return out;
00125 }
00126 
00127 void RelCount::dumpIndex(std::ostream &out) {
00128         hash_map<RelEqClass, unsigned int, hash_releqc>::iterator i;
00129         for (i = cmap.begin(); i != cmap.end(); i++) {
00130                 out << i->first << ": " << i->second << " ";
00131         }
00132 }
00133 
00134 }

Generated on Thu Aug 4 17:57:12 2005 for SSDDiff by  doxygen 1.4.3-20050530