Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members

out_merged.cc

Go to the documentation of this file.
00001 /* ===========================================================================
00002  *        Filename:  out_merged.cc
00003  *     Description:  the "merged" output format
00004  * 
00005  *         Version:  $Rev: 6 $
00006  *         Changed:  $Date: 2005-05-10 15:55:44 -0700 (Di, 10 Mai 2005) $
00007  *         Licence:  GPL (read COPYING file for details)
00008  * 
00009  *          Author:  Erich Schubert (eS), erich@debian.org
00010  *                   Institut für Informatik, LMU München
00011  * ======================================================================== */
00012 #include <iostream>
00013 #include <sstream>
00014 #include "config.h"
00015 #include "doc.h"
00016 #include "diff.h"
00017 #include <libxml/tree.h>
00018 #include "util.h"
00019 #include "out_merged.h"
00020 #include "out_common.h"
00021 
00022 using namespace SSD;
00023 namespace SSD {
00024 
00025 #define NAMESPACE_MERGED (xmlChar*)"merged-diff"
00026 
00027 #define INSERTED   (xmlChar*)"ins"
00028 #define DELETED    (xmlChar*)"del"
00029 #define MOVED_AWAY (xmlChar*)"moved-away"
00030 #define MOVED_HERE (xmlChar*)"moved-here"
00031 #define EDIT_NODE  (xmlChar*)"node"
00032 #define EDIT_CONT  (xmlChar*)"content"
00033 #define EDIT_FOLL  (xmlChar*)"following"
00034 #define INS_TEXT   (xmlChar*)"text-moved-here"
00035 #define REM_TEXT   (xmlChar*)"text-moved-away"
00036 #define REM_ATTR   (xmlChar*)"attr-moved-away"
00037 #define A_INSERTED   (xmlChar*)"attr-ins"
00038 #define A_DELETED    (xmlChar*)"attr-del"
00039 //#define A_MOVED_AWAY (xmlChar*)"attr-moved-away"
00040 #define A_MOVED_HERE (xmlChar*)"attr-moved-here"
00041 #define A_SEPARATOR (xmlChar*)";"
00042 
00043 static void markNode(xmlNode* node, xmlChar* text, xmlNsPtr ns) {
00044         if (xmlNodeIsText(node)) {
00045                 if (node->prev) {
00046                         xmlSetNsProp(node->prev,ns,EDIT_FOLL,text);
00047                 } else if (node->parent) {
00048                         xmlSetNsProp(node->parent,ns,EDIT_CONT,text);
00049                 } else {
00050                         throw "Textnode has neither prev sib nor parent!";
00051                 };
00052         } else {
00053                 xmlSetNsProp(node,ns,EDIT_NODE,text);
00054         }
00055 }
00056 
00057 static void xmlAppendNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar* name, const xmlChar* buf, const xmlChar* sep) {
00058         /* do we already have a value? */
00059         xmlChar* cur = xmlGetNsProp(node, name, ns->href);
00060 
00061         /* allocate buffer */
00062         ostringstream buffer;
00063         if (cur)
00064                 buffer << (char*)cur << sep;
00065         buffer << (char*)buf;
00066 
00067         xmlSetNsProp(node, ns, name, (xmlChar*) buffer.str().c_str());
00068 }
00069 
00070 #define OUTPUT_FIRST  1
00071 #define OUTPUT_SECOND 2
00072 #define OUTPUT_BOTH   (OUTPUT_FIRST | OUTPUT_SECOND)
00073 
00074 void attr_diff(xmlNodePtr diff, xmlNsPtr ns, xmlNodePtr p1, xmlNodePtr p2, hash_map<xmlNodePtr, xmlNodePtr, hash<void*> >& map, set<xmlNodePtr>& known, int output_only) {
00075         hash_map<xmlNodePtr, xmlNodePtr, hash<void*> >::iterator i;
00076         xmlAttrPtr a1, a2;
00077         xmlNodePtr remattr = NULL;
00078         if (p1)
00079         for (a1 = p1->properties; a1; a1 = a1->next) {
00080                 i = map.find((xmlNodePtr)a1);
00081                 if (i != map.end()) {
00082                         a2 = (xmlAttrPtr) i->second;
00083                         if (a2->parent == p2) {
00084                                 /* this is a kept attribute */
00085                                 cerr << "Identical attribute: " << a1->name  << endl;
00086                                 if (a1->ns) {
00087                                         xmlNsPtr nsa = xmlSearchNsByHref(diff->doc, diff, a1->ns->href);
00088                                         if (!nsa) {
00089                                                 nsa = xmlNewNs(diff, a1->ns->href, a1->ns->prefix);
00090                                         }
00091                                         xmlSetNsProp(diff, nsa, a1->name, xmlNodeListGetString(p1->doc,a1->children,1));
00092                                 } else {
00093                                         /* just copy the attribute */
00094                                         xmlSetProp(diff, a1->name, xmlNodeListGetString(p1->doc,a1->children,1));
00095                                 }
00096                         } else {
00097                                 cerr << "Attribute moved away: " << a1->name << endl;
00098                                 /* add the catcher node */
00099                                 if (!remattr) {
00100                                         remattr = xmlNewNode(ns, REM_ATTR);
00101                                         xmlAddChild(diff,remattr);
00102                                 }
00103                                 xmlNsPtr nsa = NULL;
00104                                 if (a1->ns) {
00105                                         nsa = xmlSearchNs(remattr->doc, remattr, a1->ns->prefix);
00106                                         if (!nsa) {
00107                                                 nsa = xmlNewNs(remattr, a1->ns->href, a1->ns->prefix);
00108                                         }
00109                                 }
00110                                 xmlSetNsProp(remattr, nsa, a1->name, xmlGetNsProp(p1, a1->name, a1->ns ? a1->ns->prefix : NULL));
00111                         }
00112                 } else if (output_only & OUTPUT_FIRST) {
00113                         cerr << "Attribute deleted: " << a1->name << endl;
00114                         if (p2) {
00115                                 /* mark the attribute as deleted */
00116                                 ostringstream buffer;
00117                                 if (a1->ns && a1->ns->prefix)
00118                                         buffer << (char*) a1->ns->prefix << ':';
00119                                 buffer << a1->name;
00120                                 xmlAppendNsProp(diff, ns, A_DELETED, (xmlChar*) buffer.str().c_str(), A_SEPARATOR);
00121                         }
00122                         /* copy the deleted attribute */
00123                         if (a1->ns) {
00124                                 xmlNsPtr nsa = xmlSearchNsByHref(diff->doc, diff, a1->ns->href);
00125                                 if (!nsa) {
00126                                         nsa = xmlNewNs(diff, a1->ns->href, a1->ns->prefix);
00127                                 }
00128                                 xmlSetNsProp(diff, nsa, a1->name, xmlNodeListGetString(p1->doc,a1->children,1));
00129                         } else {
00130                                 /* just copy the attribute */
00131                                 xmlSetProp(diff, a1->name, xmlNodeListGetString(p1->doc,a1->children,1));
00132                         }
00133                 }
00134         }
00135         if (p2)
00136         for (a2 = p2->properties; a2; a2 = a2->next) {
00137                 i = map.find((xmlNodePtr)a2);
00138                 if (i != map.end()) {
00139                         a1 = (xmlAttrPtr) i->second;
00140                         if (a1->parent == p1) {
00141                                 /* this is a kept attribute */
00142                                 cerr << "Identical attribute: " << a2->name  << endl;
00143                         } else {
00144                                 cerr << "Attribute moved here: " << a2->name << endl;
00145                                 /* copy the attribute here */
00146                                 if (a2->ns) {
00147                                         xmlNsPtr nsa = xmlSearchNsByHref(diff->doc, diff, a2->ns->href);
00148                                         if (!nsa) {
00149                                                 nsa = xmlNewNs(diff, a2->ns->href, a2->ns->prefix);
00150                                         }
00151                                         xmlSetNsProp(diff, nsa, a2->name, xmlNodeListGetString(p2->doc,a2->children,1));
00152                                 } else {
00153                                         /* just copy the attribute */
00154                                         xmlSetProp(diff, a2->name, xmlNodeListGetString(p2->doc,a2->children,1));
00155                                 }
00156                                 /* mark it as moved-here */
00157                                 ostringstream buffer;
00158                                 if (a2->ns && a2->ns->prefix)
00159                                         buffer << a2->ns->prefix << ":";
00160                                 buffer << a2->name;
00161                                 xmlAppendNsProp(diff, ns, A_MOVED_HERE, (xmlChar*) buffer.str().c_str(), A_SEPARATOR);
00162                         }
00163                 } else if (output_only & OUTPUT_SECOND) {
00164                         cerr << "Attribute deleted: " << a2->name << endl;
00165                         if (p1) {
00166                                 /* mark the attribute as deleted */
00167                                 ostringstream buffer;
00168                                 if (a2->ns && a2->ns->prefix)
00169                                         buffer << (char*) a2->ns->prefix << ':';
00170                                 buffer << a2->name;
00171                                 xmlAppendNsProp(diff, ns, A_INSERTED, (xmlChar*) buffer.str().c_str(), A_SEPARATOR);
00172                         }
00173                         /* copy the deleted attribute */
00174                         if (a2->ns) {
00175                                 xmlNsPtr nsa = xmlSearchNsByHref(diff->doc, diff, a2->ns->href);
00176                                 if (!nsa) {
00177                                         nsa = xmlNewNs(diff, a2->ns->href, a2->ns->prefix);
00178                                 }
00179                                 xmlSetNsProp(diff, nsa, a2->name, xmlNodeListGetString(p2->doc,a2->children,1));
00180                         } else {
00181                                 /* just copy the attribute */
00182                                 xmlSetProp(diff, a2->name, xmlNodeListGetString(p2->doc,a2->children,1));
00183                         }
00184                 }
00185         }
00186 }
00187 
00188 void rec_diff(xmlNodePtr diff, xmlNsPtr ns, xmlNodePtr p1, xmlNodePtr p2, hash_map<xmlNodePtr, xmlNodePtr, hash<void*> >& map, set<xmlNodePtr>& known, int output_only) {
00189         hash_map<xmlNodePtr, xmlNodePtr, hash<void*> >::iterator i;
00190         xmlNodePtr pos1;
00191         xmlNodePtr pos2;
00192 
00193         /* for a nice output i need the longest common subsequence at each level.
00194         * this is easier here, since each node can match only once, this
00195         * boils down to longest-increasing-subsequence */
00196 
00197         /* for a good output we need the longest common subsequence at each level.
00198          * this is easier here, since each node can match only once, this
00199          * boils down to longest-increasing-subsequence */
00200         vector<pair<xmlNodePtr, xmlNodePtr> > lcs;
00201         {
00202                 vector<pair<xmlNodePtr, xmlNodePtr> > l1;
00203                 vector<xmlNodePtr> l2;
00204                 for (xmlNodePtr i=p1; i; i=i->next) {
00205                         hash_map<xmlNodePtr, xmlNodePtr, hash<void*> >::iterator f = map.find(i);
00206                         if (f != map.end()) l1.push_back(make_pair(i,f->second));
00207                 }
00208                 for (xmlNodePtr i=p2; i; i=i->next)
00209                         if (known.find(i) != known.end()) l2.push_back(i);
00210                 calcLIS(l1, l2, lcs);
00211         }
00212         vector<pair<xmlNodePtr, xmlNodePtr> >::iterator lcsi = lcs.begin();
00213         pos1 = p1; pos2 = p2;
00214 
00215         while (pos1 || pos2) {
00216                 /* Longest-Subsequence checkpoints */
00217                 xmlNodePtr check1 = NULL;
00218                 xmlNodePtr check2 = NULL;
00219                 if (lcsi != lcs.end()) {
00220                         check1 = lcsi->first;
00221                         check2 = lcsi->second;
00222                         lcsi++;
00223                 }
00224 
00225                 /* output all nodes from first document until LIS point */
00226                 while (pos1 && (pos1 != check1)) {
00227                         if (output_only & OUTPUT_FIRST) {
00228                                 /* ignored nodes are just dumped if applicable */
00229                                 if (known.find(pos1) == known.end()) {
00230                                         /* keep whitespace etc. from first document */
00231                                         xmlNodePtr copy = xmlCopyNode(pos1,2);
00232                                         xmlAddChild(diff,copy);
00233                                 } else {
00234                                         i = map.find(pos1);
00235                                         if (i != map.end()) {
00236                                                 /* Nodes moved away */
00237                                                 if (xmlNodeIsText(i->second)) {
00238                                                         xmlNodePtr wrap = xmlNewNode(ns, REM_TEXT);
00239                                                         xmlAddChild(diff,wrap);
00240                                                         xmlNodePtr copy = xmlCopyNode(i->second,1);
00241                                                         xmlAddChild(wrap, copy);
00242                                                 } else {
00243                                                         xmlNodePtr copy = xmlCopyNode(i->second,0);
00244                                                         xmlAddChild(diff,copy);
00245                                                         attr_diff(copy, ns, pos1, i->second, map, known, output_only & ~OUTPUT_SECOND);
00246                                                         markNode(copy, MOVED_AWAY, ns);
00247                                                         rec_diff(copy, ns, pos1->children, i->second->children, map, known, output_only & ~OUTPUT_SECOND);
00248                                                 }
00249                                         } else {
00250                                                 /* nodes removed altogether */
00251                                                 xmlNodePtr copy = xmlCopyNode(pos1,0);
00252                                                 xmlAddChild(diff,copy);
00253                                                 attr_diff(copy, ns, pos1, NULL, map, known, output_only & ~OUTPUT_SECOND);
00254                                                 if (p2) markNode(copy, DELETED, ns);
00255                                                 rec_diff(copy, ns, pos1->children, NULL, map, known, output_only & ~OUTPUT_SECOND);
00256                                         }
00257                                 }
00258                         }
00259                         pos1 = pos1->next;
00260                 }
00261                 
00262                 while (pos2 && (pos2 != check2)) {
00263                         if (output_only & OUTPUT_SECOND) {
00264                                 /* ignored nodes are just dumped if applicable */
00265                                 if (known.find(pos2) == known.end()) {
00266                                         /* ignore whitespace and similar nodes in second document */
00267                                         /* except when we are inserting subtrees or text would collapse */
00268                                         if (!xmlNodeIsText(xmlGetLastChild(diff))) {
00269                                                 xmlNodePtr copy = xmlCopyNode(pos2,0);
00270                                                 xmlAddChild(diff,copy);
00271                                         }
00272                                 } else {
00273                                         i = map.find(pos2);
00274                                         if (i != map.end()) {
00275                                                 /* Nodes moved here */
00276                                                 xmlNodePtr copy = xmlCopyNode(i->second,0);
00277                                                 xmlAddChild(diff,copy);
00278                                                 attr_diff(copy, ns, i->second, pos2, map, known, output_only & ~OUTPUT_FIRST);
00279                                                 markNode(copy, MOVED_HERE, ns);
00280                                                 rec_diff(copy, ns, i->second->children, pos2->children, map, known, output_only & ~OUTPUT_FIRST);
00281                                         } else {
00282                                                 /* nodes inserted */
00283                                                 xmlNodePtr copy = xmlCopyNode(pos2,0);
00284                                                 if (xmlNodeIsText(copy) && xmlNodeIsText(diff->children)) {
00285                                                         xmlNodePtr wrap = xmlNewNode(ns, INS_TEXT);
00286                                                         xmlAddChild(diff, wrap);
00287                                                         xmlAddChild(wrap, copy);
00288                                                 } else 
00289                                                         xmlAddChild(diff,copy);
00290                                                 attr_diff(copy, ns, NULL, pos2, map, known, output_only & ~OUTPUT_FIRST);
00291                                                 if (p1) markNode(copy, INSERTED, ns);
00292                                                 rec_diff(copy, ns, NULL, pos2->children, map, known, output_only & ~OUTPUT_FIRST);
00293                                         }
00294                                 }
00295                         }
00296                         pos2 = pos2->next;
00297                 }
00298 
00299                 /* we now arrived at the LIS point */
00300                 if (pos1 && pos2) {
00301                         if (output_only & (OUTPUT_FIRST | OUTPUT_SECOND)) {
00302                                 xmlNodePtr copy = xmlCopyNode(pos2,0);
00303                                 xmlAddChild(diff,copy);
00304                                 attr_diff(copy, ns, pos1, pos2, map, known, output_only);
00305 //**/                                   markNode(copy, (xmlChar*)"lcsi", ns);
00306                                 rec_diff(copy, ns, pos1->children, pos2->children, map, known, output_only);
00307                         }
00308                         pos1 = pos1->next;
00309                         pos2 = pos2->next;
00310                 }
00311         }
00312 }
00313 
00314 void MergedWriter::run(Doc& doc1, Doc& doc2, DiffDijkstra& diff) {
00315         if (mergeddoc) xmlFreeDoc(mergeddoc);
00316         mergeddoc = xmlNewDoc((xmlChar*)"1.0");
00317         /* build a map for lookups node-in-second -> node-in-first */
00318         hash_map<xmlNodePtr, xmlNodePtr, hash<void*> > map_back;
00319         for (NodeAssignments* iter = diff.result->ass; iter; iter = iter->next) {
00320                 if (iter->n1 && iter->n2) {
00321                         map_back.insert(make_pair((xmlNode*)iter->n1->data,(xmlNode*)iter->n2->data));
00322                         map_back.insert(make_pair((xmlNode*)iter->n2->data,(xmlNode*)iter->n1->data));
00323                 }
00324         }
00325 
00326         /*set<xmlNodePtr> known = *((set<xmlNodePtr>*)&doc1.processed);*/
00327         /*known.insert(doc2.processed.begin(), doc2.processed.end());*/
00328         set<xmlNodePtr>* k1 = (set<xmlNodePtr>*) &doc1.processed;
00329         set<xmlNodePtr>* k2 = (set<xmlNodePtr>*) &doc2.processed;
00330 
00331         set<xmlNodePtr> known;
00332         set_union(k1->begin(), k1->end(), k2->begin(), k2->end(),
00333                         inserter(known, known.begin()));
00334 
00335         xmlNodePtr root = xmlNewNode(NULL,(xmlChar*)"merged-doc");
00336         xmlDocSetRootElement(mergeddoc,root);
00337 
00338         xmlNsPtr ns = xmlNewNs(root, NAMESPACE_MERGED, (xmlChar*)"md");
00339         xmlSetNs(root,ns);
00340 
00341         rec_diff(root,ns,xmlDocGetRootElement(doc1.getDOM()), xmlDocGetRootElement(doc2.getDOM()), map_back, known, OUTPUT_BOTH);
00342 
00343         /* strip extra root node if possible */
00344         if (root->children && !(root->children->next)) {
00345                 xmlNodePtr newroot = root->children;
00346 
00347                 /* re-root the tree */
00348                 xmlUnlinkNode(root);
00349                 xmlUnlinkNode(newroot);
00350                 xmlDocSetRootElement(mergeddoc,newroot);
00351                 /* fix namespaces */
00352                 xmlReconciliateNs(mergeddoc, newroot);
00353                 /* remove old root */
00354                 xmlFreeNode(root);
00355                 root=newroot;
00356         }
00357 }
00358 
00359 void MergedWriter::dump() {
00360         xmlDocDump(stdout,mergeddoc);
00361 }
00362 
00363 MergedWriter::~MergedWriter() {
00364         if (mergeddoc) xmlFreeDoc(mergeddoc); mergeddoc = NULL;
00365 }
00366 } // namespace SSD

Generated on Thu Aug 4 17:57:12 2005 for SSDDiff by  doxygen 1.4.3-20050530