#include "odt.h"
#include "base64.h"

// NOTE(review): the names of the angle-bracket includes were missing from the
// text under review. The list below is reconstructed from the names this file
// actually uses - confirm against the upstream file before merging.
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>

#include <wx/fs_mem.h>
#include <wx/image.h>
#include <wx/mstream.h>

#define X(param) (xmlChar*)param

namespace ODT
{

// Running footnote number; reset to zero at the start of every help topic.
static int footnoteID = 0;

namespace
{

// Deleters so that libxml2 objects are released on every return path.
struct XmlDocDeleter
{
    void operator()(xmlDocPtr doc) const { xmlFreeDoc(doc); }
};

struct XPathContextDeleter
{
    void operator()(xmlXPathContextPtr context) const { xmlXPathFreeContext(context); }
};

struct XPathObjectDeleter
{
    void operator()(xmlXPathObjectPtr object) const { xmlXPathFreeObject(object); }
};

using XmlDocHandle = std::unique_ptr<xmlDoc, XmlDocDeleter>;
using XPathContextHandle = std::unique_ptr<xmlXPathContext, XPathContextDeleter>;
using XPathObjectHandle = std::unique_ptr<xmlXPathObject, XPathObjectDeleter>;

// Copies a libxml2-allocated string into a std::string and frees the original.
// A null pointer yields an empty string.
std::string TakeXmlString(xmlChar *text)
{
    if (text == nullptr)
        return std::string();

    std::string copy(reinterpret_cast<const char*>(text));
    xmlFree(text);
    return copy;
}

// Reads attribute `name` of `node` into `value`. Returns false (leaving
// `value` untouched) when the node is null or the attribute is absent.
bool ReadNodeProperty(xmlNodePtr node, const char *name, std::string &value)
{
    if (node == nullptr)
        return false;

    xmlChar *raw = xmlGetProp(node, X(name));
    if (raw == nullptr)
        return false;

    value = TakeXmlString(raw);
    return true;
}

// Returns the node's name as a std::string (empty when the node has none).
std::string NodeNameOf(xmlNodePtr node)
{
    if (node == nullptr || node->name == nullptr)
        return std::string();
    return std::string(reinterpret_cast<const char*>(node->name));
}

// Evaluates `expression` against `document` with every namespace prefix used
// in this file registered. Returns a null handle if the context cannot be
// created or the evaluation fails; an expression that matches nothing yields
// a non-null handle with an empty node set.
XPathObjectHandle EvaluateXPathQuery(xmlDocPtr document, const char *expression)
{
    static const char *const namespaces[][2] = {
        { "office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0" },
        { "text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0" },
        { "style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0" },
        { "draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" },
        { "xlink", "http://www.w3.org/1999/xlink" },
        { "dc", "http://purl.org/dc/elements/1.1/" },
    };

    if (document == nullptr)
        return XPathObjectHandle();

    const XPathContextHandle context(xmlXPathNewContext(document));
    if (!context)
        return XPathObjectHandle();

    for (const auto &ns : namespaces)
        xmlXPathRegisterNs(context.get(), X(ns[0]), X(ns[1]));

    return XPathObjectHandle(xmlXPathEvalExpression(X(expression), context.get()));
}

// True when the query succeeded and matched at least one node.
bool HasMatches(const XPathObjectHandle &result)
{
    return result && !xmlXPathNodeSetIsEmpty(result->nodesetval);
}

// Removes the trailing page number from a table-of-contents entry: everything
// after the last space is dropped (the space itself is kept). The first
// character is never examined, so an entry without a space after its first
// character is reduced to that first character.
std::string TrimTrailingPageNumber(std::string entry)
{
    if (entry.empty())
        return entry;

    const std::string::size_type lastSpace = entry.find_last_of(' ');
    const std::string::size_type keep =
        (lastSpace == std::string::npos || lastSpace == 0) ? 1 : lastSpace + 1;
    entry.erase(keep);
    return entry;
}

} // namespace

/// Registers the wxWidgets image handlers and tells libxml2 to drop
/// ignorable blank text nodes while parsing.
ODT::ODT()
{
    wxInitAllImageHandlers();
    xmlKeepBlanksDefault(0);
}

/// Reads the flat XML document at path `document`, parses it and runs every
/// processing step on it.
/// @return false if the file cannot be opened, cannot be parsed, or any
///         processing step fails.
bool ODT::LoadDocument(std::string document)
{
    // Load the document into the memory.
    std::ifstream odtDocumentStream(document, std::ios::in);
    if (!odtDocumentStream.good())
        return false;

    const std::string odtDocument((std::istreambuf_iterator<char>(odtDocumentStream)),
                                  std::istreambuf_iterator<char>());

    // Process the XML document. The handle frees the parsed tree on return;
    // the processing steps only copy strings out of it.
    const XmlDocHandle xmlODTDoc(xmlReadMemory(odtDocument.c_str(),
                                               static_cast<int>(odtDocument.size()),
                                               "noname.xml", nullptr, 0));
    if (!xmlODTDoc)
        return false;

    return ProcessDocument(xmlODTDoc.get());
}

/// Runs the processing steps in order and stops at the first one that fails.
/// Line breaks and images are rewritten before the help topics are extracted
/// so that the extracted text already contains the replacements.
bool ODT::ProcessDocument(xmlDocPtr document)
{
    if (document == nullptr)
        return false;

    using ProcessFunction = bool (ODT::*)(xmlDocPtr);
    static const ProcessFunction processFunctions[] = {
        &ODT::GenerateStyleList,
        &ODT::GetTitle,
        &ODT::GenerateTOC,
        &ODT::ProcessLineBreaks,
        &ODT::ProcessImages,
        &ODT::GenerateHelpTopics
    };

    for (const ProcessFunction processFunction : processFunctions)
        if (!(this->*processFunction)(document))
            return false;

    return true;
}

/// Fills styleList with (style name -> parent style name) pairs taken from
/// the automatic styles of the document.
/// @return false when the query fails or matches nothing.
bool ODT::GenerateStyleList(xmlDocPtr document)
{
    const XPathObjectHandle result = EvaluateXPathQuery(
        document, "//office:document//office:automatic-styles//style:style//*");

    if (!HasMatches(result)) {
        std::cout << "Failed getting style list!" << std::endl;
        return false;
    }

    const xmlNodeSetPtr nodeSet = result->nodesetval;

    for (int nodeSeek = 0; nodeSeek < nodeSet->nodeNr; nodeSeek++)
    {
        // The query matches descendants of style:style; the attributes are
        // read from the parent of each match.
        xmlNodePtr nodeData = nodeSet->nodeTab[nodeSeek]->parent;

        std::string styleName;
        std::string styleParentStyleName;

        if (!ReadNodeProperty(nodeData, "name", styleName))
            continue;
        if (!ReadNodeProperty(nodeData, "parent-style-name", styleParentStyleName))
            continue;

        styleList.insert(std::make_pair(styleName, styleParentStyleName));
    }

    return true;
}

/// Builds tocData from the entries of the document's table of contents.
/// Each entry gets its title (without the trailing page number), its level
/// (derived from the paragraph style) and the link target without the
/// leading '#'. Missing titles, styles or link targets become empty strings.
/// @return false when the query fails or matches nothing.
bool ODT::GenerateTOC(xmlDocPtr document)
{
    // Look for text:table-of-content element.
    const XPathObjectHandle result = EvaluateXPathQuery(
        document,
        "//office:document//office:body//office:text//text:table-of-content//text:index-body//text:p/*");

    if (!HasMatches(result)) {
        std::cout << "Failed getting table of contents!" << std::endl;
        return false;
    }

    const xmlNodeSetPtr nodeSet = result->nodesetval;

    // Generate a list of contents.
    for (int nodeSeek = 0; nodeSeek < nodeSet->nodeNr; nodeSeek++)
    {
        xmlNodePtr entryNode = nodeSet->nodeTab[nodeSeek];
        xmlNodePtr nodeData = entryNode->parent;

        // Delete the page number at the end.
        const std::string pageTitle = TrimTrailingPageNumber(
            TakeXmlString(xmlNodeListGetString(document, entryNode->children, 1)));

        // Get the paragraph style and determine the level from it.
        std::string paragraphStyle;
        ReadNodeProperty(nodeData, "style-name", paragraphStyle);
        const HelpTopicCurrentLevel currentLevel = DetermineTopicLevel(paragraphStyle);

        // Get the href to pair the data with later on. It is read from the
        // first child of the paragraph.
        std::string tocItemHref;
        ReadNodeProperty(nodeData != nullptr ? nodeData->children : nullptr, "href", tocItemHref);

        // Remove the hash from the string if it is there.
        if (!tocItemHref.empty() && tocItemHref[0] == '#')
            tocItemHref.erase(0, 1);

        // Create the TOC item and add it to the list.
        HelpTableOfContentsItem tocItem;
        tocItem.tocItemName = pageTitle;
        tocItem.tocItemLevel = currentLevel;
        tocItem.tocItemID = tocItemHref;

        tocData.push_back(tocItem);
    }

    return true;
}

/// Stores the text of the first dc:title element of the document metadata in
/// `title` (empty when the element has no text).
/// @return false when the query fails or matches nothing.
bool ODT::GetTitle(xmlDocPtr document)
{
    // Look for the dc:title element.
    const XPathObjectHandle result = EvaluateXPathQuery(
        document, "//office:document//office:meta//dc:title");

    if (!HasMatches(result)) {
        std::cout << "Failed getting title!" << std::endl;
        return false;
    }

    xmlNodePtr titleNode = result->nodesetval->nodeTab[0];
    title = TakeXmlString(xmlNodeListGetString(document, titleNode->children, 1));

    return true;
}

/// Builds helpTopicData: for every entry of tocData whose ID matches a named
/// node inside a heading, collects the text of the "p" and "list" siblings
/// that follow the heading, up to the first sibling of any other kind.
/// Footnotes inside those paragraphs are moved into the topic's footnote list.
/// @return false when the bookmark query fails or matches nothing.
bool ODT::GenerateHelpTopics(xmlDocPtr document)
{
    // Look for bookmarks in the document.
    const XPathObjectHandle result = EvaluateXPathQuery(
        document, "//office:document//office:body//office:text//text:h//*");

    if (!HasMatches(result)) {
        std::cout << "Failed generating help topics!" << std::endl;
        return false;
    }

    const xmlNodeSetPtr nodeSet = result->nodesetval;

    std::map<std::string, xmlNodePtr> bookmarkList;

    for (int nodeSeek = 0; nodeSeek < nodeSet->nodeNr; nodeSeek++)
    {
        xmlNodePtr nodeData = nodeSet->nodeTab[nodeSeek];

        std::string bookmarkID;
        if (!ReadNodeProperty(nodeData, "name", bookmarkID))
            continue;

        bookmarkList.insert(std::make_pair(bookmarkID, nodeData));
    }

    // Look for each of the help topics.
    for (const auto &tocItem : tocData)
    {
        footnoteID = 0;

        const auto tocItemIterator = bookmarkList.find(tocItem.tocItemID);
        if (tocItemIterator == bookmarkList.end())
            continue;

        // Build the help topic information.
        HelpTopicData helpTopic;
        helpTopic.helpTopicName = tocItem.tocItemName;
        helpTopic.helpTopicID = tocItem.tocItemID;

        // Process text up to the next bookmark or closing office:text.
        xmlNodePtr headingNodePtr = tocItemIterator->second->parent;
        xmlNodePtr paragraphNodePtr = (headingNodePtr != nullptr) ? headingNodePtr->next : nullptr;

        for (; paragraphNodePtr != nullptr; paragraphNodePtr = paragraphNodePtr->next)
        {
            const std::string name = NodeNameOf(paragraphNodePtr);

            if (name == "text")
                continue;

            if (name != "p" && name != "list")
                break;

            // TODO: Get the child nodes first and process them.
            xmlNodePtr paragraphChildNodePtr = paragraphNodePtr->children;
            while (paragraphChildNodePtr != nullptr)
            {
                // ProcessNoteNode may replace and free the current child, so
                // the successor has to be fetched before the call.
                xmlNodePtr nextChildNodePtr = paragraphChildNodePtr->next;

                if (NodeNameOf(paragraphChildNodePtr) == "note")
                {
                    // Check the note-class is a footnote.
                    ProcessNoteNode(paragraphChildNodePtr, &helpTopic);
                }

                paragraphChildNodePtr = nextChildNodePtr;
            }

            HelpTopicSection newSection;
            newSection.sectionFontSize = FONTSIZE_NORMAL;
            newSection.sectionText = TakeXmlString(xmlNodeGetContent(paragraphNodePtr));

            helpTopic.helpTopicSections.push_back(newSection);
        }

        helpTopicData.push_back(helpTopic);
    }

    return true;
}

/// Replaces the content of every text:line-break element with the line-break
/// text used in the generated help pages.
/// @return false only when the query itself fails; a document without line
///         breaks is not an error.
bool ODT::ProcessLineBreaks(xmlDocPtr document)
{
    const XPathObjectHandle result = EvaluateXPathQuery(document, "//text:line-break");

    if (!result) {
        std::cout << "Failed generating line breaks!" << std::endl;
        return false;
    }

    if (!HasMatches(result))
        return true;

    // NOTE(review): in the text under review this literal held only a raw
    // newline; it may originally have contained HTML markup (e.g. a line-break
    // tag) that was lost - confirm against the upstream file.
    static const char linebreakHTML[] = "\n";

    const xmlNodeSetPtr nodeSet = result->nodesetval;

    for (int nodeSeek = 0; nodeSeek < nodeSet->nodeNr; nodeSeek++)
        xmlNodeSetContent(nodeSet->nodeTab[nodeSeek], X(linebreakHTML));

    return true;
}

/// Decodes every embedded (base64) image, registers it with the wxWidgets
/// memory file system as "image<N>.png", then replaces the content of every
/// draw:image element with the image text used in the generated help pages.
/// @return false only when a query fails; a document without embedded images
///         is not an error.
bool ODT::ProcessImages(xmlDocPtr document)
{
    const XPathObjectHandle binaryResult = EvaluateXPathQuery(
        document, "//draw:frame//draw:image//office:binary-data");

    if (!binaryResult) {
        std::cout << "Failed generating images!" << std::endl;
        return false;
    }

    if (!HasMatches(binaryResult))
        return true;

    const xmlNodeSetPtr binaryNodeSet = binaryResult->nodesetval;
    int imageID = 0;

    for (int nodeSeek = 0; nodeSeek < binaryNodeSet->nodeNr; nodeSeek++)
    {
        // Incremented before any skip so the numbering always follows the
        // position of the element in the document.
        imageID++;

        const std::string imageFilename = "image" + std::to_string(imageID) + ".png";

        std::string imageData = TakeXmlString(xmlNodeGetContent(binaryNodeSet->nodeTab[nodeSeek]));

        // The base64 payload is wrapped and indented in the XML; strip the
        // whitespace before decoding.
        imageData.erase(std::remove_if(imageData.begin(), imageData.end(),
                                       [](char c) {
                                           return c == '\t' || c == '\r' || c == '\n' || c == ' ';
                                       }),
                        imageData.end());

        imageData = base64_decode(imageData);

        wxMemoryInputStream imageDataInputStream(imageData.c_str(), imageData.size());

        wxImage documentImage;
        if (!documentImage.LoadFile(imageDataInputStream, wxBITMAP_TYPE_PNG))
            continue; // Not a decodable PNG: nothing valid to register.

        wxMemoryFSHandler::AddFile(wxString(imageFilename), documentImage, wxBITMAP_TYPE_PNG);
    }

    const XPathObjectHandle imageResult = EvaluateXPathQuery(document, "//draw:frame//draw:image");

    if (!HasMatches(imageResult)) {
        std::cout << "Failed generating images!" << std::endl;
        return false;
    }

    const xmlNodeSetPtr imageNodeSet = imageResult->nodesetval;
    imageID = 0;

    for (int nodeSeek = 0; nodeSeek < imageNodeSet->nodeNr; nodeSeek++)
    {
        imageID++;

        // NOTE(review): in the text under review this literal held only two
        // raw newlines and did not use imageID; presumably the original markup
        // referenced the file registered above and was lost - confirm against
        // the upstream file.
        const std::string imageHTML = "\n\n";

        xmlNodeSetContent(imageNodeSet->nodeTab[nodeSeek], X(imageHTML.c_str()));
    }

    return true;
}

/// If `nodePtr` is a note whose note-class is "footnote", appends one
/// footnote per note-body child to `helpTopic` (text taken from the first
/// child of the note-body) and replaces the note in the tree with a
/// "footnotePoint" element holding the footnote number.
/// The original node is freed in that case, so callers must not touch
/// `nodePtr` after the call.
void ODT::ProcessNoteNode(xmlNodePtr nodePtr, HelpTopicData *helpTopic)
{
    if (nodePtr == nullptr || helpTopic == nullptr)
        return;

    std::string noteClass;
    if (!ReadNodeProperty(nodePtr, "note-class", noteClass) || noteClass != "footnote")
        return;

    bool footnoteFound = false;

    for (xmlNodePtr noteChildNodePtr = nodePtr->children;
         noteChildNodePtr != nullptr;
         noteChildNodePtr = noteChildNodePtr->next)
    {
        if (NodeNameOf(noteChildNodePtr) != "note-body")
            continue;

        FootnoteSection footnoteSection;
        footnoteSection.footnoteID = ++footnoteID;
        // An empty note-body yields an empty footnote text.
        footnoteSection.footnoteText = TakeXmlString(xmlNodeGetContent(noteChildNodePtr->children));

        helpTopic->helpTopicFootnotes.push_back(footnoteSection);
        footnoteFound = true;
    }

    if (!footnoteFound)
        return;

    // Replace node with a node containing the footnote number.
    // NOTE(review): both literals around the number were empty in the text
    // under review; they may originally have held markup - confirm against
    // the upstream file.
    const std::string footnoteIDString = std::to_string(footnoteID);
    const std::string footnoteHTML = "" + footnoteIDString + "";

    xmlNodePtr newNodePtr = xmlNewNode(nullptr, X("footnotePoint"));
    if (newNodePtr == nullptr)
        return;

    xmlNodeSetContent(newNodePtr, X(footnoteHTML.c_str()));

    xmlNodePtr oldNodePtr = xmlReplaceNode(nodePtr, newNodePtr);
    if (oldNodePtr == nullptr)
    {
        // The replacement was not linked into the tree; release it.
        xmlFreeNode(newNodePtr);
        return;
    }

    xmlUnlinkNode(oldNodePtr);
    xmlFreeNode(oldNodePtr);
}

/// Maps a paragraph style name to a topic level through the parent style
/// recorded in styleList. Unknown styles and unknown parents map to
/// TOPIC_LEVEL1.
HelpTopicCurrentLevel ODT::DetermineTopicLevel(std::string styleText)
{
    const auto styleTextIterator = styleList.find(styleText);

    if (styleTextIterator == styleList.end())
        return TOPIC_LEVEL1;

    const auto &parentStyle = styleTextIterator->second;

    if (parentStyle == "Contents_20_1")
        return TOPIC_LEVEL1;
    if (parentStyle == "Contents_20_2")
        return TOPIC_LEVEL2;
    if (parentStyle == "Contents_20_3")
        return TOPIC_LEVEL3;
    if (parentStyle == "Contents_20_4")
        return TOPIC_LEVEL4;

    return TOPIC_LEVEL1;
}

}