home *** CD-ROM | disk | FTP | other *** search
- //: C17:ReprocessHTML.cpp
- // From Thinking in C++, 2nd Edition
- // Available at http://www.BruceEckel.com
- // (c) Bruce Eckel 1999
- // Copyright notice in Copyright.txt
- // Take Word's html output and fix up
- // the code listings and html tags
- #include "../require.h"
- #include <iostream>
- #include <fstream>
- #include <string>
- using namespace std;
-
- // Produce a new string which is the original
- // string with the html paragraph break marks
- // stripped off:
- string stripPBreaks(string s) {
- int br;
- while((br = s.find("<P>")) != string::npos)
- s.erase(br, strlen("<P>"));
- while((br = s.find("</P>")) != string::npos)
- s.erase(br, strlen("</P>"));
- return s;
- }
-
- // After the beginning of a code listing is
- // detected, this function cleans up the listing
- // until the end marker is found. The first line
- // of the listing is passed in by the caller,
- // which detects the start marker in the line.
- void fixupCodeListing(istream& in,
- ostream& out, string& line, int tag) {
- out << line.substr(0, tag)
- << "<PRE>" // Means "preformatted" in html
- << stripPBreaks(line.substr(tag)) << endl;
- string s;
- while(getline(in, s)) {
- int endtag = s.find("/""/""/"":~");
- if(endtag != string::npos) {
- endtag += strlen("/""/""/"":~");
- string before = s.substr(0, endtag);
- string after = s.substr(endtag);
- out << stripPBreaks(before) << "</PRE>"
- << after << endl;
- return;
- }
- out << stripPBreaks(s) << endl;
- }
- }
-
- string removals[] = {
- "<FONT SIZE=2>",
- "<FONT SIZE=1>",
- "<FONT FACE=\"Times\" SIZE=1>",
- "<FONT FACE=\"Times\" SIZE=2>",
- "<FONT FACE=\"Courier\" SIZE=1>",
- "SIZE=1", // Eliminate all other '1' & '2' size
- "SIZE=2",
- };
- const int rmsz =
- sizeof(removals)/sizeof(*removals);
-
- int main(int argc, char* argv[]) {
- requireArgs(argc, 2);
- ifstream in(argv[1]);
- assure(in, argv[1]);
- ofstream out(argv[2]);
- string line;
- while(getline(in, line)) {
- // The "Body" tag only appears once:
- if(line.find("<BODY") != string::npos) {
- out << "<BODY BGCOLOR=\"#FFFFFF\" "
- "TEXT=\"#000000\">" << endl;
- continue; // Get next line
- }
- // Eliminate each of the removals strings:
- for(int i = 0; i < rmsz; i++) {
- int find = line.find(removals[i]);
- if(find != string::npos)
- line.erase(find, removals[i].size());
- }
- int tag1 = line.find("/""/"":");
- int tag2 = line.find("/""*"":");
- if(tag1 != string::npos)
- fixupCodeListing(in, out, line, tag1);
- else if(tag2 != string::npos)
- fixupCodeListing(in, out, line, tag2);
- else
- out << line << endl;
- }
- } ///:~
-