home *** CD-ROM | disk | FTP | other *** search
-
- /*
- This file was derived from the libwww code, version 2.15, from CERN.
- A number of modifications have been made by Spyglass.
-
- eric@spyglass.com
- */
- /* Parse HyperText Document Address HTParse.c
- ** ================================
- */
- #include "all.h"
-
- //jjo
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- char *x_ExpandRelativeAnchor(const char *rel, const char *base);
-
- #ifdef __cplusplus
- }
- #endif
-
- #define HEX_ESCAPE '%'
-
/* The pieces a document address is cut into by scan().
** Every member is either 0 (piece not present) or points at a
** zero terminated string inside the buffer that was scanned.
** The search part ("?...") has no member of its own: it is
** treated as part of the path.
*/
struct struct_parts
{
    char *access;       /* access scheme, the text before the ':' */
    char *host;         /* host name, the text after "//" */
    char *absolute;     /* path that followed a leading '/' */
    char *relative;     /* path that did not start with '/' */
    char *anchor;       /* fragment, the text after '#' */
};
-
-
/* Strip white space off a string
** ------------------------------
**
** White space here means space, tab, newline and carriage return.
**
** On entry,
**	s	a writable zero terminated string, or NULL.
** On exit,
**	Return value points to the first non-white character of s, or to
**	the terminating zero if there is none.  NULL is returned for NULL.
**	All trailing white space is OVERWRITTEN with zero.
*/

PUBLIC char *HTStrip(char *s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
    char *end;

    if (!s)
        return NULL;            /* Doesn't dump core if NULL */

    /* Walk back from the terminator, zapping trailing blanks.  `end` never
       moves below `s`, so no pointer before the start of the string is
       ever formed, even for "" or an all-blank string. */
    end = s + strlen(s);
    while (end > s && SPACE(end[-1]))
        *--end = 0;

    while (SPACE(*s))
        s++;                    /* Strip leading blanks */
    return s;
}
-
-
- /* Scan a filename for its consituents
- ** -----------------------------------
- **
- ** On entry,
- ** name points to a document name which may be incomplete.
- ** On exit,
- ** absolute or relative may be nonzero (but not both).
- ** host, anchor and access may be nonzero if they were specified.
- ** Any which are nonzero point to zero terminated strings.
- */
- PRIVATE void scan(char *name, struct struct_parts *parts)
- {
- char *after_access;
- char *p;
- int length = strlen(name);
-
- parts->access = 0;
- parts->host = 0;
- parts->absolute = 0;
- parts->relative = 0;
- parts->anchor = 0;
-
- after_access = name;
- for (p = name; *p; p++)
- {
- if (*p == ':')
- {
- *p = 0;
- parts->access = name; /* Access name has been specified */
- after_access = p + 1;
- }
- if (*p == '/')
- break;
- if (*p == '#')
- break;
- }
-
- if (length > 0)
- {
- for (p = name + length - 1; p >= name; p--)
- {
- if (*p == '#')
- {
- parts->anchor = p + 1;
- *p = 0; /* terminate the rest */
- }
- }
- }
-
- p = after_access;
- if (*p == '/')
- {
- if (p[1] == '/')
- {
- parts->host = p + 2; /* host has been specified */
- *p = 0; /* Terminate access */
- p = strchr(parts->host, '/'); /* look for end of host name if any */
-
- // if this is a "file" access, what appears to be a host may really be a volume
-
- if (!strcmp(parts->access, "file"))
- {
- char vBuffer[256];
-
- // make a proper mac style version of the "host" name
- {
- strcpy(vBuffer, parts->host);
- c2pstr(vBuffer);
- vBuffer[0] = 1 + p - parts->host;
- vBuffer[vBuffer[0]] = ':';
- }
-
- // check to see if the "host" name matches that of a mounted volumes
-
- if (vBuffer[0] > 1)
- {
- HParamBlockRec vParamBlock;
-
- memset(&vParamBlock, 0, sizeof(vParamBlock));
- vParamBlock.volumeParam.ioNamePtr = (unsigned char *)vBuffer;
- vParamBlock.volumeParam.ioVolIndex = -1;
- if (!PBHGetVInfoSync(&vParamBlock))
- {
- // it did; we will assume the "host" name is a mistake
-
- parts->host = after_access + 1;
- p = parts->host;
- }
- }
- }
-
- if (p)
- {
- *p = 0; /* Terminate host */
- parts->absolute = p + 1; /* Root has been found */
- }
- }
- else
- {
- parts->absolute = p + 1; /* Root found but no host */
- }
- }
- else
- {
- parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
- }
-
- return;
- } /*scan */
-
- /* Parse a Name relative to another name
- ** -------------------------------------
- **
- ** This returns those parts of a name which are given (and requested)
- ** substituting bits from the related name where necessary.
- **
- ** On entry,
- ** aName A filename given
- ** relatedName A name relative to which aName is to be parsed
- ** wanted A mask for the bits which are wanted.
- **
- ** On exit,
- ** returns A pointer to a malloc'd string which MUST BE FREED
- */
- char *HTParse(const char *aName, const char *relatedName, int wanted)
- {
- char *return_value = 0;
- char *p;
- char *access;
- struct struct_parts given, related;
- char name[MAX_URL_STRING+1];
- char rel[MAX_URL_STRING+1];
- char result[2*MAX_URL_STRING+1]; /* Make this longer to avoid overflow */
-
- /* Make working copies of input strings to cut up:
- */
- GTR_strncpy(name, aName, MAX_URL_STRING);
- GTR_strncpy(rel, relatedName, MAX_URL_STRING);
-
- scan(name, &given);
- scan(rel, &related);
-
- /*
- For the given part, if we get a URL which contains a protocol and a host,
- but not an absolute, then it looked something like this:
-
- http://www.spyglass.com:4040
-
- We need to assume that the slash at the end should be there, or when this
- is found as a hyperlink in a document, it will steal the absolute part
- from the URL of the document itself (related).
- */
-
- if (given.access && given.host && !given.absolute)
- {
- GTR_strncpy(name, aName, MAX_URL_STRING);
- strcat(name, "/");
- scan(name, &given);
- }
-
- result[0] = 0; /* Clear string */
- access = given.access ? given.access : related.access;
- if (wanted & PARSE_ACCESS)
- if (access)
- {
- strcat(result, access);
- if (wanted & PARSE_PUNCTUATION)
- strcat(result, ":");
- }
-
- if (given.access && related.access) /* If different, inherit nothing. */
- if (strcmp(given.access, related.access) != 0)
- {
- related.host = 0;
- related.absolute = 0;
- related.relative = 0;
- related.anchor = 0;
- }
-
- if (wanted & PARSE_HOST)
- if (given.host || related.host)
- {
- char *tail = result + strlen(result);
- if (wanted & PARSE_PUNCTUATION)
- strcat(result, "//");
- if (given.host)
- {
- strcat(result, given.host);
- }
- else
- {
- strcat(result, related.host);
- }
-
- /* Ignore default port numbers, and trailing dots on FQDNs
- which will only cause identical adreesses to look different */
- {
- char *p;
- p = strchr(tail, ':');
- if (p && access)
- { /* Port specified */
- if ( ( strcmp(access, "http") == 0
- && strcmp(p, ":80") == 0)
- || ( strcmp(access, "gopher") == 0
- && strcmp(p, ":70") == 0)
- #ifdef SHTTP_ACCESS_TYPE
- || ( strcmp(access, "shttp") == 0
- && strcmp(p, ":80") == 0)
- #endif
- )
- *p = (char) 0; /* It is the default: ignore it */
- }
- if (!p)
- p = tail + strlen(tail); /* After hostname */
- if (strlen (p)) /* -dpg */
- {
- p--; /* End of hostname */
- if (*p == '.')
- *p = (char) 0; /* chop final . */
- }
- }
- }
-
- if (given.host && related.host) /* If different hosts, inherit no path. */
- if (strcmp(given.host, related.host) != 0)
- {
- related.absolute = 0;
- related.relative = 0;
- related.anchor = 0;
- }
-
- if (wanted & PARSE_PATH)
- {
- if (given.absolute)
- { /* All is given */
- if (wanted & PARSE_PUNCTUATION)
- strcat(result, "/");
- strcat(result, given.absolute);
- }
- else if (related.absolute)
- { /* Adopt path not name */
- strcat(result, "/");
- strcat(result, related.absolute);
- if (given.relative)
- {
- p = strchr(result, '?'); /* Search part? */
- if (!p)
- p = result + strlen(result) - 1;
- for (; *p != '/'; p--) ; /* last / */
- p[1] = 0; /* Remove filename */
- strcat(result, given.relative); /* Add given one */
- HTSimplify(result);
- }
- }
- else if (given.relative)
- {
- /* The following 3 lines were copied from NCSA Mosaic for Windows */
- if ((wanted & PARSE_HOST) && (given.host || related.host) && (wanted & PARSE_PUNCTUATION))
- if (result[strlen(result) - 1] != '/')
- strcat(result, "/");
- strcat(result, given.relative); /* what we've got */
- }
- else if (related.relative)
- {
- strcat(result, related.relative);
- }
- else
- { /* No inheritance */
- if (!strcmp(result, "mailto:")) // mailto:
- ;
- else if (!strcmp(result, "news:"))
- ;
- else // protocol ends with a slash
- strcat(result, "/");
- }
- }
-
- if (wanted & PARSE_ANCHOR)
- if (given.anchor || related.anchor)
- {
- if (wanted & PARSE_PUNCTUATION)
- strcat(result, "#");
- strcat(result, given.anchor ? given.anchor : related.anchor);
- }
-
- /* We truncate URLs to 1024 bytes if they're too long. */
- result[MAX_URL_STRING] = '\0';
- return_value = GTR_strdup(result);
-
- return return_value; /* exactly the right length */
- }
-
-
/*
** As strcpy() but guaranteed to work correctly
** with overlapping parameters.  AL 7 Feb 1994
**
** On entry,
**	to	destination, with room for strlen(from) + 1 bytes
**	from	zero terminated source; may overlap `to`
**
** Does nothing if either pointer is NULL.  memmove() is specified for
** overlapping regions, so no temporary buffer is needed and the copy can
** no longer be skipped because an allocation failed.
*/
PRIVATE void ari_strcpy(char *to, char *from)
{
    if (!to || !from)
        return;

    memmove(to, from, strlen(from) + 1);    /* + 1 copies the terminator */
}
-
/* Simplify a filename
** -------------------
**
** A unix-style file is allowed to contain the sequence xxx/../ which may be
** replaced by "", and the sequence "/./" which may be replaced by "/".
** Simplification helps us recognize duplicate filenames.
**
**	Thus,	/etc/junk/../fred	becomes	/etc/fred
**		/etc/junk/./fred	becomes	/etc/junk/fred
**
** A "/.." found directly after the host part of a URL has nothing to
** cancel and is simply dropped:
**
**		http://host/../path	becomes	http://host/path
**
** Leading '/' and '.' characters are skipped before scanning starts, and
** a "/.." with no earlier slash to cancel against is left alone, so
**
**		../../albert.html	and	abc/../def
**
** are not changed.
**
** On entry,
**	filename	writable zero terminated string, edited in place.
**			NULL is accepted and ignored.
*/
PUBLIC void HTSimplify(char *filename)
{
    char *p;
    char *q;
    char *prev;

    if (!filename)
        return;

    p = filename;
    while (*p == '/' || *p == '.')      /* Pass starting / or .'s */
        p++;

    while (*p)
    {
        if (*p != '/')
        {
            p++;
            continue;
        }

        if ((p[1] == '.') && (p[2] == '.') && (p[3] == '/' || !p[3]))
        {
            /* Look for the previous slash without ever stepping to, or
               reading, the byte in front of the buffer. */
            prev = NULL;
            for (q = p; q > filename;)
            {
                q--;
                if (*q == '/')
                {
                    prev = q;
                    break;
                }
            }

            if (prev)
            {
                /*
                    When the previous slash is the second one of "//", it is
                    the slash before the hostname:

                        http://host.somewhere.com/../path
                              ^                  ^
                              |                  |
                             prev                p

                    and there is no directory to cancel against.
                */
                int after_host = (prev > filename + 1 && prev[-1] == '/');

                if (!after_host && 0 != strncmp(prev, "/../", 4))
                {
                    ari_strcpy(prev, p + 3);    /* Remove /xxx/.. */
                    if (!*filename)
                        strcpy(filename, "/");
                    p = prev;           /* Start again with prev slash */
                    continue;
                }
                if (after_host)
                {
                    ari_strcpy(p, p + 3);       /* Remove the /.. */
                    continue;           /* Look at this slash again */
                }
            }
        }
        else if ((p[1] == '.') && (p[2] == '/' || !p[2]))
        {
            ari_strcpy(p, p + 2);       /* Remove a slash and a dot */
            /* Look at the same position again so that runs such as
               "/././" are removed completely, and so that the scan stops
               on the terminator if the tail was just cut off. */
            continue;
        }

        p++;
    }
}
-
- /* from html.c */
- char *x_ExpandRelativeAnchor(const char *rel, const char *base)
- {
- char *pTemp = 0;
- char *stripped;
- char *result = NULL;
-
- if (!rel)
- {
- rel = "";
- }
-
- pTemp = GTR_strdup(rel);
-
- if(!base)
- return pTemp;
-
- stripped = HTStrip(pTemp);
- result = HTParse(stripped, base, PARSE_PUNCTUATION | PARSE_ACCESS | PARSE_HOST | PARSE_PATH | PARSE_ANCHOR);
- GTR_FREE(pTemp);
- return result;
- }
-
-