home *** CD-ROM | disk | FTP | other *** search
Wrap
'============================================================================ ' VARIABLE DECLARATIONS '============================================================================ 'PLEASE SEE THE PROCEDURE TITLED "LICENSE" FOR DISCLAIMER AND OTHER 'PERTINENT INFO. Global FileName As String 'Stores name of html doc to open (you must write 'the code that assigns a value to this variable. 'This module will use the value to open the file 'for parsing. Global SrcCount As Long 'Stores number of links found by parse engine Global SrcLabel() As String 'Stores the caption of the hotlink Global SrcURL() As String 'Stores the URL of the hotlink Global db As database 'Handle for HTML.mdb Global Dt As Dynaset 'Handle for TempLinks table Global Dp As Dynaset 'Handle for PermLinks table Global PDt As Dynaset 'Handle for Projects table Global DisableDatabase 'Disables use of Access database in module. 'Set value to TRUE if you experience 'prolems with the database and wish to 'use procedure without database storage. 'Pgm Constant for KeyDown Global Const KEY_DELETE = &H2E 'Data Constants for Database use Global Const DB_LANG_GENERAL = ";LANGID=0x0809;CP=1252;COUNTRY=0" Global Const DB_LONG = 4 Global Const DB_TEXT = 10 Global Const DB_MEMO = 12 Global ProjectTitle As String 'Stores name of Project Global DocTitle As String 'Stores Doc Title as found within <title></title> Function BuildLink (Txt As String, URL As String) As String BuildLink = "<a href=" + Chr$(34) + URL + Chr$(34) + ">" + Txt + "</a>" End Function Sub BuildLists (Order As Integer) 'THIS ROUTINE BUILDS THE INITIAL LISTS OF DESCRIPTIONS AND URL'S. 'IT ALSO USES THE LOOP TO ADD THE RECORDS TO THE TEMPLINKS TABLE 'IN THE HTML.MDB DATABASE PROVIDING THE GLOBAL VAR DisableDatabse = False 'The case argument tells the routine whether to sort the records in the order 'they were found or in alphabetical order. This is only the case when using 'the database capabilities and will not work with DisableDatabase set to TRUE Dim F As Form Dim L1 As Control Dim L2 As Control Dim Total As Integer, Count As Integer Set F = Hotlist Set L1 = F.List1 Set L2 = F.List2 L1.Clear L2.Clear 'Does not add links to database if DisableDatabase = True If DisableDefault = True Then Order = 2 Select Case Order 'Create list of references in default order as pulled from doc Case 0 Dim Ds1 As Dynaset Dim Ds2 As Dynaset Set Ds1 = db.CreateDynaset("Select Templinks.* From Templinks") Ds1.MoveLast Total = Ds1.RecordCount Ds1.MoveFirst Count = 0 Do Until Ds1.EOF Count = Count + 1 F.Caption = "Creating database " + Str(Count) + " of" + Str(Total) + " links" L1.AddItem Ds1("LinkText") L1.ItemData(L1.NewIndex) = Ds1("LinkID") L2.AddItem Ds1("LinkURL") L2.ItemData(L2.NewIndex) = Ds1("LinkID") Ds1.MoveNext Loop 'Create list of references in alphabetical order both by caption and URL Case 1 Dim Ds1 As Dynaset Dim Ds2 As Dynaset Set Ds1 = db.CreateDynaset("Select Templinks.* From Templinks Order by LinkText") Set Ds2 = db.CreateDynaset("Select Templinks.* From Templinks Order by LinkURL") Ds1.MoveLast Total = Ds1.RecordCount Ds1.MoveFirst Ds2.MoveFirst Count = 0 Do Until Ds1.EOF Count = Count + 1 F.Caption = "Creating database " + Str(Count) + " of" + Str(Total) + " links" L1.AddItem Ds1("LinkText") L1.ItemData(L1.NewIndex) = Ds1("LinkID") L2.AddItem Ds2("LinkURL") L2.ItemData(L2.NewIndex) = Ds2("LinkID") Ds1.MoveNext Ds2.MoveNext Loop 'Avoid using database, Case 2 only used if DisableDatabase = TRUE Case 2 Total = SrcCount Count = 0 For x = 0 To SrcCount - 1 Count = Count + 1 F.Caption = "Creating database " + Str(Count) + " of" + Str(Total) + " links" L1.AddItem SrcLabel(x) L1.ItemData(L1.NewIndex) = x L2.AddItem SrcURL(x) L2.ItemData(L2.NewIndex) = x Next x End Select F.Caption = FileName End Sub Sub CheckDatabase () 'Check to see if database already exists or is disabled - if so exit sub If Dir$("html.mdb") <> "" Or DisableDatabase = True Then Exit Sub 'Declare database object variables Dim NewDb As database Dim NewTd As New TableDef Dim F1 As New Field, F2 As New Field, F3 As New Field, F4 As New Field 'Create database file HTML.mdb Set NewDb = CreateDatabase("html.mdb", DB_LANG_GENERAL) 'Error handler If db Is Nothing Then MsgBox "Could not create database": Exit Sub If Err <> 0 Then MsgBox "Error during open:" & Err & "/" & Error$(Err) On Error GoTo 0 'Define new table name NewTd.Name = "TempLinks" 'Define new table fields F1.Name = "LinkID" F1.Type = DB_LONG NewTd.Fields.Append F1 F2.Name = "LinkText" F2.Type = DB_MEMO NewTd.Fields.Append F2 F3.Name = "LinkURL" F3.Type = DB_MEMO NewTd.Fields.Append F3 F4.Name = "URLType" F4.Type = DB_TEXT F4.Size = 10 NewTd.Fields.Append F4 'Create field def's in table def NewDb.TableDefs.Append NewTd 'Msg user that database has been created MsgBox "html.mdb created" 'Close the new database NewDb.Close End Sub Function CleanUpCaption (TempCap As String) As String Dim Clean1 As Integer, Clean2 As Integer, Temp1 As String, Temp2 As String Dim Linktrim As Integer 'FIND STYLE/IMG TAG DELIMITERS '----------------------------- Clean1 = InStr(TempCap, "<") Clean2 = InStr(TempCap, ">") 'COPY TEXT NOT IN STYLE/IMG TAG '------------------------------ If Clean1 <> 0 Then 'Or Clean1 <> "" Then Temp1 = Left$(TempCap, Clean1 - 1) End If If Clean2 <> 0 Then 'Or Clean2 <> "" Then Temp2 = Trim$(Right$(TempCap, Len(TempCap) - Clean2)) End If 'COPY REMAINING TEXT BACK TO CAPTION VARIABLE '-------------------------------------------- If Clean1 > 0 And Clean2 > 0 Then TempCap = Temp1 + Temp2 End If 'EXCHANGE CR AND LF FOR SPACES '----------------------------- For Linktrim = 1 To Len(TempCap) If Asc(Mid$(TempCap, Linktrim, 1)) = 10 Or Asc(Mid$(TempCap, Linktrim, 1)) = 13 Then Mid$(TempCap, Linktrim, 1) = " " End If Next Linktrim 'RETURN CLEANED UP CAPTION TO PARSE ROUTINE '------------------------------------------ CleanUpCaption = TempCap End Function Sub LICENSE () '=========================================================================== ' HTML HOTLINK PARSE ROUTINE '=========================================================================== 'DISCLAIMER '---------- ' 'This module is made freely available to the public and may be reproduced, 'in whole or in part, as well as redistributed without royalty. Use of the 'routines in this module are the sole responsibility of you, the user. I 'make no warranty, written or implied, regarding any portion of this module. 'It has run on my US3486/33 board with 20 megs RAM without incident. Before 'using this module with any critical data please read through all the 'functions and procedures and make sure you understand what they're doing. 'There are always potential problems when disk I/O is involved. ' 'This module utilizes form, componant, and database object variables. Use of 'these are only available in the PRO edition of VB. Please contact Microsoft 'or your local software vendor for more information on updating to VB-Pro. ' 'This routine will create a Microsoft Access Database the first time you use 'it. I am currently using Microsoft Access 2.0 with the compatability layer 'installed. If you experience problems with the database portion of the 'module I recommend you: ' 'a) install the compatability layer on your system. ' This can be found using FTP to "microsoft.com". ' 'b) disable the use of the database by changing the ' "DisableDatabase" global variable from False to ' True. ' 'If DisableDatabase is set to true then you will need to comment out the 'following Global Declarations from the General Declarations section of 'this module: ' 'Global db As database 'Handle for HTML.mdb 'Global Dt As Dynaset 'Handle for TempLinks table 'Global Dp As Dynaset 'Handle for PermLinks table 'Global PDt As Dynaset 'Handle for Projects table ' 'This module makes judicious use of form and componant object variables so 'using it should be a simple matter of changing the form and control names 'in the appropriate declarations. ' ' End Sub Sub ParseLinks (FileName As String) Dim FileNum As Integer 'Number of next available free filenumber Dim LinkText As String 'Contents of HTML document opened Dim TempVar As String 'Temporary container for document labels Dim LinkStart As Long 'Holds starting position of element to be parsed Dim LinkEnd As Long 'Holds ending position of element to be parsed Dim FileSize As Long 'Used for status line (percentage completed) Dim Lycos As Integer 'Flags true if user answers yes on lycos search Dim NotLycos As Integer 'Flags for second try on lycos query '---------------------------------------------------------------------------- 'Check for existance of "html.mdb". If it does not exist routine will create it. If DisableDatabase = False Then CheckDatabase 'If using database If DisableDatabase = False Then Set Dt = db.CreateDynaset("TempLinks") 'Global Var declared in General Declarations. db.Execute "Delete From Templinks" 'Delete any pre-existing records in temp database End If '============================================================================ ' Open HTML file for parsing '============================================================================ 'FIND FIRST AVAILABLE FILE NUMBER '-------------------------------- FileNum = FreeFile 'OPEN FILE WITH FILE NUMBER '-------------------------- Open FileName For Input As FileNum 'ATT: You must write code to assign value to Filename 'ASSIGN FILE CONTENTS TO VARIABLE '-------------------------------- LinkText = Input$(LOF(FileNum), FileNum) FileSize = LOF(FileNum) '============================================================================ ' Get document Title '============================================================================ LinkStart = InStr(UCase(LinkText), "<TITLE>") LinkStart = LinkStart + Len("<TITLE>") LinkEnd = InStr(UCase(LinkText), "</TITLE>") DocTitle = Mid$(LinkText, LinkStart, LinkEnd - LinkStart) '============================================================================ ' Begin Parsing Document '============================================================================ 'PROGRAM LABEL '------------- ParseLinksStart: URLRecognized = False 'ERROR CONTROL '------------- On Error Resume Next 'FIND NEXT HOTLINK REFERENCE '--------------------------- LinkStart = InStr(1, UCase(LinkText), "HREF") '*** CHECK FOR NO MORE HOTLINKS LEFT AND IF TRUE THEN ESCAPE FROM ROUTINE *** '---------------------------------------------------------------------------- If LinkStart = 0 Then 'Or LinkStart = "" Then Exit Sub End If 'INFORM USER OF PERCENTAGE COMPLETED '----------------------------------- Hotlist.Caption = "Scanning File: " + Format((Len(LinkText) / FileSize), "0%") 'TRUNCATE TEXT TO BEGINING OF HREF '--------------------------------- LinkText = Mid$(LinkText, LinkStart, Len(LinkText) - LinkStart) 'FIND POSITION OF FIRST URL DELIMTER '------------------------------------ LinkStart = InStr(1, LinkText, Chr$(34)) 'TRUNCATE TEXT UP TO AND INCLUDING FIRST URL DELIMITER '----------------------------------------------------- LinkText = Mid$(LinkText, LinkStart + 1, Len(LinkText) - LinkStart) 'FIND POSITION OF SECOND URL DELIMITER '------------------------------------- LinkEnd = InStr(1, LinkText, Chr$(34)) 'EXTRACT URL AND ASSIGN TO LOCAL VAR '----------------------------------- LinkURL = Mid$(LinkText, 1, LinkEnd - 1) 'TRUNCATE TEXT UP TO AND INCLUDING SECOND URL DELIMITER '------------------------------------------------------ LinkText = Mid$(LinkText, LinkEnd + 1, Len(LinkText) - LinkEnd) 'FIND CAPTION DELIMITER '---------------------- LinkStart = InStr(1, LinkText, ">") 'TRUNCATE TEXT UP TO AND INCLUDING FIRST CAPTION DELIMITER '--------------------------------------------------------- LinkText = Mid$(LinkText, LinkStart + 1, Len(LinkText) - LinkStart) 'FIND SECOND CAPTION DELIMITER '----------------------------- LinkEnd = InStr(1, LinkText, "</") 'EXTRACT CAPTION AND ASSIGN TO LOCAL VAR '--------------------------------------- LinkCaption = Mid$(LinkText, 1, LinkEnd - 1) 'CLEAN UP VARIOUS FORMATTING ITEMS EMBEDDED IN LINK CAPTION 'AND ASSIGN RESULT TO BACK TO LINK CAPTION VARIABLE '---------------------------------------------------------- TempVar = LinkCaption LinkCaption = CleanUpCaption(TempVar) LinkCaption = TempVar 'REMOVE EXCESS WHITE SPACE FROM LOCAL VARS '----------------------------------------- LinkCaption = Trim$(LinkCaption) LinkURL = Trim$(LinkURL) 'IF NOT LINK CAPTION ASSIGN DEFAULT CAPTION '------------------------------------------ If LinkCaption = "" Then LinkCaption = LinkURL '============================================================================ ' Check for Lycos Search '============================================================================ 'NotLycos = TRUE IF USER RESPONDS TO NEXT LINE THAT HTML DOC IS A LYCOS 'SEARCH. IF IT IS A LYCOS SEARCH THIS LINE CAUSES THE EXECUTION TO SKIP 'THE OTHER TWO CODE SECTIONS. '----------------------------------------------------------------------- If NotLycos = True Then GoTo ParseLinksNextSegment 'ASK USER IF HTML DOC IS RESULTS OF LYCOS SEARCH 'IF TRUE THEN NotLycos IS SET TO TRUE '----------------------------------------------- If Lycos = False And InStr(UCase(LinkURL), "LYCOS") <> 0 Then If MsgBox("There are indications that the document you submited was the result of a Lycos Search. Is this correct?", 4, "Confirm Document Origin") = 6 Then Lycos = True Else NotLycos = True End If End If 'IF LYCOS IS FOUND IN SUBSEQUENTLY PARSED URL'S THEN THE PARSE 'ROUTINE REPEATS ITSELF WITH THE NEXT LINK, FAILING TO ADD THE 'LYCOS LINK TO THE ARRAY '------------------------------------------------------------- If Lycos = True And InStr(UCase(LinkURL), "LYCOS") <> 0 Then GoTo ParseLinksStart End If '============================================================================ ' Build Hotlink Variable Arrays '============================================================================ ParseLinksNextSegment: 'ADD DESCRIPTION TO PROPER LIST '------------------------------ ReDim Preserve SrcLabel(SrcCount) ReDim Preserve SrcURL(SrcCount) SrcLabel(SrcCount) = LinkCaption SrcURL(SrcCount) = LinkURL 'IF DATABASE IS ENABLED THEN 'ADD RECORD TO TEMP TABLE OF DATABASE '------------------------------------ If DisableDatabase = False Then Dt.AddNew Dt("LinkID") = SrcCount Dt("LinkText") = SrcLabel(SrcCount) Dt("LinkURL") = SrcURL(SrcCount) Dt("URLType") = URLType(SrcURL(SrcCount)) '<-- This is a User Function Dt.Update End If 'INCREMENT SrcCount VARIABLE '--------------------------- SrcCount = SrcCount + 1 'RETURN TO BEGINING OF ROUTINE '----------------------------- GoTo ParseLinksStart End Sub Function URLType (URL As String) As String Select Case UCase(Left$(URL, 7)) Case "HTTP://" URLType = "WWW" Exit Function Case "TELNET:" URLType = "TELNET" Exit Function Case "GOPHER:" URLType = "GOPHER" Exit Function Case "MAILTO:" URLType = "MAILTO" Exit Function Case "FILE://" URLType = "FILE" Exit Function End Select If UCase(Left$(URL, 6)) = "FTP://" Then URLType = "FTP" Exit Function End If If UCase(Left$(URL, 5)) = "NEWS:" Then URLType = "NEWS" Exit Function End If URLType = "FILE" End Function