home *** CD-ROM | disk | FTP | other *** search
- Imports System.Text.RegularExpressions
-
- Module MainModule
-
- Sub Main()
-     ' Program entry point. Enable exactly one of the Testxxxx calls below
-     ' by removing its leading apostrophe, then run the program.
-
-     'TestSearchReplace()
-     'TestRegexOptions()
-     'TestRegexOptions2()
-     'TestRegex()
-     'TestReplace()
-     'TestReplaceWithCallback()
-     'TestSharedMethods()
-     'TestGroups()
-     'TestMatch()
-     'TestGroupObject()
-     'TestCaptureObject()
-     'TestCaptureObject2()
-     'TestSearchHyperlinks()
-
-     ' Wait for Enter so that the console window stays open; this is useful
-     ' when the program is launched from inside Visual Studio .NET.
-     Console.WriteLine()
-     Console.WriteLine(">>> type Enter to terminate the program <<<")
-     Console.ReadLine()
- End Sub
-
- ' this procedure is the first example of regex
-
- Sub TestSearchReplace()
-     ' Introductory demo: counts and lists matches, compares case-sensitive
-     ' with case-insensitive searching, and performs a simple replacement.
-
-     ' One lowercase vowel immediately followed by one digit.
-     Dim vowelDigit As New Regex("[aeiou]\d")
-     ' The sample text holds three such pairs: "a1", "a1" and "e2".
-     Dim text As String = "a1 = a1 & e2"
-     Dim hits As MatchCollection = vowelDigit.Matches(text)
-     Console.WriteLine("Found {0} occurrences", hits.Count)
-     ' => Found 3 occurrences
-
-     ' Show the text and the position of every occurrence.
-     Dim i As Integer
-     For i = 0 To hits.Count - 1
-         Console.WriteLine("'{0}' at index {1}", hits(i).Value, hits(i).Index)
-     Next
-
-     ' Searches are case sensitive unless RegexOptions.IgnoreCase is passed.
-     text = "ABC Abc abc"
-     Console.WriteLine(Regex.Matches(text, "abc").Count) ' => 1
-     Console.WriteLine(Regex.Matches(text, "abc", RegexOptions.IgnoreCase).Count) ' => 3
-
-     ' Drop the digit that follows each "a" character.
-     text = "a1 = a1 & e2"
-     Dim aDigit As New Regex("a\d")
-     Console.WriteLine(aDigit.Replace(text, "a")) ' => a = a & e2
-
-     ' Same replacement through the shared Replace method, which does not
-     ' require a Regex instance.
-     Console.WriteLine(Regex.Replace("a1 = a1 & e2", "a\d", "a"))
- End Sub
-
- ' this procedure tests Regex options - searches for variable assignments
-
- Sub TestRegexOptions()
-     ' Prints every line of a VB source file that looks like a variable
-     ' assignment, preceded by the offset at which the match starts.
-
-     ' modify this path to point to an existing .VB file
-     Dim vbCode As String = FileText("c:\Module1.vb")
-
-     ' An identifier at the start of a line, an optional space, "=" and the
-     ' rest of the line up to (but not including) the CR-LF pair.
-     Const AssignmentPattern As String = "^\s*[A-Z]\w* ?=.+(?=\r\n)"
-     ' Multiline makes ^ match at the start of every line, not only at the
-     ' start of the whole text.
-     Dim flags As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Multiline
-
-     ' Walk the matches one at a time.
-     Dim assignment As Match = Regex.Match(vbCode, AssignmentPattern, flags)
-     Do While assignment.Success
-         Console.WriteLine("[{0}] {1}", assignment.Index, assignment.Value)
-         assignment = assignment.NextMatch()
-     Loop
- End Sub
-
- ' this procedure tests Regex options and search for variable declarations in VB files.
-
- Sub TestRegexOptions2()
-     ' Prints every Dim/Public/Private variable declaration found in a VB
-     ' source file, preceded by its offset in the file.
-
-     ' modify this path to point to an existing .VB file
-     Dim vbCode As String = FileText("c:\Module1.vb")
-
-     ' The inline (?im) prefix requests ignore-case and multiline matching;
-     ' the same two options are also passed explicitly to the constructor.
-     Dim declarationRegex As New Regex( _
-         "(?im)^\s+(dim|public|private) [A-Z]\w* As .+(?=\r\n)", _
-         RegexOptions.IgnoreCase Or RegexOptions.Multiline)
-
-     ' Display each declaration together with its position.
-     Dim declaration As Match
-     For Each declaration In declarationRegex.Matches(vbCode)
-         Console.WriteLine("[{0}] {1}", declaration.Index, declaration.Value)
-     Next
- End Sub
-
- ' Reusable function that reads the contents of a text file.
-
- Function FileText(ByVal path As String) As String
-     ' Reads a whole text file and returns its contents.
-     '
-     ' path: location of the file to read.
-     ' Returns the file text. If the file cannot be opened or read, the
-     ' error message is written to the console and an empty string is
-     ' returned.
-     Dim fs As System.IO.FileStream = Nothing
-     Dim sr As System.IO.StreamReader = Nothing
-     Dim contents As String = String.Empty
-
-     Try
-         ' Ask for read access only, so that read-only files can be opened.
-         fs = New System.IO.FileStream(path, System.IO.FileMode.Open, System.IO.FileAccess.Read)
-         sr = New System.IO.StreamReader(fs)
-         ' Read the entire contents of this file.
-         contents = sr.ReadToEnd()
-     Catch ex As Exception
-         Console.WriteLine(ex.Message)
-     Finally
-         ' Clean-up code. Either object is still Nothing when opening the
-         ' file failed, so each one is checked before it is closed.
-         If Not sr Is Nothing Then sr.Close()
-         If Not fs Is Nothing Then fs.Close()
-     End Try
-
-     Return contents
- End Function
-
- ' this procedure tests various regex constructs
-
- Sub TestRegex()
-     ' Demonstrates several regex constructs: the \G anchor, back-references,
-     ' iterating with Match/NextMatch, and Regex.Split.
-
-     ' Finds consecutive groups of space delimited numbers.
-     Dim re As New Regex("\G\s*\d+")
-     ' \G forces each match to start where the previous one ended, so the
-     ' search stops at the first non-numeric group.
-     Console.WriteLine(re.Matches("12 34 56 ab 78").Count) ' => 3
-
-     ' Check whether the input string is a date in the format mm-dd-yy or
-     ' mm-dd-yyyy (can optionally use slashes as date separator and
-     ' contain leading or trailing white spaces). The \1 back-reference
-     ' requires both separators to be the same character.
-     re = New Regex("^\s*\d{1,2}(/|-)\d{1,2}\1(\d{4}|\d{2})\s*$")
-     If re.IsMatch(" 12/10/2001 ") Then
-         Console.WriteLine("The date is formatted correctly.")
-         ' (We don't check whether month and day values are in valid range.)
-     Else
-         Console.WriteLine("The date is formatted incorrectly.")
-     End If
-
-     ' Demonstrate the Match method
-     ' Search all the dates in a source string.
-     Dim source As String = " 12-2-1999 10/23/2001 4/5/2001 "
-     ' The four-digit alternative must come first: alternatives are tried
-     ' left to right and this pattern has no trailing anchor, so \d{2}
-     ' placed first would stop after "19" and truncate the year.
-     re = New Regex("\s*\d{1,2}(/|-)\d{1,2}\1(\d{4}|\d{2})")
-
-     ' Find the first match.
-     Dim m As Match = re.Match(source)
-     ' Enter the following loop only if the search was successful.
-     Do While m.Success
-         Console.WriteLine(m.Value.Trim())
-         ' Find the next match, exit if not successful.
-         m = m.NextMatch()
-     Loop
-     Console.WriteLine("")
-
-     ' Split on commas, ignoring white space around each comma.
-     source = "123, 456,,789"
-     re = New Regex("\s*,\s*")
-
-     Dim s As String
-     For Each s In re.Split(source)
-         ' Note that the 3rd element is an empty string.
-         Console.Write(s & "-") ' => 123-456--789-
-     Next
- End Sub
-
- ' this procedure tests the Replace method
-
- Sub TestReplace()
-     ' Demonstrates Regex.Replace with named groups in the replacement
-     ' string and with a limit on the number of replacements.
-
-     ' Swap month and day in every date; the separator is captured once in
-     ' <sep> and must be repeated (\k<sep>) before the year.
-     Dim dates As String = "12-2-99 10/23/2001 4/5/2001 "
-     Dim dateRegex As New Regex( _
-         "\b(?<mm>\d{1,2})(?<sep>(/|-))(?<dd>\d{1,2})\k<sep>(?<yy>(\d{2}|\d{4}))\b")
-     Dim swapped As String = dateRegex.Replace(dates, "${dd}${sep}${mm}${sep}${yy}")
-     Console.WriteLine(swapped)
-     ' => 2-12-99 23/10/2001 5/4/2001
-
-     ' Expand all "ms" abbreviations to "Microsoft", regardless of their case.
-     Dim greeting As String = "Welcome to MS ms Ms MS"
-     Dim msRegex As New Regex("\bMS\b", RegexOptions.IgnoreCase)
-     ' Replace at most 3 occurrences, searching from character index 10.
-     Dim expanded As String = msRegex.Replace(greeting, "Microsoft", 3, 10)
-     Console.WriteLine(expanded)
-     ' => Welcome to Microsoft Microsoft Microsoft MS
- End Sub
-
- ' this procedure tests Replace with callback
-
- Sub TestReplaceWithCallback()
-     ' Demonstrates Regex.Replace with a callback: every "n + m" expression
-     ' in the text is replaced by the string that DoSum returns for it.
-
-     Dim text As String = "a = 100 + 234: b = 200+345"
-     ' Two integers separated by a + symbol, with optional white space.
-     Dim sumRegex As New Regex("\d+\s*\+\s*\d+")
-     ' DoSum is invoked once for each match.
-     Dim evaluator As New MatchEvaluator(AddressOf DoSum)
-     Console.WriteLine(sumRegex.Replace(text, evaluator))
-     ' => a = 334: b = 545
- End Sub
-
- Function DoSum(ByVal m As Match) As String
-     ' Callback used with Regex.Replace: takes a match whose text has the
-     ' form "<integer> + <integer>" and returns the sum as a string.
-     Dim expression As String = m.Value
-     ' Everything before the "+" is the first operand, everything after it
-     ' is the second one; Long.Parse tolerates the surrounding white space.
-     Dim plusAt As Integer = expression.IndexOf("+"c)
-     Dim firstOperand As Long = Long.Parse(expression.Substring(0, plusAt))
-     Dim secondOperand As Long = Long.Parse(expression.Substring(plusAt + 1))
-     Dim total As Long = firstOperand + secondOperand
-     Return total.ToString()
- End Function
-
- ' this procedure tests Regex shared methods
-
- Sub TestSharedMethods()
-     ' Demonstrates two shared Regex methods that need no Regex instance:
-     ' Split and Escape.
-
-     ' \W+ is a run of non-word characters, so the sentence is split into
-     ' its individual words.
-     Dim parts() As String = Regex.Split("Split these words", "\W+")
-     Dim i As Integer
-     For i = 0 To parts.Length - 1
-         Console.WriteLine(parts(i))
-     Next
-     Console.WriteLine()
-
-     ' Escape puts a backslash in front of the regex metacharacters.
-     Dim escaped As String = Regex.Escape("(x)")
-     Console.Write(escaped) ' => \(x\)
- End Sub
-
- ' this procedure tests regex groups
-
- Sub TestGroups()
-     ' Shows how a Regex exposes the names and the numbers of its groups.
-
-     ' A pattern for variable assignments with two unnamed groups and two
-     ' named groups ("name" and "value").
-     Dim assignmentRegex As New Regex("(\s*)(?<name>\w+)\s*=\s*(?<value>\d+)(.*)")
-
-     ' Group names: unnamed groups are listed by number, ahead of the
-     ' named ones.
-     Dim names() As String = assignmentRegex.GetGroupNames()
-     Console.WriteLine(String.Join(" ", names) & " ") ' => 0 1 2 name value
-
-     ' Group numbers (not really interesting).
-     Dim numbers() As Integer = assignmentRegex.GetGroupNumbers()
-     Dim i As Integer
-     For i = 0 To numbers.Length - 1
-         Console.Write(numbers(i).ToString() & " ") ' => 0 1 2 3 4
-     Next
-     Console.WriteLine()
-
-     ' Converting between the two representations.
-     Console.WriteLine(assignmentRegex.GroupNameFromNumber(2)) ' => 2
-     Console.WriteLine(assignmentRegex.GroupNameFromNumber(3)) ' => name
-
-     ' An unknown group name yields -1.
-     Console.WriteLine(assignmentRegex.GroupNumberFromName("name")) ' => 3
-     Console.WriteLine(assignmentRegex.GroupNumberFromName("foo")) ' => -1
- End Sub
-
- ' this procedure tests the Match object
-
- Sub TestMatch()
-     ' Demonstrates the Match object: value and position of each match,
-     ' empty matches, named groups, and the Result method.
-
-     ' List every word of a sentence together with its position.
-     Dim sentence As String = "A sentence with five words"
-     Dim word As Match
-     For Each word In Regex.Matches(sentence, "\w+")
-         Console.WriteLine("{0} (found at {1})", word.Value, word.Index)
-     Next
-     Console.WriteLine()
-
-     ' \d* also succeeds on zero digits, so an empty match is reported at
-     ' every position that holds no digit, including the end of the text.
-     Dim digits As Match = Regex.Match("1a23bc456de789", "\d*")
-     Do While digits.Success
-         Console.Write(digits.Value & ",") ' => 1,,23,,,456,,,789,,
-         digits = digits.NextMatch()
-     Loop
-     Console.WriteLine()
-
-     ' Read the named groups of each variable assignment.
-     Dim assignments As String = "a = 123: b=456"
-     Dim assignmentRegex As New Regex("(\s*)(?<name>\w+)\s*=\s*(?<value>\d+)")
-     Dim found As MatchCollection = assignmentRegex.Matches(assignments)
-     Dim hit As Match
-     For Each hit In found
-         Console.WriteLine("Variable: {0} Value: {1}", hit.Groups("name").Value, hit.Groups("value").Value)
-         ' => Variable: a Value: 123
-         '    Variable: b Value: 456
-     Next
-     Console.WriteLine()
-
-     ' Result expands ${group} placeholders, which produces exactly the
-     ' same lines as the loop above.
-     For Each hit In found
-         Console.WriteLine(hit.Result("Variable: ${name} Value: ${value}"))
-     Next
-     Console.WriteLine()
- End Sub
-
- ' this procedure tests the Group object
-
- Sub TestGroupObject()
-     ' Demonstrates the Group object: for every variable assignment in the
-     ' text, prints the variable name, where the name starts, and its value.
-
-     Dim assignments As String = "a = 123: b=456"
-     Dim assignmentRegex As New Regex("(\s*)(?<name>\w+)\s*=\s*(?<value>\d+)")
-
-     ' Step through the matches one at a time.
-     Dim current As Match = assignmentRegex.Match(assignments)
-     Do While current.Success
-         ' The "name" group supplies both the identifier and its position.
-         Dim nameGroup As Group = current.Groups("name")
-         Console.Write("Variable '{0}' found at index {1}", nameGroup.Value, nameGroup.Index)
-         ' The "value" group supplies the assigned number.
-         Dim valueGroup As Group = current.Groups("value")
-         Console.WriteLine(", value is {0}", valueGroup.Value)
-         current = current.NextMatch()
-     Loop
- End Sub
-
- ' this procedure tests the Capture object
-
- Sub TestCaptureObject()
-     ' Demonstrates the Capture object: a group placed inside a quantifier
-     ' records one capture for every repetition, so (\w)+ yields one
-     ' capture per character of each word.
-
-     Dim text As String = "abc def"
-     Dim wordRegex As New Regex("(\w)+")
-
-     ' Names (or numbers, for unnamed groups) of all the groups.
-     Dim groupNames() As String = wordRegex.GetGroupNames()
-
-     Dim found As MatchCollection = wordRegex.Matches(text)
-     Dim matchIndex As Integer, captureIndex As Integer
-     Dim groupName As String
-
-     For matchIndex = 0 To found.Count - 1
-         Dim current As Match = found(matchIndex)
-         ' Describe the match as a whole.
-         Console.WriteLine("Match '{0}' at index {1}", current.Value, current.Index)
-
-         For Each groupName In groupNames
-             ' All the captures recorded for this group in this match.
-             Dim captures As CaptureCollection = current.Groups(groupName).Captures
-             Console.WriteLine(" Found {0} capture(s) for group {1}", captures.Count, groupName)
-
-             ' Text and position of each individual capture.
-             For captureIndex = 0 To captures.Count - 1
-                 Console.WriteLine(" '{0}' at index {1}", captures(captureIndex).Value, captures(captureIndex).Index)
-             Next
-         Next
-     Next
- End Sub
-
- ' this procedure tests the Capture object and uses it to
- ' find the mantissa and exponent of numbers in fp notation
-
- Sub TestCaptureObject2()
-     ' Uses groups and captures to take numbers in floating-point notation
-     ' apart. Group 1 is the whole mantissa, group 2 its integer part,
-     ' group 3 its fractional part and group 4 the exponent.
-
-     Dim source As String = "11.22E33 4.55E6 "
-     ' The decimal point is escaped: an unescaped "." would accept any
-     ' character between the integer and fractional digits.
-     Dim re As New Regex("((\d+)\.?(\d*))E(\d+)")
-     Dim m As Match, s As String, c As Capture
-
-     ' Get the name or numbers of all the groups.
-     Dim groups() As String = re.GetGroupNames()
-
-     ' Iterate over all matches.
-     For Each m In re.Matches(source)
-         ' Display information on this match.
-         Console.WriteLine("Match '{0}' at index {1}", m.Value, m.Index)
-         ' Iterate over the groups in each match.
-         For Each s In groups
-             ' Get the capture collection of the corresponding group.
-             Dim cc As CaptureCollection = m.Groups(s).Captures
-             ' Display the number of captures.
-             Console.WriteLine(" Found {0} capture(s) for group {1}", cc.Count, s)
-             ' Display information on each capture.
-             For Each c In cc
-                 Console.WriteLine(" '{0}' at index {1}", c.Value, c.Index)
-             Next
-         Next
-     Next
- End Sub
-
- ' this procedure shows how to search for HREFs in an html file
-
- ' ensure that the BIN directory contains the test.htm file, or copy
- ' it from the project directory if necessary
-
- Sub TestSearchHyperlinks()
-     ' Lists the hyperlinks found in test.htm as "link text => target".
-     ' Group 1 captures the HREF target (with or without surrounding
-     ' quotes) and group 2 the text between the opening and closing tags.
-     ' The link text uses a lazy quantifier (.+?) so that it stops at the
-     ' nearest </A>; a greedy .+ would merge several links that sit on the
-     ' same line into a single match.
-     Dim re As New Regex("<A\s+HREF\s*=\s*""?([^"" >]+)""?>(.+?)</A>", RegexOptions.IgnoreCase)
-
-     Dim source As String = FileText("test.htm")
-     ' Nothing to scan when no text came back from the file.
-     If source Is Nothing OrElse source.Length = 0 Then Exit Sub
-
-     Dim m As Match = re.Match(source)
-     Do While m.Success
-         Console.WriteLine("{0} => {1}", m.Groups(2).Value, m.Groups(1).Value)
-         m = m.NextMatch()
-     Loop
- End Sub
- End Module
-