Skip to main content

How to Create a Proxy Grabber/Scraper in Visual Basic

Screenshot
Body
Introduction: Welcome to my tutorial on how to create a proxy IP:port grabber. Steps of Creation: Step 1: First, create a form with one button; this will allow the user to select a save location and begin the process. We also want to import a few namespaces and create a global string holding the link from which we want to extract the proxy information...
  1. Imports System.Text.RegularExpressions
  2. Imports System.Net
  3. Imports System.IO
  1. Dim link As String = "http://free-proxy-list.net/uk-proxy.html"
Step 2: Now we want to make a couple of functions; we will use these to extract strings from bigger strings later on.
  ''' <summary>
  ''' Returns the text found between an occurrence of <paramref name="Str1"/> and the
  ''' next occurrence of <paramref name="Str2"/> in <paramref name="Source"/>.
  ''' </summary>
  ''' <param name="Source">The text to search.</param>
  ''' <param name="Str1">Opening delimiter. It is passed to Regex.Split, so it is interpreted as a regular-expression pattern.</param>
  ''' <param name="Str2">Closing delimiter. Also interpreted as a regular-expression pattern.</param>
  ''' <param name="Index">Zero-based number of the <paramref name="Str1"/> occurrence to read after.</param>
  ''' <returns>The extracted text, or an empty string when the requested occurrence does not exist.</returns>
  Private Function GetBetween(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String, Optional ByVal Index As Integer = 0) As String
      ' Element 0 is the text before the first opening delimiter; element (Index + 1)
      ' is the text that follows the requested occurrence.
      Dim parts As String() = Regex.Split(Source, Str1)
      Dim position As Integer = Index + 1

      ' Guard against a missing delimiter or an Index past the last occurrence,
      ' which previously raised IndexOutOfRangeException.
      If position < 0 OrElse position >= parts.Length Then
          Return String.Empty
      End If

      ' Keep only what precedes the closing delimiter.
      Return Regex.Split(parts(position), Str2)(0)
  End Function
  ''' <summary>
  ''' Collects every piece of text that sits between <paramref name="Str1"/> and the
  ''' following <paramref name="Str2"/> in <paramref name="Source"/>.
  ''' </summary>
  ''' <param name="Source">The text to search.</param>
  ''' <param name="Str1">Opening delimiter (handed to Regex.Split, so treated as a regex pattern).</param>
  ''' <param name="Str2">Closing delimiter (handed to Regex.Split, so treated as a regex pattern).</param>
  ''' <returns>One entry per occurrence of <paramref name="Str1"/>; an empty array when there is none.</returns>
  Private Function GetBetweenAll(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String) As String()
      Dim pieces As String() = Regex.Split(Source, Str1)
      Dim found As New List(Of String)

      ' Start at 1: pieces(0) is whatever precedes the first opening delimiter.
      For position As Integer = 1 To pieces.Length - 1
          Dim beforeClosing As String = Regex.Split(pieces(position), Str2)(0)
          found.Add(beforeClosing)
      Next

      Return found.ToArray()
  End Function
Step 3: For the button click event, we first want to let the user choose where to save the text file. Then we check that the chosen path is not nothing/null/empty.
  ' Ask the user where the proxy list should be saved. The dialog is a disposable
  ' component, so it is wrapped in Using.
  Using fo As New SaveFileDialog()
      fo.Filter = "Text Files|*.txt"
      fo.FilterIndex = 1

      ' Continue only when the user confirmed the dialog and a file name was chosen.
      ' The type is fully qualified because, inside a Form, the bare name
      ' DialogResult refers to the form's own DialogResult property.
      If fo.ShowDialog() = System.Windows.Forms.DialogResult.OK AndAlso Not String.IsNullOrEmpty(fo.FileName) Then
      End If
  End Using
Step 4: Within the if statement we want to get the source code of the given page link and extract the information. Once we have extracted the information, we write it line by line into the text file at the chosen location.
  ' Request the page with a browser-like User-Agent header.
  Dim r As HttpWebRequest = DirectCast(WebRequest.Create(link), HttpWebRequest)
  r.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36"
  r.KeepAlive = True

  ' Read the whole response body; Using releases the response and the reader
  ' even when reading fails.
  Dim src As String
  Using re As HttpWebResponse = DirectCast(r.GetResponse(), HttpWebResponse)
      Using reader As New StreamReader(re.GetResponseStream())
          src = reader.ReadToEnd()
      End Using
  End Using

  ' Every <tr>...</tr> is a candidate row; rows equal to the first one are skipped.
  Dim rows As String() = GetBetweenAll(src, "<tr>", "</tr>")
  Dim dones As New List(Of String)
  For Each s As String In rows
      If s <> rows(0) AndAlso s.Contains("<td>") AndAlso s.Contains("</td>") Then
          Dim td As String() = GetBetweenAll(s, "<td>", "</td>")
          ' The first two cells are used as ip and port; a row with fewer than
          ' two cells is ignored instead of raising IndexOutOfRangeException.
          If td.Length >= 2 Then
              Dim ip As String = td(0)
              Dim port As String = td(1)
              dones.Add(ip & ":" & port)
          End If
      End If
  Next

  ' Write one ip:port entry per line to the file chosen in the save dialog.
  Using sw As New StreamWriter(fo.FileName)
      For Each s As String In dones
          sw.WriteLine(s)
      Next
  End Using
  MsgBox("Finished and wrote!")
Project Complete! Below is the full source code and download to the project files.
  1. Imports System.Text.RegularExpressions
  2. Imports System.Net
  3. Imports System.IO
  ''' <summary>
  ''' Form with a single button. Clicking it downloads the page at <c>link</c>,
  ''' extracts ip:port pairs from its table rows and writes them, one per line,
  ''' to a text file chosen by the user.
  ''' </summary>
  Public Class Form1
      ' Address of the page whose table rows are scraped.
      Dim link As String = "http://free-proxy-list.net/uk-proxy.html"

      ''' <summary>
      ''' Returns the text found between an occurrence of <paramref name="Str1"/> and
      ''' the next occurrence of <paramref name="Str2"/> in <paramref name="Source"/>.
      ''' </summary>
      ''' <param name="Source">The text to search.</param>
      ''' <param name="Str1">Opening delimiter; passed to Regex.Split, so it is treated as a regex pattern.</param>
      ''' <param name="Str2">Closing delimiter; also treated as a regex pattern.</param>
      ''' <param name="Index">Zero-based number of the <paramref name="Str1"/> occurrence to read after.</param>
      ''' <returns>The extracted text, or an empty string when the requested occurrence does not exist.</returns>
      Private Function GetBetween(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String, Optional ByVal Index As Integer = 0) As String
          Dim parts As String() = Regex.Split(Source, Str1)
          Dim position As Integer = Index + 1

          ' A missing delimiter or an Index past the last occurrence used to raise
          ' IndexOutOfRangeException; report "nothing found" instead.
          If position < 0 OrElse position >= parts.Length Then
              Return String.Empty
          End If

          Return Regex.Split(parts(position), Str2)(0)
      End Function

      ''' <summary>
      ''' Collects every piece of text that sits between <paramref name="Str1"/> and
      ''' the following <paramref name="Str2"/> in <paramref name="Source"/>.
      ''' </summary>
      ''' <param name="Source">The text to search.</param>
      ''' <param name="Str1">Opening delimiter (treated as a regex pattern).</param>
      ''' <param name="Str2">Closing delimiter (treated as a regex pattern).</param>
      ''' <returns>One entry per occurrence of <paramref name="Str1"/>; an empty array when there is none.</returns>
      Private Function GetBetweenAll(ByVal Source As String, ByVal Str1 As String, ByVal Str2 As String) As String()
          Dim pieces As String() = Regex.Split(Source, Str1)
          Dim found As New List(Of String)

          ' pieces(0) is the text before the first opening delimiter, so skip it.
          For position As Integer = 1 To pieces.Length - 1
              found.Add(Regex.Split(pieces(position), Str2)(0))
          Next

          Return found.ToArray()
      End Function

      ''' <summary>
      ''' Downloads the page at <c>link</c> and returns its body as a string.
      ''' </summary>
      Private Function DownloadSource() As String
          Dim r As HttpWebRequest = DirectCast(WebRequest.Create(link), HttpWebRequest)
          r.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36"
          r.KeepAlive = True

          ' Using releases the response and the reader even when reading fails.
          Using re As HttpWebResponse = DirectCast(r.GetResponse(), HttpWebResponse)
              Using reader As New StreamReader(re.GetResponseStream())
                  Return reader.ReadToEnd()
              End Using
          End Using
      End Function

      ''' <summary>
      ''' Builds "ip:port" strings from the first two cells of each table row in
      ''' <paramref name="src"/>.
      ''' </summary>
      ''' <param name="src">HTML text containing &lt;tr&gt; rows with &lt;td&gt; cells.</param>
      ''' <returns>The extracted entries, in page order.</returns>
      Private Function ExtractProxies(ByVal src As String) As List(Of String)
          Dim rows As String() = GetBetweenAll(src, "<tr>", "</tr>")
          Dim dones As New List(Of String)

          For Each s As String In rows
              ' Rows equal to the first one are skipped, as are rows without cells.
              If s <> rows(0) AndAlso s.Contains("<td>") AndAlso s.Contains("</td>") Then
                  Dim td As String() = GetBetweenAll(s, "<td>", "</td>")
                  ' A row with fewer than two cells cannot supply both ip and port.
                  If td.Length >= 2 Then
                      dones.Add(td(0) & ":" & td(1))
                  End If
              End If
          Next

          Return dones
      End Function

      ''' <summary>
      ''' Asks for a save location, then downloads, parses and writes the proxy list.
      ''' </summary>
      Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
          Using fo As New SaveFileDialog()
              fo.Filter = "Text Files|*.txt"
              fo.FilterIndex = 1

              ' Stop when the dialog was cancelled or no file name was chosen. The type
              ' is fully qualified because the bare name DialogResult would resolve to
              ' the form's own DialogResult property here.
              If fo.ShowDialog() <> System.Windows.Forms.DialogResult.OK OrElse String.IsNullOrEmpty(fo.FileName) Then
                  Return
              End If

              Try
                  Dim dones As List(Of String) = ExtractProxies(DownloadSource())

                  Using sw As New StreamWriter(fo.FileName)
                      For Each s As String In dones
                          sw.WriteLine(s)
                      Next
                  End Using

                  MsgBox("Finished and wrote!")
              Catch ex As WebException
                  ' Report a failed download instead of letting the exception escape the handler.
                  MsgBox("Could not download the proxy list: " & ex.Message)
              Catch ex As IOException
                  ' Raised while reading the response or writing the output file.
                  MsgBox("Could not read or write the data: " & ex.Message)
              End Try
          End Using
      End Sub
  End Class

Add new comment