..:: MX Studio Fóruns ::..: classe ASP para screen-scraping - ..:: MX Studio Fóruns ::..

Jump to content

Publicidade




Page 1 of 1
  • You cannot start a new topic
  • You cannot reply to this topic

classe ASP para screen-scraping

#1 User is offline   xanburzum 

  • Group: Administrador
  • Posts: 2076
  • Joined: 04-November 08

Posted 26 January 2010 - 07:58 PM

classe ASP para fazer captura de tela

CÓDIGO
<%
'============================================================================
class httpcache
'============================================================================

    private Http
    private mFSO
    private mFile
    private mURL 'URL de origem.
    public cacheFilename 'nome do arquivo do arquivo de cache, se em branco, será auto-nomeado
    public data
    public TTL 'Time To Live in seconds
    public cachePath 'Path to your cache directory.
    public mode 'Either "image" or "text"
    public charSet
    public baseHref
    public table_data

    Dim filets, nowts
    
    private sub Class_Initialize()
        TTL = 0  'Default is no cache. Caching recommended!
        'Set default cache directory here. Include trailing slash!
        cachePath = ""
        mode = "text"
        baseHref = null
        set Http=Server.CreateObject("Msxml2.ServerXMLHTTP")
    end sub
    
    private sub Class_Terminate()
        Set Http = Nothing
    end sub

    private sub GetCacheFilename()
        cacheFilename = md5(mURL)
    end sub

    private sub ReadBinaryFile()
        Const adTypeBinary = 1
          
        'Create Stream object
        Dim BinaryStream
        Set BinaryStream = CreateObject("ADODB.Stream")
          
        'Specify stream type - we want To get binary data.
        BinaryStream.Type = adTypeBinary
          
        'Open the stream
        BinaryStream.Open
          
        'Load the file data from disk To stream object
        BinaryStream.LoadFromFile cachePath & cacheFilename
          
        'Open the stream And get binary data from the object
        data = BinaryStream.Read
    End sub

    private sub ReadTextFile()
        Const adTypeText = 2
          
        'Create Stream object
        Dim BinaryStream
        Set BinaryStream = CreateObject("ADODB.Stream")
          
        'Specify stream type - we want To get binary data.
        BinaryStream.Type = adTypeText
          
        'Specify charset For the source text (unicode) data.
        If Len(charSet) > 0 Then
            BinaryStream.CharSet = charSet
        End If
          
        'Open the stream
        BinaryStream.Open
          
        'Load the file data from disk To stream object
        BinaryStream.LoadFromFile cachePath & cacheFilename
          
        'Open the stream And get binary data from the object
        data = baseHref & BinaryStream.ReadText
    End sub
        
    private sub SaveBinaryData()
        Const adTypeBinary = 1
        Const adSaveCreateOverWrite = 2
          
        'Create Stream object
        Dim BinaryStream
        Set BinaryStream = CreateObject("ADODB.Stream")
          
        'Specify stream type - we want To save binary data.
        BinaryStream.Type = adTypeBinary
          
        'Open the stream And write binary data To the object
        BinaryStream.Open
        BinaryStream.Write data
          
        'Save binary data To disk
        BinaryStream.SaveToFile cachePath & cacheFilename, adSaveCreateOverWrite
    End sub

    private sub SaveTextData()
        Const adTypeText = 2
        Const adSaveCreateOverWrite = 2
          
        'Create Stream object
        Dim BinaryStream
        Set BinaryStream = CreateObject("ADODB.Stream")
          
        'Specify stream type - we want To save text/string data.
        BinaryStream.Type = adTypeText
          
        'Specify charset For the source text (unicode) data.
        If Len(charSet) > 0 Then
            BinaryStream.CharSet = charSet
        End If
          
        'Open the stream And write binary data To the object
        BinaryStream.Open
        BinaryStream.WriteText data
          
        'Save binary data To disk
        On Error Resume Next
        BinaryStream.SaveToFile cachePath & cacheFilename, adSaveCreateOverWrite
        if Err.number <> 0 then
            Response.Write "<div style=""font-size:xx-small;color:#cc0000;"">cache save failed</div>"
        end if
        On Error Goto 0

    End sub
        
    private sub GetmURL()
        'Create an Http object, use any of the four objects
        Dim Http
        ' Set Http = CreateObject("Microsoft.XMLHTTP")
        ' Set Http = CreateObject("MSXML2.ServerXMLHTTP")
        Set Http = CreateObject("WinHttp.WinHttpRequest.5.1")
        ' Set Http = CreateObject("WinHttp.WinHttpRequest")
          
        'Send request To mURL
        Http.Open "GET", mURL, False
        Http.Send
        'Get response data As a string
        if mode = "binary" then
            data = Http.ResponseBody
        else
            'Response.Write "<div style=""font-size:xx-small;color:#cccccc;"">live</div>"
            data = baseHref & Http.ResponseText
        end if
    End sub

    private sub ReadFile()
        if mode = "binary" then
            ReadBinaryFile()
        else
            'Response.Write "<div style=""font-size:xx-small;color:#cccccc;"">cache</div>"
            ReadTextFile()
        end if
    end sub

    private sub SaveData()
        if mode = "binary" then
            SaveBinaryData()
        else
            SaveTextData()
        end if
    end sub

    private sub Fetch()
        if TTL > 0 then
            if IsNull(cacheFilename) or (Len(Trim(cacheFilename)) = 0) then GetCacheFileName()
            set mFSO = Server.CreateObject("Scripting.FileSystemObject")
            if mFSO.FileExists(cachePath & cacheFilename) then
                set mFile = mFSO.GetFile(cachePath & cacheFilename)
                filets = mFile.DateLastModified
                nowts = Date() & " " & Time()
                if CLng(DateDiff("S",filets,nowts)) <= CLng(TTL) then
                    ReadFile()
                else
                    set mFile = Nothing
                    GetmURL()
                    SaveData()
                end if
            else
                GetmURL()
                SaveData()
            end if
            set mFSO = Nothing
        else
            GetmURL()
        end if

    end sub

    public function strip_tags( strToStrip )
        Dim objRegExp
        strToStrip = Trim( strToStrip & "" )
        If Len( strToStrip ) > 0 Then
            Set objRegExp = New RegExp
            objRegExp.IgnoreCase = True
            objRegExp.Global = True
            objRegExp.Pattern= "<[^>]+>"
            strToStrip = objRegExp.Replace(strToStrip, "")
            Set objRegExp = Nothing
        End If
        strip_tags = strToStrip
    end function

    public property let URL(x)
        mURL = x
        Fetch()
    end property
    
    'Methods
    public function img()
        Response.ContentType = "image/jpeg"
        Response.BinaryWrite data
    end function

    '------------------------------------------------------------------------------
    public sub table_extract(needle, needle_within, stripTags)
    ' Generic function to return dicionary object from HTML table data
    ' Output is a dictionary where each item represents a row. Each item is a
    ' dictionary where each item represents a column. (Nested dictionary objects.)
    ' In PHP, we'd just use a simple 2 dimensional array, but alas...
    '    rawHTML: the page source
    '    needle: optional string to start parsing source from
    '    needle_within: 0 = needle is BEFORE table, 1 = needle is within table
    '    stripTags: TRUE/FALSE remove all HTML tags, Default = false
    '------------------------------------------------------------------------------
        Dim rawHTML, pos1, pos2, cnt, cols, rowIdx, colIdx, row, xml
        rawHTML = data
        Set table_data = Server.CreateObject("Scripting.Dictionary")
        
        pos1 = 1
        if (Len(needle) > 0) then
            if needle_within = 1 then
                pos1 = InStr(1, rawHTML, needle, 1)
                if pos1 < 1 then exit sub
                pos1 = InStrRev(rawHTML, "<table", pos1, 1)
            else
                pos1 = InStr(1, rawHTML, needle, 1)
                if pos1 < 1 then exit sub
                pos1 = InStr(pos1, rawHTML, "<table", 1)
            end if
        end if
        if pos1 < 1 then exit sub

        pos2 = InStr(pos1, rawHTML, "</table>", 1)
        if pos2 < 1 then exit sub

        rawHTML = Mid(rawHTML, pos1, pos2-pos1+8) '= just the table
        
        rowIdx = 0

        pos1 = InStr(1, rawHTML, "<TR", 1)
        pos2 = InStr(pos1, rawHTML, "</TR>", 1)

        while (pos1 > 0) and (pos2 > 0)

            rowIdx = rowIdx + 1
            colIdx = 0

            xml = xml & "<ROW id=""" & rowIdx & """>" & vbcrlf
            row = Mid(rawHTML, pos1+3, pos2-pos1-1) 'Extract the row minus the row tags.
            rawHTML = Mid(rawHTML,pos2+4) 'Remove this row from the rawHTML so we don't deal with it again.
            pos1 = InStr(1, row,"<T", 1)

            Set cols = Server.CreateObject("Scripting.Dictionary")

            while (pos1 > 0)
                colIdx = colIdx + 1
                xml = xml & "<COL id=""" & colIdx & """>" & vbcrlf
                pos1 = InStr(1, row, ">", 1)
                pos1 = pos1 + 1
                pos2 = InStr(1, row, "</T", 1)
                if stripTags then
                    cols.Add colIdx, trim(strip_tags(Mid(row, pos1, pos2-pos1)))
                else
                    cols.Add colIdx, trim(Mid(row, pos1, pos2-pos1))
                end if
                row = Mid(row, pos2+5)
                pos1 = InStr(1, row,"<T", 1)
            wend
            
            table_data.Add rowIdx, cols
            
            Set cols = Nothing
            pos1 = InStr(1, rawHTML, "<TR", 1)
            if pos1 > 0 then pos2 = InStr(pos1, rawHTML, "</TR>", 1)

        wend
        
    end sub  'table_extract
    
    '------------------------------------------------------------------------------
    public sub table_dump()
    ' Displays a basic table showing the results of the table_data method.
    ' Useful during development and testing.
    '------------------------------------------------------------------------------
        Dim row, col
        Response.Write "<table border=""1"">" & vbcrlf
        for row = 2 to table_data.Count
            Response.Write vbtab & "<tr>" & vbcrlf
            for col = 1 to table_data.Item(row).Count
                Response.Write vbtab & vbtab & "<td>" & table_data.Item(row).Item(col) & "</td>" & vbcrlf
            next
            Response.Write vbtab & "</tr>" & vbcrlf
        next
        Response.Write "</table>" & vbcrlf
    end sub    

end class  'httpcache
%>

0

Share this topic:


Page 1 of 1
  • You cannot start a new topic
  • You cannot reply to this topic



Publicidade




1 User(s) are reading this topic
0 membro(s), 1 visitante(s) e 0 membros anônimo(s)