GetTextAndImageCreateExamPaper

Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
‘下载网络图片
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
    Dim lngRetVal As Long
    lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
    If lngRetVal = 0 Then
        DeleteUrlCacheEntry ImageURL  ‘清除缓存
        ‘MsgBox "成功"
    Else
        ‘MsgBox "失败"
    End If
End Sub

Sub OneKeyCreateExam()
    Dim ImgNames As Variant
    Dim strText As String
    Dim i As Long, n As Long, m As Long
    Dim OneTagP As Object
    Dim OneTagA As Object
    Dim TagP As Object
    Dim PosText As String
    Dim Arr() As String
    ReDim Arr(1 To 1) As String
    Dim Brr() As String
    ReDim Brr(1 To 1)
    Dim ImageURL As String
    Dim FilePath As String
    Dim FileName As String

    Dim dContent As Object
    Set dContent = CreateObject("Scripting.Dictionary")
    Dim dImageName As Object
    Set dImageName = CreateObject("Scripting.Dictionary")

    Dim StartTime As Variant    ‘开始时间
    Dim UsedTime As Variant    ‘使用时间
    StartTime = VBA.Timer    ‘记录开始时间

    AppSettings
    On Error GoTo ErrHandler

    ‘设置URL,访问网页获取网页源码
    URL = ActiveSheet.Range("A2").Text
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .Send
        strText = .responsetext
    End With

    ‘创建网页文件
    With CreateObject("htmlfile")
        .write strText
        ‘获取标题
        FileName = .getElementsByTagName("h2")(0).innerhtml
        Debug.Print FileName

        Application.StatusBar = ">>>>>>正在下载图片>>>>>>"

        i = 0    ‘初始化序号

        For Each OneTagA In .getElementsByTagName("a")    ‘循环所有A标签
            If OneTagA.HasChildNodes Then
                If OneTagA.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then

                    ‘获取之前的一个段落
                    Set TagP = OneTagA.PreviousSibling
                    Do While TagP.tagName <> "P"
                        Set TagP = TagP.PreviousSibling
                    Loop

                    i = i + 1

                    ‘文字内容提取
                    PosText = TagP.innerhtml
                    PosText = RegReplace(PosText, "<.*?>")
                    PosText = Replace(PosText, " ", "")

                    ‘获取图片URL
                    ImageURL = OneTagA.FirstChild.getAttribute("real_src")
                    ImageName = "Image" & i & ".jpg"
                    ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
                    DownloadImageName ImageURL, ImagePath    ‘下载图片

                    ‘获取图片
                    If dImageName.Exists(PosText) = False Then
                        dImageName(PosText) = ImageName
                    Else
                        dImageName(PosText) = dImageName(PosText) & "|" & ImageName
                    End If

                End If
            End If
        Next

        Application.StatusBar = ">>>>>>正在获取文本>>>>>>"

        i = 0    ‘初始化序号
        n = 0    ‘初始化序号
        For Each OneTagP In .getElementsByTagName("p")
            ‘文字内容提取
            PosText = OneTagP.innerhtml
            PosText = RegReplace(PosText, "<.*?>")
            PosText = Replace(PosText, " ", "")

            i = i + 1

            If PosText = "喜欢" Then Exit For    ‘提前结束循环
            If i > 20 Then    ‘开始记录试卷内容
            If Len(PosText) > 0 Then    ‘保留非空数组
            n = n + 1
            ReDim Preserve Arr(1 To n)
            Arr(n) = PosText    ‘存入数组
            ‘Debug.Print n; "               "; PosText
            ‘dContent(PosText) = n
        End If
    End If
Next
End With

Application.StatusBar = ">>>>>>正在创建Word文档>>>>>>"

FilePath = ThisWorkbook.Path & "\" & FileName & ".doc"
On Error Resume Next
Kill FilePath
On Error GoTo 0

Dim wdApp As Object
Dim Doc As Object
Set wdApp = CreateObject("Word.Application")
Set Doc = wdApp.documents.Add()

Doc.Activate

For i = 1 To UBound(Arr)

    PosText = Arr(i)

    wdApp.Selection.TypeText Text:=PosText
    wdApp.Selection.TypeParagraph

    If dImageName.Exists(PosText) Then    ‘如果含有图片
    If InStr(dImageName(PosText), "|") = 0 Then    ‘如果只含有一张图片
    ImageName = dImageName(PosText)
    ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
    wdApp.Selection.InlineShapes.AddPicture FileName:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
    wdApp.Selection.TypeParagraph
Else
    ImgNames = Split(dImageName(PosText), "|")
    For n = LBound(ImgNames) To UBound(ImgNames) Step 1
        ImageName = ImgNames(n)
        ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
        wdApp.Selection.InlineShapes.AddPicture FileName:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
        wdApp.Selection.TypeParagraph
    Next n
End If
End If

Next i

Doc.SaveAs FilePath
Doc.Close
wdApp.Quit

Application.StatusBar = ">>>>>>正在删除Image图片>>>>>>"

For Each Key In dImageName.keys
    If InStr(dImageName(Key), "|") = 0 Then
        ImageName = dImageName(Key)
        ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
        Kill ImagePath
    Else
        ImgNames = Split(dImageName(Key), "|")
        For n = LBound(ImgNames) To UBound(ImgNames) Step 1
            ImageName = ImgNames(n)
            ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName
            Kill ImagePath
        Next n
    End If
Next Key

UsedTime = VBA.Timer - StartTime
MsgBox "本次运行耗时:" & Format(UsedTime, "#0.0000秒")

ErrorExit:
Set wdApp = Nothing
Set Doc = Nothing

AppSettings False
Exit Sub
ErrHandler:
If Err.Number <> 0 Then
    MsgBox Err.Description & "!", vbCritical, "QQ 84857038"
    Debug.Print Err.Description
    Err.Clear
    Resume ErrorExit
End If

End Sub
Public Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String
‘传递参数 :原字符串, 匹配模式 ,替换字符
    Dim Regex As Object
    Dim newText As String
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    newText = Regex.Replace(OrgText, RepStr)
    RegReplace = newText
    Set Regex = Nothing
End Function
Public Sub AppSettings(Optional IsStart As Boolean = True)
    If IsStart Then
        Application.ScreenUpdating = False
        Application.DisplayAlerts = False
        Application.Calculation = xlCalculationManual
        Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
    Else
        Application.ScreenUpdating = True
        Application.DisplayAlerts = True
        Application.Calculation = xlCalculationAutomatic
        Application.StatusBar = False
    End If
End Sub

  

时间: 2024-10-13 23:29:18

GetTextAndImageCreateExamPaper的相关文章