Skip to content

Commit

Permalink
修复 #26(抖音app分享的西瓜视频链接无法解析问题) 和 西瓜视频解析问题
Browse files Browse the repository at this point in the history
  • Loading branch information
wujunwei928 committed Feb 19, 2024
1 parent dbde53c commit 32eaccc
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 22 deletions.
28 changes: 26 additions & 2 deletions parser/douyin.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func (d douYin) parseVideoID(videoId string) (*VideoParseInfo, error) {
res, err := client.R().
SetHeader(HttpHeaderUserAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36").
SetHeader(HttpHeaderReferer, "https://www.douyin.com/").
SetHeader(HttpHeaderCookie, fmt.Sprintf(`msToken=%s;odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69; ttwid=1%7CWBuxH_bhbuTENNtACXoesI5QHV2Dt9-vkMGVHSRRbgY%7C1677118712%7C1d87ba1ea2cdf05d80204aea2e1036451dae638e7765b8a4d59d87fa05dd39ff; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWNsaWVudC1jc3IiOiItLS0tLUJFR0lOIENFUlRJRklDQVRFIFJFUVVFU1QtLS0tLVxyXG5NSUlCRFRDQnRRSUJBREFuTVFzd0NRWURWUVFHRXdKRFRqRVlNQllHQTFVRUF3d1BZbVJmZEdsamEyVjBYMmQxXHJcbllYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEQVFjRFFnQUVKUDZzbjNLRlFBNUROSEcyK2F4bXAwNG5cclxud1hBSTZDU1IyZW1sVUE5QTZ4aGQzbVlPUlI4NVRLZ2tXd1FJSmp3Nyszdnc0Z2NNRG5iOTRoS3MvSjFJc3FBc1xyXG5NQ29HQ1NxR1NJYjNEUUVKRGpFZE1Cc3dHUVlEVlIwUkJCSXdFSUlPZDNkM0xtUnZkWGxwYmk1amIyMHdDZ1lJXHJcbktvWkl6ajBFQXdJRFJ3QXdSQUlnVmJkWTI0c0RYS0c0S2h3WlBmOHpxVDRBU0ROamNUb2FFRi9MQnd2QS8xSUNcclxuSURiVmZCUk1PQVB5cWJkcytld1QwSDZqdDg1czZZTVNVZEo5Z2dmOWlmeTBcclxuLS0tLS1FTkQgQ0VSVElGSUNBVEUgUkVRVUVTVC0tLS0tXHJcbiJ9`, d.randSeq(107))).
SetHeader(HttpHeaderCookie, `msToken=`+d.randSeq(107)+`;odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69; ttwid=1%7CWBuxH_bhbuTENNtACXoesI5QHV2Dt9-vkMGVHSRRbgY%7C1677118712%7C1d87ba1ea2cdf05d80204aea2e1036451dae638e7765b8a4d59d87fa05dd39ff; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWNsaWVudC1jc3IiOiItLS0tLUJFR0lOIENFUlRJRklDQVRFIFJFUVVFU1QtLS0tLVxyXG5NSUlCRFRDQnRRSUJBREFuTVFzd0NRWURWUVFHRXdKRFRqRVlNQllHQTFVRUF3d1BZbVJmZEdsamEyVjBYMmQxXHJcbllYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEQVFjRFFnQUVKUDZzbjNLRlFBNUROSEcyK2F4bXAwNG5cclxud1hBSTZDU1IyZW1sVUE5QTZ4aGQzbVlPUlI4NVRLZ2tXd1FJSmp3Nyszdnc0Z2NNRG5iOTRoS3MvSjFJc3FBc1xyXG5NQ29HQ1NxR1NJYjNEUUVKRGpFZE1Cc3dHUVlEVlIwUkJCSXdFSUlPZDNkM0xtUnZkWGxwYmk1amIyMHdDZ1lJXHJcbktvWkl6ajBFQXdJRFJ3QXdSQUlnVmJkWTI0c0RYS0c0S2h3WlBmOHpxVDRBU0ROamNUb2FFRi9MQnd2QS8xSUNcclxuSURiVmZCUk1PQVB5cWJkcytld1QwSDZqdDg1czZZTVNVZEo5Z2dmOWlmeTBcclxuLS0tLS1FTkQgQ0VSVElGSUNBVEUgUkVRVUVTVC0tLS0tXHJcbiJ9`).
Get(reqUrl)
if err != nil {
return nil, err
Expand Down Expand Up @@ -68,14 +68,38 @@ func (d douYin) parseShareUrl(shareUrl string) (*VideoParseInfo, error) {
return nil, err
}

videoId := strings.ReplaceAll(strings.Trim(locationRes.Path, "/"), "share/video/", "")
videoId, err := d.parseVideoIdFromPath(locationRes.Path)
if err != nil {
return nil, err
}
if len(videoId) <= 0 {
return nil, errors.New("parse video id from share url fail")
}

// 西瓜视频解析方式不一样
if strings.Contains(locationRes.Host, "ixigua.com") {
return xiGua{}.parseVideoID(videoId)
}

return d.parseVideoID(videoId)
}

// parseVideoIdFromPath extracts the video ID from a share-URL path by taking
// its last non-empty segment, e.g. "/share/video/123/" yields "123" and
// "/douyin/share/video/456" yields "456".
//
// It returns an error when urlPath is empty or consists only of slashes, so a
// nil error is always accompanied by a non-empty ID.
func (d douYin) parseVideoIdFromPath(urlPath string) (string, error) {
	if len(urlPath) == 0 {
		return "", errors.New("url path is empty")
	}

	// Strip leading/trailing slashes so a trailing "/" cannot produce an
	// empty final segment.
	trimmed := strings.Trim(urlPath, "/")
	if trimmed == "" {
		// The path held nothing but slashes: there is no segment to use.
		return "", errors.New("parse video id from path fail")
	}

	// The ID is whatever follows the last remaining slash. When there is no
	// slash, LastIndex returns -1 and the whole trimmed path is the ID.
	return trimmed[strings.LastIndex(trimmed, "/")+1:], nil
}

func (d douYin) getRedirectUrl(videoInfo *VideoParseInfo) {
client := resty.New()
client.SetRedirectPolicy(resty.NoRedirectPolicy())
Expand Down
34 changes: 34 additions & 0 deletions parser/douyin_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package parser

import (
"testing"
)

// Test_douYin_parseIdFromPath checks that douYin.parseVideoIdFromPath returns
// the expected video ID for Douyin-style and Xigua-style share paths, and
// reports an error for an empty path.
func Test_douYin_parseIdFromPath(t *testing.T) {
	cases := []struct {
		name      string
		path      string
		wantID    string
		expectErr bool
	}{
		{
			name:      "抖音视频",
			path:      "/share/video/7329354490828623130/",
			wantID:    "7329354490828623130",
			expectErr: false,
		},
		{
			name:      "西瓜视频",
			path:      "/douyin/share/video/7144194760184594977",
			wantID:    "7144194760184594977",
			expectErr: false,
		},
		{
			name:      "异常视频",
			path:      "",
			wantID:    "",
			expectErr: true,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			id, err := douYin{}.parseVideoIdFromPath(tc.path)

			// An error-expectation mismatch makes the ID comparison
			// meaningless, so report it and stop this subtest.
			failed := err != nil
			if failed != tc.expectErr {
				t.Errorf("parseVideoIdFromPath() error = %v, wantErr %v", err, tc.expectErr)
				return
			}

			if id != tc.wantID {
				t.Errorf("parseVideoIdFromPath() got = %v, want %v", id, tc.wantID)
			}
		})
	}
}
36 changes: 16 additions & 20 deletions parser/xigua.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package parser

import (
"bytes"
"encoding/base64"
"errors"
"net/url"
"strings"

"github.com/tidwall/gjson"
Expand Down Expand Up @@ -38,7 +38,7 @@ func (x xiGua) parseShareUrl(shareUrl string) (*VideoParseInfo, error) {
}

func (x xiGua) parseVideoID(videoId string) (*VideoParseInfo, error) {
reqUrl := "https://www.ixigua.com/" + videoId
reqUrl := "https://m.ixigua.com/douyin/share/video/" + videoId + "?aweme_type=107&schema_type=1&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme"
headers := map[string]string{
HttpHeaderUserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
HttpHeaderCookie: "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b150c18; ixigua-a-s=1; ttcid=af99669b6304453480454f150701d5c226; BD_REF=1; __ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo00f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1",
Expand All @@ -56,28 +56,24 @@ func (x xiGua) parseVideoID(videoId string) (*VideoParseInfo, error) {
if err != nil {
return nil, err
}
ssrData := doc.Find("#SSR_HYDRATED_DATA").Text()
ssrJson := strings.ReplaceAll(ssrData, "window._SSR_HYDRATED_DATA=", "")
ssrJson = strings.ReplaceAll(ssrJson, "undefined", "null")

videoData := gjson.Get(ssrJson, "anyVideo.gidInformation.packerData.video")
userId := videoData.Get("user_info.user_id").String()
userName := videoData.Get("user_info.name").String()
userAvatar := videoData.Get("user_info.avatar_url").String()
videoDesc := videoData.Get("title").String()
videoAddrBase64 := videoData.Get("videoResource.dash.dynamic_video.dynamic_video_list.2.main_url").String()
musicAddrBase64 := videoData.Get("videoResource.dash.dynamic_video.dynamic_audio_list.0.main_url").String()
if len(videoAddrBase64) <= 0 {
// 部分视频返回数据videoResource.dash为空, 改用 videoResource.normal 数据
videoAddrBase64 = videoData.Get("videoResource.normal.video_list.video_1.main_url").String()
ssrData := doc.Find("#RENDER_DATA").Text()
ssrJson, err := url.QueryUnescape(ssrData)
if err != nil {
return nil, err
}
videoAddr, _ := base64.StdEncoding.DecodeString(videoAddrBase64)
musicAddr, _ := base64.StdEncoding.DecodeString(musicAddrBase64)

videoData := gjson.Get(ssrJson, "app.videoInfoRes.item_list.0")
userId := videoData.Get("author.user_id").String()
userName := videoData.Get("author.nickname").String()
userAvatar := videoData.Get("author.avatar_thumb.url_list.0").String()
videoDesc := videoData.Get("desc").String()
videoAddr := videoData.Get("video.play_addr.url_list.0").String()
coverUrl := videoData.Get("video.cover.url_list.0").String()

parseRes := &VideoParseInfo{
Title: videoDesc,
VideoUrl: string(videoAddr),
MusicUrl: string(musicAddr),
VideoUrl: videoAddr,
CoverUrl: coverUrl,
}
parseRes.Author.Uid = userId
parseRes.Author.Name = userName
Expand Down

0 comments on commit 32eaccc

Please sign in to comment.