Compare commits

...

4 Commits

Author SHA1 Message Date
Qihua Pan b2e8639479 更新清理oss老文件逻辑 2 years ago
Qihua Pan 470f99bbc2 1.更新大图压缩逻辑。 2 years ago
Qihua Pan 7a0f876df1 1.增加图片信息,预览、下载原图 2 years ago
Qihua Pan 55b042f87e 1.异常分辨率比例图片特殊处理 2 years ago
  1. 19
      pixiv/config.json.example
  2. 188
      pixiv/pixiv.sh

@ -7,8 +7,8 @@
"content": "Pixiv排行榜类型,有效值:illust/ugoira/manga/,分别是插画/动图/漫画",
"rule":{"匹配需要转义的内容":"转换后的值","&amp;":"&","&lt;":"<"},
"sleep":{
"ready":0,
"request":10
"text":10,
"image":60
},
"aliyun_oss":{
"Host":"Endpoint(地域节点)",
@ -17,13 +17,14 @@
"AccessKeyId":"OpenAPI AccessKey ID",
"AccessKeySecret":"OpenAPI AccessKey Secret",
"db":"文件上传成功记录数据库",
"imageParam":"阿里云图片处理参数 比如:?x-oss-process=image/format,webp/resize,w_2560,h_2560"
"imageParam":"阿里云图片处理参数 比如:?x-oss-process=image/format,webp/resize,w_2560,h_2560",
"deleteApi":"删除老图片的接口地址"
},
"compress":{
"Host":"图片压缩服务器地址",
"Port":"端口",
"User":"用户",
"Path":"压缩图片输出绝对路径"
"convertio":{
"apikey":"https://developers.convertio.co/zh/ 申请的API密钥",
},
"CronitorKey":"cronitor api key"
"Cronitor":{
"API_KEY":"https://cronitor.io/ 申请的Key",
"JOB_NAME":"定时任务名称"
}
}

@ -29,11 +29,9 @@ AccessKeySecret=`cat $config_file | jq -r .aliyun_oss.AccessKeySecret`
db_file=`cat $config_file | jq -r .aliyun_oss.db`
db_file_err=$db_file.err
imageParam=`cat $config_file | jq -r .aliyun_oss.imageParam`
deleteApi=`cat $config_file | jq -r .aliyun_oss.deleteApi`
CompressHost=`cat $config_file | jq -r .compress.Host`
CompressPort=`cat $config_file | jq -r .compress.Port`
CompressUser=`cat $config_file | jq -r .compress.User`
CompressPath=`cat $config_file | jq -r .compress.Path`
convertio_apikey=`cat $config_file | jq -r .convertio.apikey`
CronitorKey=`cat $config_file | jq -r .Cronitor.API_KEY`
CronitorJobName=`cat $config_file | jq -r .Cronitor.JOB_NAME`
@ -44,12 +42,10 @@ then
touch $db_file
fi
function upload(){
function PutObject(){
VERB="PUT"
file=$1
Content_MD5=""
Content_Type="application/x-www-form-urlencoded"
Content_Type="text/plain"
Content_Type=`file -b --mime-type $file`
Date=`TZ=GMT env LANG=en_US.UTF-8 date +'%a, %d %b %Y %H:%M:%S GMT'`
CanonicalizedOSSHeaders="x-oss-object-acl:public-read\n"
@ -57,7 +53,7 @@ function upload(){
stringToSign="$VERB\n$Content_MD5\n$Content_Type\n$Date\n$CanonicalizedOSSHeaders$CanonicalizedResource"
Signature=`echo -en $stringToSign | openssl sha1 -hmac $AccessKeySecret -binary | base64`
Authorization="OSS $AccessKeyId:$Signature"
http_code=`curl -v -w "%{http_code}" -X PUT -H "HOST:$bucketname.$Host" -H "x-oss-object-acl:public-read" -H "Date:$Date" -H "Content-Type:$Content_Type" -H "Authorization:$Authorization" --data-binary "@$file" "https://$bucketname.$Host/$file"`
http_code=`curl -v -w "%{http_code}" -X $VERB -H "HOST:$bucketname.$Host" -H "x-oss-object-acl:public-read" -H "Date:$Date" -H "Content-Type:$Content_Type" -H "Authorization:$Authorization" --data-binary "@$file" "https://$bucketname.$Host/$file"`
if [ $http_code -eq "200" ]
then
echo $file>>$db_file
@ -67,6 +63,40 @@ function upload(){
fi
}
# Repeatedly call the remote cleanup API to delete old OSS objects in batches
# of up to 50, removing each deleted object's record from the local upload db
# ($db_file) until the API reports a remaining count of 0.
# Globals read: AccessKeyId, AccessKeySecret, Host, bucketname, deleteApi, db_file
function DeleteMultipleObjects(){
  temp_file=DeleteMultipleObjects.json
  while true
  do
    # Response JSON is expected to contain .count and .delete_files[]
    curl -v -d "AccessKeyId=$AccessKeyId&AccessKeySecret=$AccessKeySecret&host=$Host&bucketname=$bucketname&max=50" "$deleteApi" > "$temp_file"
    cat "$temp_file"
    count=`cat "$temp_file" | jq -r .count`
    echo "剩余需要清理文件个数:$count"
    # Bug fix: the old `[ $count == 0 ]` errored on an empty/non-numeric count
    # (failed request), which made the loop spin forever. Abort instead.
    case "$count" in
      ''|*[!0-9]*) echo "invalid count '$count', abort cleanup"; break;;
    esac
    if [ "$count" -eq 0 ]
    then
      echo "delete finish"
      break
    fi
    # Remove each deleted object's line from the upload db.
    # Bug fix: OSS object names contain '/', which broke the old `/$item/`
    # sed address; use '|' as the delimiter and plain 'd' (delete line)
    # instead of the hold-space 'g' trick that left blank lines behind.
    while IFS= read -r item
    do
      [ -n "$item" ] && sed -i "\\|$item|d" "$db_file"
    done < <(jq -r ".delete_files[]" "$temp_file")
    # Clean up any pre-existing blank lines in the db (kept from original).
    sed -i "/^\s*$/d" "$db_file"
    sleep 1m
  done
  rm -f "$temp_file"
}
function fileSizeStr(){
fileSize=$1
if [[ $fileSize -gt 1048576 ]]
then
echo "`echo "scale=2;$fileSize/1048576"|bc`MB"
else
echo "$((fileSize/1024))KB"
fi
}
basePath=`cat $config_file|jq -r .basePath`
if [ ! -f $basePath ]
then
@ -83,13 +113,14 @@ rank_url="https://www.pixiv.net/ranking.php?mode=$mode&content=$content&p=1&form
today=`date "+%Y-%m-%d"`
_today=`date "+%Y%m%d"`
rank_json=$today.json
commands_file=$today.sh
anonfiles_token=`cat $config_file | jq .anonfilesToken`
rule=`cat $config_file | jq .rule|jq to_entries|jq 'map("sed -e \"s/\\\\"+.key+"/\\\\"+.value+"/g\"")'|jq -r '.[]'|sed ':a;N;s/\n/|/;t a;'`
touch $commands_file && chmod +x $commands_file
sleep `cat $config_file | jq .sleep.ready`
curl -v -d chat_id=$chat_id -d parse_mode=HTML -d text="Pixiv排行榜已更新,30秒后开始处理<a href='https://www.pixiv.net/ranking.php?mode%3D$mode%26content%3D$content'>$today日榜</a>数据。#date$_today%0A%0A<strong>排名是什么?</strong>%0A排名是以pixiv上所有公开作品为对象的统计以及排名。%0A毎日0:00~23时59分59秒的阅览树・「赞!」数等为排名的依据,期结果由pixiv独自的算法「pixiv rank β」决定。统计结果于每日中午12:00公开。%0A<a href='https://www.pixiv.help/hc/zh-cn/categories/360001065093-%E6%9C%89%E5%85%B3%E6%8E%92%E8%A1%8C%E6%A6%9C'>有关排行榜</a>" $baseApi/sendMessage
sleepText=`cat $config_file | jq .sleep.text`
sleepImage=`cat $config_file | jq .sleep.image`
curl -v -d chat_id=$chat_id -d parse_mode=HTML -d text="Pixiv排行榜已更新,$sleepText秒后开始处理<a href='https://www.pixiv.net/ranking.php?mode%3D$mode%26content%3D$content'>$today日榜</a>数据。#date$_today #日期$_today %0A%0A<strong>排名是什么?</strong>%0A排名是以pixiv上所有公开作品为对象的统计以及排名。%0A毎日0:00~23时59分59秒的阅览树・「赞!」数等为排名的依据,期结果由pixiv独自的算法「pixiv rank β」决定。统计结果于每日中午12:00公开。%0A<a href='https://www.pixiv.help/hc/zh-cn/categories/360001065093-%E6%9C%89%E5%85%B3%E6%8E%92%E8%A1%8C%E6%A6%9C'>有关排行榜</a>" $baseApi/sendMessage
sleep $sleepText
if [ ! -f $rank_json ]
then
echo "get data from $rank_url"
@ -106,6 +137,13 @@ maxFileSize=20971520
maxFileSize_M="$((maxFileSize/1024/1024))M"
start_rank=''
end_rank=''
maxLikeCount=0
maxLikeCountPid=0
maxBookmarkCount=0
maxBookmarkCountPid=0
maxViewCount=0
maxViewCountPid=0
maxRatio=10
for index in `seq 1 $length`
do
index=$((index-1))
@ -124,35 +162,49 @@ do
fi
if [ $yes_rank -eq 0 ]
then
rank_info="\#rank$rank \#首次登场"
rank_info="\#排名$rank \#rank$rank \#首次登场"
else
rank_info="\#rank$rank 之前 \#rank$yes_rank"
rank_info="\#排名$rank \#rank$rank 之前 \#排名$yes_rank \#rank$yes_rank"
fi
echo "pid=$pid,artworkLink=$artworkLink,rank_info=$rank_info"
png_html_file=$pid.html
if [ ! -f $png_html_file ]
then
sleep 1
echo "get data from $artworkLink"
curl -v $artworkLink >$png_html_file
fi
json_file=$pid.json
if [ ! -f $json_file ]
then
egrep -o "content='{\"timestamp.*].{3}" $png_html_file | sed -e "s/content='//" >$json_file
egrep -o "content='{\"timestamp.*" $png_html_file|sed -e "s/content='//"|sed -e "s/..$//" >$json_file
fi
pageCount=`jq --arg pid $pid '.illust[$pid].pageCount' $json_file`
original_url=`jq -r --arg pid $pid '.illust[$pid].urls.original' $json_file`
small_url=`jq -r --arg pid $pid '.illust[$pid].urls.small' $json_file`
title=`jq -r --arg pid $pid '.illust[$pid].title' $json_file|sed -e 's/\"/\\\"/g'`
title=`jq -r --arg pid $pid '.illust[$pid].title' $json_file|sed -e 's/\"/\\\"/g'|sed -e 's/&lt;//g'|sed -e 's/&gt;//g'`
title=`bash -c "echo '$title'|$rule"`
description=`jq -r --arg pid $pid '.illust[$pid].description' $json_file`
userName=`jq -r --arg pid $pid '.illust[$pid].userName' $json_file|sed -e 's/\"/\\\"/g'`
userName=`bash -c "echo '$userName'|$rule"`
userId=`jq -r --arg pid $pid '.illust[$pid].userId' $json_file`
# Track the artwork (pid) with the highest like / bookmark / view count seen
# so far across the ranking loop.
likeCount=`jq --arg pid $pid '.illust[$pid].likeCount' $json_file`
if [ $likeCount -gt $maxLikeCount ]
then
maxLikeCount=$likeCount
maxLikeCountPid=$pid
fi
bookmarkCount=`jq --arg pid $pid '.illust[$pid].bookmarkCount' $json_file`
if [ $bookmarkCount -gt $maxBookmarkCount ]
then
# Bug fix: was `maxBookmarkCount=$likeCount` (copy-paste error), which
# stored the like count as the bookmark maximum.
maxBookmarkCount=$bookmarkCount
maxBookmarkCountPid=$pid
fi
viewCount=`jq --arg pid $pid '.illust[$pid].viewCount' $json_file`
if [ $viewCount -gt $maxViewCount ]
then
# Bug fix: was `maxViewCount=$likeCount` (copy-paste error), which
# stored the like count as the view maximum.
maxViewCount=$viewCount
maxViewCountPid=$pid
fi
tag=`jq -r --arg pid $pid '.illust[$pid].tags.tags[].tag' $json_file|sed -e 's/^/\\#/g'|sed ':a;N;s/\n/ /;t a;'`
tag=`bash -c "echo '$tag'|$rule"`
@ -167,8 +219,8 @@ do
page_original_url=`echo $original_url | sed -e "s/p0/p$page/"`
page_small_url=`echo $small_url | sed -e"s/p0/p$page/"`
original_file_name=`echo $page_original_url | egrep -o "$pid.*"`
small_file_name=`echo $page_small_url | egrep -o "$pid.*"`
webp_file_name=`echo $original_file_name|sed 's/jpg/webp/'|sed 's/png/webp/'`
if [ "`cat $db_file|grep $original_file_name`" != "$original_file_name" ]
then
echo "download image file name=$original_file_name,url=$page_original_url"
@ -176,45 +228,83 @@ do
then
curl -v -H 'referer: https://www.pixiv.net/' $page_original_url -o $original_file_name
fi
if [ `du -b $original_file_name|awk '{print $1}'` -gt $maxFileSize ]
original_file_size=`du -b $original_file_name|awk '{print $1}'`
if [ $original_file_size -gt $maxFileSize ]
then
echo "图片:$original_file_name 体积:`du -h $original_file_name|awk '{print $1}'` 超过 $maxFileSize_M,需要压缩"
scp -i ~/.ssh/$CompressHost -P $CompressPort $original_file_name $CompressUser@$CompressHost:$CompressPath/$original_file_name
ssh -i ~/.ssh/$CompressHost -p $CompressPort $CompressUser@$CompressHost "cd $CompressPath;jpegoptim --size=$maxFileSize_M $original_file_name"
scp -i ~/.ssh/$CompressHost -P $CompressPort $CompressUser@$CompressHost:$CompressPath/$original_file_name $original_file_name
echo "图片:$original_file_name 压缩体积:`du -h $original_file_name`"
echo "${original_file_name}文件体积超过${maxFileSize}字节,需要在线压缩"
result=`curl -v -X POST -d "{\"apikey\": \"$convertio_apikey\", \"input\":\"upload\", \"outputformat\":\"jpeg\"}" http://api.convertio.co/convert`
if [ `echo $result|jq -r .status` = ok ]
then
id=`echo $result|jq -r .data.id`
result=`curl -v -X PUT --upload-file $original_file_name http://api.convertio.co/convert/$id/$original_file_name`
if [ `echo $result|jq -r .status` = ok ]
then
while true
do
sleep 10
result=`curl -v -X GET http://api.convertio.co/convert/$id/status`
if [[ `echo $result|jq -r .status` = ok && `echo $result|jq -r .data.step` = finish ]]
then
echo $result|jq .data.output.url|xargs curl -v -o $original_file_name
PutObject $original_file_name
break
elif [ `echo $result|jq -r .status` = error ]
then
break
else
echo "10s后重新获取转换结果"
fi
done
else
echo "Direct File Upload For Conversion error!!!"
fi
else
echo "Start a New Conversion error!!!"
fi
else
PutObject $original_file_name
fi
upload $original_file_name
fi
if [ ! -f $webp_file_name ]
then
webp_url="https://$bucketname.$accelerateHost/$original_file_name$imageParam"
echo "download image file name=$webp_file_name,url=$webp_url"
curl -v $webp_url -o $webp_file_name
fi
if [ ! -f $small_file_name ]
then
echo "download image file name=$small_file_name,url=$page_small_url"
curl -v -H 'referer: https://www.pixiv.net/' $page_small_url -o $small_file_name
image_ratio=`file $webp_file_name|awk '{print $9}'|sed -e 's/,//'|sed '/^$/d'|awk '{split($0,a,"x");f=a[1]>a[2]?a[1]/a[2]:a[2]/a[1];print f}'`
if [[ -n $image_ratio && $image_ratio > $maxRatio ]]
then
echo "图片分辨率`file $webp_file_name|awk '{print $9}'|sed -e 's/,//'`比率:$image_ratio>$maxRatio,视为异常,返回异常图片"
watermark=`echo "画作pid:$pid"|base64`
url="https://$bucketname.$accelerateHost/error.jpeg?x-oss-process=image/watermark,text_`echo "画作pid:$pid"|base64`,g_north/watermark,text_5YiG6L6o546H5q%2BU5L6L5byC5bi4Cg==,g_center/watermark,text_5bi46KeB5LqO6LaF6ZW_5Zu-,g_south"
echo "文字水印请求url:$url"
curl -v $url -o $webp_file_name
fi
fi
if [ $page -eq 0 ]
then
media="$media,{\"type\":\"photo\",\"media\":\"attach://$webp_file_name\",\"parse_mode\":\"HTML\",\"caption\":\"$rank_info\n<a href=\\\"$artworkLink\\\">$title</a>\n<a href=\\\"https://www.pixiv.net/users/$userId\\\">$userName</a>\n$tag\"}"
info_file=$original_file_name.info
original_image_info=`curl -v --output $info_file https://$bucketname.$Host/$original_file_name?x-oss-process=image/info`
FileSize=$(fileSizeStr $(cat $info_file|jq -r .FileSize.value))
ImageHeight=$(cat $info_file|jq -r .ImageHeight.value)
ImageWidth=$(cat $info_file|jq -r .ImageWidth.value)
media="$media,{\"type\":\"photo\",\"media\":\"attach://$webp_file_name\",\"parse_mode\":\"HTML\",\"caption\":\"$rank_info\n<a href=\\\"$artworkLink\\\">$title</a>\n<a href=\\\"https://www.pixiv.net/users/$userId\\\">$userName</a>\n$tag\n原图分辨率:${ImageWidth}X${ImageHeight}(宽X高),文件体积:$FileSize,<a href=\\\"https://$bucketname.$Host/$original_file_name\\\">预览原图</a> <a href=\\\"https://$bucketname.$accelerateHost/$original_file_name\\\">加速下载原图</a>\"}"
fileList="$fileList -F $webp_file_name=@$webp_file_name"
fileCount=$((fileCount + 1))
fileSize=`du $small_file_name | awk '{print $1}'`
fileSize=`du $webp_file_name | awk '{print $1}'`
fileCountSize=$((fileCountSize + fileSize))
echo "fileCountSize=$fileCountSize,fileCount=$fileCount"
fi
if [[ $fileCount -eq $maxFileCount ]]
then
echo "sleep `cat $config_file | jq .sleep.request`" >>$commands_file
echo "curl -v -F chat_id=$chat_id $fileList -F media='[`echo $media | cut -c 2-`]' $baseApi/sendMediaGroup" >>$commands_file
echo "curl -v -d chat_id=$chat_id -d text='以上作品日榜排名分别是 #rank${start_rank}_${end_rank} ,点击作品可以查看pid/标题/画师/tag信息' $baseApi/sendMessage" >>$commands_file
echo >>$commands_file
echo >>$commands_file
echo >>$commands_file
command="curl -v -F chat_id=$chat_id $fileList -F media='[`echo $media | cut -c 2-`]' $baseApi/sendMediaGroup"
echo "上传图片命令:$command"
bash -c "$command"
sleep $sleepImage
curl -v -d chat_id=$chat_id -d text="以上作品日榜排名分别是 #排名${start_rank}_${end_rank} #rank${start_rank}_${end_rank} ,点击作品可以查看pid/标题/画师/tag信息." $baseApi/sendMessage
sleep $sleepText
fileCountSize=0
fileList=''
media=''
@ -222,16 +312,12 @@ do
fi
done
done
if [ $fileCount -gt 0 ]
then
echo "curl -v -F chat_id=$chat_id $fileList -F media='[`echo $media | cut -c 2-`]' $baseApi/sendMediaGroup" >>$commands_file
fi
sed -i '1d' $commands_file
hasSend=$today.hasSend
if [ ! -f $hasSend ]
then
bash -c ./$commands_file
touch $hasSend
next_expected_at=`curl -v https://cronitor.io/api/monitors/$CronitorJobName -u $CronitorKey:''|jq .next_expected_at`
curl -v -d chat_id=$chat_id -d text="以上就是$today日榜前${length}名作品,本次推送完毕,下次推送时间预计是`date -d @$next_expected_at '+%Y-%m-%d %H:%M:%S'`,如有问题请联系管理员。 #date$_today" $baseApi/sendMessage
fi
next_expected_at=`curl -v https://cronitor.io/api/monitors/$CronitorJobName -u $CronitorKey:''|jq .next_expected_at`
curl -v -d chat_id=$chat_id -d text="以上就是$today日榜前${length}名作品,本次推送完毕,下次推送时间预计是`date -d @$next_expected_at '+%Y-%m-%d %H:%M:%S'`,如有问题请联系管理员。 #date$_today #日期$_today " $baseApi/sendMessage
find -type f -mtime +7|grep html
find -type f -mtime +7|grep json
find -type f -mtime +7|grep webp
DeleteMultipleObjects
Loading…
Cancel
Save