Python：从 CSS 文件里获取图片地址并用 requests 下载

从 CSS 文件里获取图片地址并用 requests 下载

# -*- coding: utf-8 -*-

import os
import re
import shutil
import requests


css = ''' css string '''


def clean_css_url(raw):
    """Return the bare address from the inside of a CSS ``url(...)`` token.

    CSS permits ``url("x")``, ``url('x')`` and ``url( x )``, so surrounding
    whitespace and either quote style are removed.
    """
    return raw.strip().strip('"\'').strip()


def local_dir(url):
    """Return the local directory (relative to the download root) for *url*.

    The site prefix is dropped, the file name is cut off, and anything under
    ``/template`` is relocated below ``/static``.
    """
    path = url.replace('http://www.xxx.com', '')
    # Keep everything before the last '/', i.e. the directory part.
    path = path.rpartition('/')[0]
    if path.startswith('/template'):
        path = '/static' + path
    return path


# Raw contents of every url(...) reference found in the stylesheet.
m = re.findall(r'url\(([^)]+)\)', css)

# Both dicts are used as sets: only the keys matter, the value is always 1.
img_dict = {}  # image URL to download
dir_dict = {}  # local directory that must exist before downloading

for url in m:
    url = clean_css_url(url)
    # Inline data: URIs and empty references are not downloadable addresses.
    if not url or url.lower().startswith('data:'):
        continue
    img_dict[url] = 1
    dir_dict[local_dir(url)] = 1

# Local root under which the remote directory layout is mirrored.
b = '/home/yourname/abc'
for p in sorted(dir_dict):
    target = b + p
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(target)
    try:
        os.makedirs(target)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # a regular file in the way) is a real failure and must surface here.
        if not os.path.isdir(target):
            raise

print('')
# Download every collected image and store it under the local root `b`,
# using the same path mapping that was used to create the directories.
print(len(img_dict))
i = 0  # number of images actually written to disk
# The keys of img_dict already had their quotes stripped when collected.
for url in sorted(img_dict):
    # 'Referer' is the real HTTP header name (the original 'refer' is not a
    # header any server looks at).
    req_headers = {'Referer': url}
    # Without a timeout requests may wait on a stalled server indefinitely.
    r = requests.get(url, headers=req_headers, stream=True, timeout=30)
    try:
        if r.status_code != 200:
            continue

        path = url.replace('http://www.xxx.com', '')
        if path.startswith('/template'):
            path = '/static' + path

        dst = b + path
        with open(dst, 'wb') as f:
            # Have urllib3 undo gzip/deflate so the saved bytes are the image.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
        i += 1
    finally:
        # With stream=True the connection is only released once the body is
        # consumed or the response is closed; skipped responses need this.
        r.close()

# Found references vs. images saved; same output format on Python 2 and 3.
print('%d %d' % (len(m), i))


Related Articles