1.网络获取Google图像
1.1 google_images_download
Python 是一种多用途语言，广泛用于脚本编写，我们可以用它编写脚本来自动化日常事务。假设我们需要针对多个搜索关键词批量下载谷歌图片，与其手动逐个下载，不如用脚本将这一过程自动化。
如何安装所需的模块:
pip install google_images_download
让我们看看如何编写 Python 脚本以使用 Python google_images_download 模块下载 Google 图像。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
# Download Google Images results for several search queries using the
# google_images_download package.
from google_images_download import google_images_download

# Downloader object shared by every call to downloadimages().
response = google_images_download.googleimagesdownload()

search_queries = [
    'The smartphone also features an in display fingerprint sensor.',
    'The pop up selfie camera is placed aligning with the rear cameras.',
    '''In terms of storage Vivo V15 Pro could offer up to 6GB of RAM and 128GB of onboard storage.''',
    'The smartphone could be fuelled by a 3 700mAh battery.',
]


def downloadimages(query):
    """Download images for one search query, best effort.

    The first attempt requests panoramic images. If that attempt raises
    FileNotFoundError, the download is retried once without the
    aspect-ratio restriction. A failure of the retry is reported on
    stdout and otherwise ignored, so one bad query does not stop the
    remaining ones.

    Args:
        query: The search string passed as ``keywords``.
    """
    # keywords      - the search query
    # format        - the image file format
    # limit         - the number of images to be downloaded
    # print_urls    - whether to print the image file URLs
    # size          - the image size ("large, medium, icon")
    # aspect_ratio  - the height/width ratio of the images to download
    #                 ("tall, square, wide, panoramic")
    arguments = {
        "keywords": query,
        "format": "jpg",
        "limit": 4,
        "print_urls": True,
        "size": "medium",
        "aspect_ratio": "panoramic",
    }
    try:
        response.download(arguments)
    except FileNotFoundError:
        # Retry with the same settings minus the aspect-ratio filter.
        fallback_arguments = {
            key: value
            for key, value in arguments.items()
            if key != "aspect_ratio"
        }
        try:
            response.download(fallback_arguments)
        except Exception as exc:
            # Still best effort, but say what was skipped instead of
            # silently discarding the error (and do not swallow
            # KeyboardInterrupt / SystemExit as a bare except would).
            print(f"[WARN] could not download images for {query!r}: {exc}")


# Driver code
for query in search_queries:
    downloadimages(query)
    print()
输出
注意:由于下载错误,部分图片无法打开。
1.2 BeautifulSoup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import os

# Every image URL found so far, in document order.
res = []


def download_google(url):
    """Collect the ``src`` of every ``<img>`` tag on the page at *url*.

    Each non-empty ``src`` value is appended to the module-level list
    ``res`` and written, one per line, to the module-level file object
    ``f``, which must be open for writing when this function is called.

    Args:
        url: Address of the page to fetch (the script below passes a
            Google Images search-results URL).
    """
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')
    for raw_img in soup.find_all('img'):
        link = raw_img.get('src')
        # <img> tags without a src yield None; skip them so that `res`
        # and the output file hold the same set of links.
        if link:
            res.append(link)
            f.write(link + "\n")


# The with-block guarantees the output file is closed even if the request
# or the parsing raises.
with open("images_flowers.txt", "w") as f:
    download_google('https://www.google.com/search?q=flowers&sxsrf=ALeKk00uvzQYZFJo03cukIcMS-pcmmbuRQ:1589501547816&source=lnms&tbm=isch&sa=X&ved=2ahUKEwjEm4LZyrTpAhWjhHIEHewPD1MQ_AUoAXoECBAQAw&biw=1440&bih=740')
1.3 pyimagesearch
感谢 Adrian Rosebrock 编写此代码并将其公开。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# USAGE
# python download_images.py --urls urls.txt --output images/santa

# import the necessary packages
from imutils import paths
import argparse
import requests
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-u", "--urls", required=True,
                help="path to file containing image URLs")
ap.add_argument("-o", "--output", required=True,
                help="path to output directory of images")
args = vars(ap.parse_args())

# grab the list of URLs from the input file (one URL per line), then
# initialize the total number of images downloaded thus far
with open(args["urls"]) as urls_file:
    rows = urls_file.read().strip().split("\n")
total = 0

# loop over the URLs
for url in rows:
    try:
        # try to download the image
        r = requests.get(url, timeout=60)

        # save the image to disk under a zero-padded sequential name
        p = os.path.sep.join([args["output"],
                              "{}.jpg".format(str(total).zfill(8))])
        with open(p, "wb") as f:
            f.write(r.content)

        # update the counter
        print("[INFO] downloaded: {}".format(p))
        total += 1

    # handle any exception thrown during the download process; report the
    # URL, because the output path `p` is not assigned yet (or still holds
    # the previous image's path) when the request itself fails
    except Exception:
        print("[INFO] error downloading {}...skipping".format(url))

# loop over the image paths we just downloaded
for imagePath in paths.list_images(args["output"]):
    # initialize if the image should be deleted or not
    delete = False

    # try to load the image
    try:
        image = cv2.imread(imagePath)

        # if the image is `None` then we could not properly load it
        # from disk, so delete it
        if image is None:
            print("None")
            delete = True

    # if OpenCV cannot load the image then the image is likely
    # corrupt so we should delete it
    except Exception:
        print("Except")
        delete = True

    # check to see if the image should be deleted
    if delete:
        print("[INFO] deleting {}".format(imagePath))
        os.remove(imagePath)
2.网络获取Youtube视频
如何安装所需的模块:
pip install pytube3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
import cv2
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import warnings
from pytube import YouTube

warnings.filterwarnings('ignore')

video = YouTube('https://www.youtube.com/watch?v=GTkU4qj6v7g')
# print(video.streams.all())
print(video.streams.filter(file_extension="mp4").all())
# Sample output:
# [<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" type="video">,
# <Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2" progressive="True" type="video">,
# <Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" vcodec="avc1.64001f" progressive="False" type="video">,
# <Stream: itag="136" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.4d401e" progressive="False" type="video">,
# <Stream: itag="135" mime_type="video/mp4" res="480p" fps="30fps" vcodec="avc1.4d4015" progressive="False" type="video">,
# <Stream: itag="134" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.4d400d" progressive="False" type="video">,
# <Stream: itag="133" mime_type="video/mp4" res="240p" fps="30fps" vcodec="avc1.4d400c" progressive="False" type="video">,
# <Stream: itag="160" mime_type="video/mp4" res="144p" fps="30fps" vcodec="avc1.4d400b" progressive="False" type="video">,
# <Stream: itag="140" mime_type="audio/mp4" abr="128kbps" acodec="mp4a.40.2" progressive="False" type="audio">]

# Pick the itag that matches the resolution you want to download. For a
# high-resolution video, choose the itag with the highest resolution.
downloaded_path = video.streams.get_by_itag(137).download()
print(downloaded_path)
# '/Users/sapnasharma/Documents/github/video_clips/Akshay Kumars Fitness Mantras for a Fit India GOQii Play Exclusive.mp4'

print(video.title)
# "Akshay Kumar's Fitness Mantras for a Fit India | GOQii Play Exclusive"

# The title contains characters (the apostrophe and the pipe) that are not
# present in the file name written to disk -- compare the two sample
# outputs above -- so the title cannot be used as the path. Use the path
# returned by download() instead of pasting the file name by hand.
video_path = downloaded_path

# cv2.imwrite() does not create missing directories, so make sure the
# output directory exists before any frame is written.
os.makedirs("images", exist_ok=True)

# Video capture using OpenCV
cap = cv2.VideoCapture(video_path)
frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)
print('Frames in video: ', frame_cnt)
print(f"Frames per sec: {fps}")
# Frames in video: 34249
# Frames per sec: 25.0

# (1) Use this block to sample frames from the entire video.
index = 1
for x in range(frame_cnt):
    ret, frame = cap.read()
    if not ret:
        break
    # Timestamp of the current position in the video, in milliseconds
    frame_timestamp = cap.get(cv2.CAP_PROP_POS_MSEC)
    # Save a frame each time the timestamp passes `index` seconds; change
    # 1000.0 if a unit other than one second is wanted
    if frame_timestamp >= (index * 1000.0):
        index = index + 2  # decides the frequency of frames to be saved
        print(f"++ {index}")
        cv2.imwrite(f"images/cv_{index}.png", frame)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

# (2) Use this block to fetch frames within a certain time window only.
# Timestamps are in milliseconds (seconds * 1000); the starting index is
# the start of the window in seconds.
start_ms = 1560000.0
end_ms = 1800000.0
# The capture used in (1) has been released, so open the video again;
# reading from a released capture never yields a frame.
cap = cv2.VideoCapture(video_path)
index = 1560
for x in range(frame_cnt):
    ret, frame = cap.read()
    if not ret:
        break
    # Timestamp of the current position in the video, in milliseconds
    frame_timestamp = cap.get(cv2.CAP_PROP_POS_MSEC)
    if frame_timestamp > end_ms:
        # Past the end of the window: nothing further can be saved.
        break
    if frame_timestamp >= start_ms:
        if frame_timestamp >= (index * 1000.0):
            index = index + 4  # decides the frequency of frames to be saved
            print(f"++ {index}")
            cv2.imwrite(f"images/cv_{index}.png", frame)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
以上就是Python获取网络图片和视频的示例代码的详细内容,更多关于Python获取图片 视频的资料请关注服务器之家其它相关文章!
原文链接:https://blog.csdn.net/weixin_43229348/article/details/123392081