-
Notifications
You must be signed in to change notification settings - Fork 5
Advanced search filters #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,6 +41,116 @@ def add_filetype(file_path: str): | |
| eprint(err) | ||
| return 1 | ||
|
|
||
|
|
||
def process_image_size(val: str):
    """Map a user-facing image size name to its ``isz:`` filter token.

    Three kinds of value are recognised:

    * the named sizes ``large``, ``medium`` and ``icon``;
    * the fixed resolutions ``400x300``, ``640x480``, ``800x600`` and
      ``1024x768``;
    * megapixel counts such as ``2mp`` or ``12mp``.

    Resolutions and megapixel counts share the ``isz:lt%2Cislt:`` prefix
    (``%2C`` is a URL-encoded comma).

    Returns:
        The filter token, or an empty string when ``val`` is not recognised.
    """
    prefix = 'isz:'
    named_sizes = {'large': 'l', 'medium': 'm', 'icon': 'i'}
    resolutions = {
        '400x300': "qsvga",
        '640x480': "vga",
        '800x600': "svga",
        '1024x768': "xga",
    }
    megapixels = ('2mp', '4mp', '6mp', '8mp', '10mp',
                  '12mp', '15mp', '20mp', '40mp', '70mp')

    if val in named_sizes:
        return prefix + named_sizes[val]

    encoded_prefix = prefix + 'lt%2Cislt:'
    if val in resolutions:
        return encoded_prefix + resolutions[val]
    if val in megapixels:
        return encoded_prefix + val
    return ""
|
|
||
def process_image_aspectratio(val: str):
    """Map an aspect-ratio name to its ``iar:`` filter token.

    Recognised names are ``tall``, ``square``, ``wide`` and ``panoramic``.

    Returns:
        The filter token, or an empty string when ``val`` is not recognised.
        The empty-string fallback matches the other ``process_image_*``
        helpers; previously an unknown value fell off the end of the
        function and returned ``None``, which cannot be joined into the URL.
    """
    codes = {'tall': 't', 'square': 's', 'wide': 'w', 'panoramic': 'xw'}
    code = codes.get(val)
    if code is None:
        return ""
    return 'iar:' + code
|
|
||
def process_image_color(val: str):
    """Map a colour option to its ``ic:`` filter token.

    ``color``, ``grayscale`` and ``transparent`` map to fixed tokens. A named
    colour (``red``, ``blue``, ...) produces ``ic:specific%2Cisc:<name>``,
    where ``%2C`` is a URL-encoded comma.

    Returns:
        The filter token, or an empty string when ``val`` is not recognised.
    """
    fixed_tokens = {
        "color": "ic:color",
        "grayscale": "ic:gray",
        "transparent": "ic:trans",
    }
    named_colours = ('red', 'orange', 'yellow', 'green', 'teal', 'blue',
                     'purple', 'pink', 'white', 'gray', 'black', 'brown')

    if val in fixed_tokens:
        return fixed_tokens[val]
    if val in named_colours:
        return "ic:specific%2Cisc:" + val
    return ""
|
|
||
def process_image_type(val: str):
    """Map an image type to its ``itp:`` filter token.

    Recognised types are ``face``, ``photo``, ``clipart``, ``lineart`` and
    ``animated``; the type name itself is used as the token suffix.

    Returns:
        The filter token, or an empty string when ``val`` is not recognised.
    """
    recognised = ('face', 'photo', 'clipart', 'lineart', 'animated')
    return 'itp:' + val if val in recognised else ""
|
|
||
def process_image_region(val: str):
    """Build the ``ctr:country`` filter token for a region code.

    The code is upper-cased and appended to ``ctr:country``; no validation
    of the code is performed.

    Returns:
        The filter token, or an empty string when ``val`` is empty.
    """
    return 'ctr:country' + val.upper() if val != '' else ''
|
|
||
def process_image_filetype(val: str):
    """Map a file extension to its ``ift:`` filter token.

    Recognised extensions are ``jpg``, ``gif``, ``png``, ``bmp``, ``svg``,
    ``webp``, ``ico`` and ``raw``.

    Returns:
        The filter token, or an empty string when ``val`` is not recognised.
        The empty-string fallback matches the other ``process_image_*``
        helpers; previously an unknown value returned ``None``, which cannot
        be joined into the URL.
    """
    if val in ('jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico', 'raw'):
        return 'ift:' + val
    return ''
|
|
||
def process_image_usage(val: str):
    """Map a usage-rights option to its ``sur:`` filter token.

    ``cc`` maps to ``sur:cl`` and ``other`` maps to ``sur:ol``.

    Returns:
        The filter token, or an empty string when ``val`` is not recognised.
    """
    suffix = {'cc': 'cl', 'other': 'ol'}.get(val)
    return '' if suffix is None else 'sur:' + suffix
|
|
||
def process_safesearch(val: str):
    """Validate a safe-search setting.

    Returns:
        ``val`` unchanged when it is ``"on"`` or ``"off"``; otherwise an
        empty string.
    """
    return val if val in ("on", "off") else ""
|
|
||
|
|
||
def setup_url(searchurl: str, imgsize: str, imgaspectratio: str,
              imgcolor: str, imgtype: str, imgregion: str,
              imgfiletype: str, imgusage: str, safesearch: str) -> str:
    """Append the requested search filters to ``searchurl``.

    Each filter argument may be ``None``, meaning "not requested". Every
    non-``None`` image filter is translated by its ``process_image_*`` helper
    and the resulting tokens are combined, separated by ``%2C`` (a
    URL-encoded comma), into a single ``tbs=`` parameter. ``safesearch`` is
    translated by ``process_safesearch`` into a ``safe=`` parameter.
    Parameters are appended to ``searchurl`` with ``&``.

    Tokens that come back empty (an unrecognised value) are dropped, so the
    result never contains dangling separators such as ``tbs=%2Cisz:l`` or a
    bare ``tbs=``.

    Args:
        searchurl: Base search URL the parameters are appended to.
        imgsize: Image size option, or ``None``.
        imgaspectratio: Aspect-ratio option, or ``None``.
        imgcolor: Colour option, or ``None``.
        imgtype: Image type option, or ``None``.
        imgregion: Region code, or ``None``.
        imgfiletype: File extension option, or ``None``.
        imgusage: Usage-rights option, or ``None``.
        safesearch: Safe-search option, or ``None``.

    Returns:
        The URL with any resulting ``tbs=`` / ``safe=`` parameters appended.
        The same URL is also printed to stdout.
    """
    tbs_tokens = []
    if imgsize is not None:
        tbs_tokens.append(process_image_size(imgsize))
    if imgaspectratio is not None:
        tbs_tokens.append(process_image_aspectratio(imgaspectratio))
    if imgcolor is not None:
        tbs_tokens.append(process_image_color(imgcolor))
    if imgtype is not None:
        tbs_tokens.append(process_image_type(imgtype))
    if imgregion is not None:
        tbs_tokens.append(process_image_region(imgregion))
    if imgfiletype is not None:
        tbs_tokens.append(process_image_filetype(imgfiletype))
    if imgusage is not None:
        tbs_tokens.append(process_image_usage(imgusage))

    safe_tokens = []
    if safesearch is not None:
        safe_tokens.append(process_safesearch(safesearch))

    # Helpers signal "unrecognised value" with an empty (or missing) result;
    # joining those would corrupt the query string or raise TypeError.
    tbs_tokens = [token for token in tbs_tokens if token]
    safe_tokens = [token for token in safe_tokens if token]

    param_delim = "&"
    token_delim = "%2C"

    features = [searchurl]
    if tbs_tokens:
        features.append("tbs=" + token_delim.join(tbs_tokens))
    if safe_tokens:
        features.append("safe=" + token_delim.join(safe_tokens))

    url = param_delim.join(features)
    print(url)
    return url
|
|
||
|
|
||
| ############################# scraping helpers ################################ | ||
|
|
||
| def get_image_urls(query: str, page: int): | ||
|
|
@@ -159,7 +269,7 @@ def get_manifest(search_key: str, image_cnt: int): | |
|
|
||
| ################################# main api #################################### | ||
|
|
||
| def scrape_images(search_key, image_cnt, directory, threads): | ||
| def scrape_images(search_key, image_cnt, directory, threads, size, aspectratio, color, imgtype, region, filetype, usage, safesearch): | ||
| """ | ||
| Request manifest, generate paths, save files, get filetype. | ||
| This is the only function that should be called externally. | ||
|
|
@@ -174,7 +284,8 @@ def scrape_images(search_key, image_cnt, directory, threads): | |
| print("savedir: {}".format(directory)) | ||
| if not os.path.exists(directory): | ||
| os.makedirs(directory) | ||
|
|
||
| global search_url | ||
|
||
| search_url = setup_url(search_url, size, aspectratio, color, imgtype, region, filetype, usage, safesearch) | ||
| id_url_manifest = get_manifest(search_key, image_cnt) | ||
| with ThreadPoolExecutor(max_workers=threads) as pool: | ||
| with tqdm(total=len(id_url_manifest)) as progress: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Filters should be passed as a single object so that we can avoid these long, hard-to-read function definitions. IMO, if you can't call a function on one line, it needs to be refactored into a more compact structure.