| 1234567891011121314151617181920212223242526272829303132333435363738394041 |
- import re
def filter_links(links):
    """Return the external links whose base domain does not contain "google".

    Expected input shape::

        {
            'internal': [{}],
            'external': [
                {
                    "href": "xx",
                    "text": "xxm",
                    "title": "",
                    "base_domain": "benlcollins.com"
                }
            ],
        }

    Args:
        links: Mapping that may hold an ``"external"`` list of link dicts.
            Only that list is inspected; ``"internal"`` is ignored.

    Returns:
        A new list with the external link dicts whose ``"base_domain"``
        does not contain the substring ``"google"`` (case-sensitive).
        Entries with a missing or empty ``"base_domain"`` are kept, because
        nothing identifies them as Google links. A missing or empty
        ``"external"`` entry yields an empty list.
    """
    # Tolerate a missing or None "external" entry instead of raising.
    external_links = links.get("external") or []
    # A missing or None base_domain is treated as "" so the substring test
    # cannot raise KeyError/TypeError on incomplete link dicts.
    return [
        link
        for link in external_links
        if "google" not in (link.get("base_domain") or "")
    ]
# One or more dot-terminated labels made of letters, digits and hyphens,
# followed by an alphabetic top-level label of at least two letters.
_DOMAIN_PATTERN = re.compile(r'^([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}$')


def is_valid_domain(domain):
    """Return True if *domain* has the textual form of a dotted domain name.

    The check is purely syntactic: at least one label of letters, digits or
    hyphens followed by a dot, then a final label of two or more letters.
    Single-label hosts (e.g. "localhost") and dotted numeric addresses
    therefore do not qualify.

    Args:
        domain: Candidate domain string. Non-string values are rejected.

    Returns:
        True when the whole string matches the domain format, else False.
    """
    if not isinstance(domain, str):
        return False
    # fullmatch is required: with match(), the trailing "$" also matches just
    # before a final newline, so "example.com\n" would be accepted.
    return _DOMAIN_PATTERN.fullmatch(domain) is not None
def filter_local_domain(links):
    """Keep only the links whose ``base_domain`` passes ``is_valid_domain``.

    Expected input shape::

        [{
            "href": "xx",
            "text": "xxm",
            "title": "",
            "base_domain": "benlcollins.com"
        }]

    Args:
        links: Iterable of link dicts.

    Returns:
        A new list, in the original order, of the entries that have a
        ``'base_domain'`` key whose value ``is_valid_domain`` accepts.
        Entries without that key are dropped.
    """
    return [
        entry
        for entry in links
        if 'base_domain' in entry and is_valid_domain(entry['base_domain'])
    ]
|