- A+
在SEO过程中，有时需要批量检测链接的状态码，以剔除返回404的坏链。有的链接做了301重定向，需要根据旧地址获取重定向后的新地址。这些操作都可以通过以下脚本实现。运行前，需要先把要检测的链接地址复制粘贴到 urls.txt 文件中（每行一个链接），并新建 output.txt 文件用于保存结果数据。准备好后运行以下命令即可。
命令
python redirection.py
redirection.py脚本
import requests
def get_status_code(url, timeout=10):
    """Request *url* and summarise its HTTP status and redirect chain.

    The result is a tab-separated fragment intended to follow the URL
    column of the report:
    ``status<TAB>redirect count<TAB>redirect chain<TAB>final URL<TAB>``.

    Args:
        url: Address to fetch with ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a single unresponsive host would stall the batch.

    Returns:
        If the request was redirected: the status code of the first
        redirect response, the number of redirects, every intermediate
        URL (each followed by " | ") and the final URL.
        If it was not redirected: the response status code followed by
        four empty columns.
        If the request failed: ``'0'`` followed by four empty columns.
    """
    try:
        r = requests.get(url, timeout=timeout)
    except requests.ConnectionError:
        print("Error: failed to connect.")
        return '0\t\t\t\t'
    except requests.RequestException as exc:
        # Timeouts, malformed URLs, redirect loops, ...: report the URL as
        # failed instead of aborting the whole run.
        print("Error: request failed: " + str(exc))
        return '0\t\t\t\t'

    print("Processing " + url)
    if not r.history:
        return str(r.status_code) + '\t\t\t\t'

    # r.history holds the intermediate redirect responses; the first one
    # carries the status of the originally requested URL.
    code = r.history[0].status_code
    chain = "".join(resp.url + " | " for resp in r.history)
    return (str(code) + '\t' + str(len(r.history)) + '\t'
            + chain + '\t' + r.url + '\t')
input_file = 'urls.txt'
output_file = 'output.txt'

# Read the URL list before touching the output file, so a missing or
# unreadable input does not wipe the results of a previous run.
# "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which would
# otherwise be glued onto the first URL.
with open(input_file, "r", encoding="utf-8-sig") as f:
    # Blank lines are not URLs; skip them rather than requesting "".
    lines = [line.strip() for line in f.read().splitlines() if line.strip()]

# Write UTF-8 explicitly so URLs read from the UTF-8 input can always be
# written back, whatever the platform's default encoding is.
with open(output_file, 'w', encoding="utf-8") as o_file:
    o_file.write('URL\tStatus\tNumber of redirects\tRedirect Chain\tFinal URL\t\n')
    for line in lines:
        code = get_status_code(line)
        o_file.write(line + "\t" + str(code) + "\t\n")
改进代码
import requests
def get_status_code(url, timeout=10):
    """Fetch *url* and describe its status code and any redirects.

    Produces the tab-separated columns that follow the URL in the report:
    ``status<TAB>redirect count<TAB>redirect chain<TAB>final URL<TAB>``.

    Args:
        url: Address to fetch with ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive host cannot block the rest of the list.

    Returns:
        For a redirected request: status code of the first redirect
        response, number of redirects, the intermediate URLs (each
        followed by " | ") and the final URL.
        For a request that was not redirected: the response status code
        followed by four empty columns.
        For a failed request: ``'0'`` followed by four empty columns.
    """
    failed = '0\t\t\t\t'
    try:
        r = requests.get(url, timeout=timeout)
    except requests.ConnectionError:
        print("Error: failed to connect.")
        return failed
    except requests.RequestException as exc:
        # Covers timeouts, invalid URLs and redirect loops, which would
        # otherwise terminate the script in the middle of the list.
        print("Error: request failed: " + str(exc))
        return failed

    print("Processing " + url)
    redirects = r.history
    if redirects:
        # The first history entry is the response to the requested URL.
        first_code = redirects[0].status_code
        chain = "".join(resp.url + " | " for resp in redirects)
        return (str(first_code) + '\t' + str(len(redirects)) + '\t'
                + chain + '\t' + r.url + '\t')
    return str(r.status_code) + '\t\t\t\t'
input_file = 'urls.txt'
output_file = 'output.txt'

# Load the URLs first: if the input cannot be read, the previous output
# file is left intact. The context manager closes the file even when
# reading fails. "utf-8-sig" accepts plain UTF-8 and strips a leading BOM
# so it does not end up inside the first URL.
with open(input_file, "r", encoding="utf-8-sig") as f:
    lines = f.read().splitlines()

# Explicit UTF-8 keeps the output encoding in step with the input instead
# of depending on the platform default.
with open(output_file, 'w', encoding="utf-8") as o_file:
    o_file.write('URL\tStatus\tNumber of redirects\tRedirect Chain\tFinal URL\t\n')
    for line in lines:
        line = line.strip()
        if not line:
            # An empty line is not a URL; requesting it would only fail.
            continue
        code = get_status_code(line)
        o_file.write(line + "\t" + str(code) + "\t\n")
独角兽驿站
公众号