test | a-c-dream

## first
内容
import requests
from lxml import etree
import time
# Request headers carrying a desktop-browser User-Agent string.
header = {
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.69"
    ),
}

def get_detail(url):
    """Download a listing's detail page and return three fields as CSV text.

    The fields are read from the table inside the ``houseInfo`` element:

    * property-right type  -- row 1, cell 2
    * property-right term  -- row 2, cell 2
    * title-deed age       -- row 2, cell 3

    Args:
        url: Address of the detail page to download.

    Returns:
        A string ``'<type>,<term>,<deed age>'``. A field that cannot be
        located on the page is rendered as the text ``None``.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    time.sleep(0.3)  # short pause before each detail request
    # A timeout keeps one stalled connection from hanging the whole crawl.
    content = requests.get(url=url, headers=header, timeout=10).text
    tree = etree.HTML(content)

    def first_text(path):
        # Guard in case the parser produced no tree at all; the earlier
        # catch-all ``except`` used to absorb that situation as well.
        if tree is None:
            return None
        matches = tree.xpath(path)
        # An empty result list means the field is absent on this page.
        return matches[0] if matches else None

    char = first_text(
        '//*[@id="houseInfo"]/table/tbody/tr[1]/td[2]/span[2]/text()')
    property_right_years = first_text(
        '//*[@id="houseInfo"]/table/tbody/tr[2]/td[2]/span[2]/text()')
    room_years = first_text(
        '//*[@id="houseInfo"]/table/tbody/tr[2]/td[3]/span[2]/text()')
    return f'{char},{property_right_years},{room_years}'

def data_cat(root):
    """Concatenate the text of every direct ``span`` child of *root*.

    Args:
        root: A node exposing ``xpath``; it is queried with
            ``./span/text()``.

    Returns:
        The matched text fragments joined in order with no separator, or
        an empty string when nothing matches.
    """
    return ''.join(root.xpath('./span/text()'))

def get_data(i, fp):
    """Scrape one sale-listing page and write one CSV line per listing to *fp*.

    Each listing's detail page is also fetched through ``get_detail`` and
    its result is appended as the last columns of the line.

    Args:
        i: Page number inserted into the listing URL.
        fp: Writable text file object that receives the rows.

    Raises:
        requests.RequestException: If a download fails or times out.
        IndexError: If one of the per-field result lists is shorter than
            the list of titles.
    """
    print(f'=======正在爬取第{i}页=========')
    time.sleep(1.2)  # pause before each listing-page request
    url = f'https://rizhao.anjuke.com/sale/p{i}/?from=HomePage_TopBar'

    # A timeout keeps one stalled connection from hanging the whole crawl.
    contents = requests.get(url=url, headers=header, timeout=10).text
    tree = etree.HTML(contents)

    # One result list per column, each gathered over the whole page.
    titles = tree.xpath('//h3[@class="property-content-title-name"]/text()')
    prices = tree.xpath('//p[@class="property-price-total"]/span[1]/text()')
    # Room layout: kept as elements because the text is split across spans.
    rooms = tree.xpath('//p[@class="property-content-info-text property-content-info-attribute"]')
    areas = tree.xpath('//p[@class="property-content-info-text"][1]/text()')
    # Orientation, floor and year built.
    forward = tree.xpath('//p[@class="property-content-info-text"][2]/text()')
    floor = tree.xpath('//p[@class="property-content-info-text"][3]/text()')
    years = tree.xpath('//p[@class="property-content-info-text"][4]/text()')
    # Address: also kept as elements and flattened with data_cat below.
    address = tree.xpath('//p[@class="property-content-info-comm-address"]')
    # Contact person and detail-page link.
    persons = tree.xpath('//span[@class="property-extra-text"][1]/text()')
    links = tree.xpath('//div[@class="property"]/a/@href')

    # NOTE(review): columns are matched purely by position. If a listing
    # lacks one of the fields the lists fall out of step (or an IndexError
    # is raised) -- confirm against the live markup.
    # A separate loop name keeps the page-number parameter ``i`` intact.
    for idx, title in enumerate(titles):
        print(f'第{idx}个')
        row = ','.join([
            title.strip(),
            prices[idx].strip(),
            data_cat(rooms[idx]).strip(),
            areas[idx].strip(),
            forward[idx].strip(),
            floor[idx].strip(),
            years[idx].strip(),
            data_cat(address[idx]).strip(),
            persons[idx].strip(),
            get_detail(links[idx].strip()),
        ])
        print(row, file=fp)
    # The stray get_detail() call that used to follow the loop is gone: its
    # result was discarded, it cost an extra request, and on a page without
    # listings it indexed ``links`` with the page number.
if __name__ == '__main__':
    # Crawl listing pages 1 through 10, appending every row to rizhao.txt.
    first_page, last_page = 1, 10
    with open('rizhao.txt', 'a+', encoding='utf-8') as out_file:
        for page_no in range(first_page, last_page + 1):
            get_data(page_no, out_file)
    # Manual check of a single detail page:
    # get_detail('https://rizhao.anjuke.com/prop/view/A6507735925?auction=201&hpType=1&entry=102&position=18&kwtype=filter&now_time=1682470529&spread=filtersearch_p&from=from_esf_List_screen&index=18')