from playwright.sync_api import Playwright, sync_playwright, expect


def run(playwright: Playwright) -> None:
    """Log in to the PLM portal, query a BOM by parent part number and download the report.

    Launches a visible (non-headless) Chromium, signs in through the logon
    form, navigates Report -> Part/BOM -> BOM With Maker, runs the query and
    saves the file produced by the download button into the current working
    directory under the name suggested by the server.

    Args:
        playwright: The Playwright driver handle yielded by ``sync_playwright()``.
    """
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()
    try:
        page = context.new_page()
        page.goto("http://psamv4athetn.cminl.oa/form/Logon.html?url=http%3A%2F%2Fpinnoplm.cminl.oa%2Freport%2FpartBom%2FbomStructure%2FbomWithMaker&SysID=INNOPLM")

        # NOTE(review): credentials are hard-coded in the script; consider
        # reading them from the environment or a config file instead.
        page.locator("input[type=\"text\"]").click()
        page.locator("input[type=\"text\"]").fill("qilong.zou")
        page.locator("input[type=\"text\"]").press("Tab")
        page.locator("#tbPassword").fill("pass.007")
        page.locator("#tbPassword").press("Enter")

        # Walk the menu down to the "BOM With Maker" report.
        page.get_by_role("link", name=" Report ").click()
        page.get_by_role("link", name="Part/BOM ").click()
        page.get_by_role("link", name="BOM With Maker").click()

        # Enter the parent part number and run the query.
        page.locator("#parentPartNo").click()
        page.locator("#parentPartNo").press("CapsLock")
        page.locator("#parentPartNo").fill("6B01M003A300R")
        page.get_by_role("button", name=" 查詢").click()

        # Start waiting for the download before the click that triggers it.
        with page.expect_download() as download_info:
            page.get_by_role("button", name="").click()
        download = download_info.value

        # Downloaded files are removed when the owning context is closed, so
        # persist the file before cleanup runs.
        download.save_as(download.suggested_filename)
    finally:
        # Always release the browser, even if a step above failed.
        context.close()
        browser.close()


with sync_playwright() as playwright:
    run(playwright)
时间: 2023-04-08 16:05:29 浏览: 108
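这段代码使用 Python 和 Playwright 的同步 API 编写。它以非无头模式(`headless=False`,即可以看到浏览器界面)启动 Chromium,打开登录页并填入用户名和密码登录,然后依次点击 Report → Part/BOM → BOM With Maker 进入报表页面,在 `#parentPartNo` 输入框中填入料号 `6B01M003A300R` 并点击“查詢”按钮,最后通过 `page.expect_download()` 等待点击按钮后触发的文件下载,并关闭浏览器上下文和浏览器。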
相关问题
``` from playwright.async_api import Playwright, async_playwright, expect import asyncio, time, random from lxml import etree from bs4 import BeautifulSoup #from crawlab import save_item import aiomysql # 解析二级网址 async def jx_html2(urls, db_connection): selector2 = etree.HTML(urls) data_set2 = selector2.xpath('.//*[@id="header"]/div/div[3]/div/div[5]/div[1]/h1/text()')[0] selector3 = BeautifulSoup(urls, 'lxml') # 获取父类 for bt3 in selector3.find_all(attrs={"id": "downlist"}): download = bt3.a.get('href') #result = {'title': data_set2, 'url': download} #save_item(result) print('标题:', data_set2, '下载地址:', download) await asyncio.sleep(random.randint(2, 6)) # 检查数据是否已经存在 async with db_connection.cursor() as cursor: sql = "SELECT * FROM movie WHERE name = %s AND urls = %s" values = (data_set2, download) await cursor.execute(sql, values) result = ```TypeError: object NoneType can't be used in 'await' expression
从代码片段来看,您在尝试解析网页内容并将其保存到数据库时遇到了 `TypeError` 错误。具体错误信息是“object NoneType can't be used in 'await' expression”,这意味着某个本应为协程(coroutine)或可等待对象的地方实际上返回了 `None`。
根据您的代码和报错位置,我们重点分析一下可能的问题:
### 可能的原因
1. **未完成的查询语句**:
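- 贴出的代码在 `result =` 处被截断,看不到后续语句(很可能是 `await cursor.fetchone()` 之类的调用)。这个 `TypeError` 说明某个被 `await` 的表达式实际得到的是 `None`:常见原因是对一个普通(同步)函数的返回值使用了 `await`,而该函数并不是协程函数。请检查每一处 `await` 后面的调用是否真的返回可等待对象。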
2. **异步库的正确导入**:
- 确保所有使用的异步库都被正确定义并且版本兼容。例如 `aiomysql`, `playwright` 需要确保安装的是最新稳定版或其他指定版本。
3. **检查连接状态及游标的创建**:
- 数据库连接 (`db_connection`) 是否正常建立?是否有断开重连机制?
4. **HTML 解析部分**:
- 使用 `etree.HTML(urls)` 和 `BeautifulSoup(urls, 'lxml')` 进行 HTML 解析时传入的内容应当是一个字符串形式的 HTML 文档而不是 URL 地址本身;如果这里传递的是请求结果而非实际页面源码,则会导致解析失败进而引发异常情况。
针对以上几点建议做以下调整优化:
#### 修改后的代码示例
```python
from playwright.async_api import async_playwright
import asyncio, time, random
from lxml import etree
from bs4 import BeautifulSoup
#import aiomysql # 如果使用aiomysql,请确认其已正确安装配置
async def fetch_page(url):
    """Load *url* in headless Chromium and return the page's HTML content.

    Args:
        url: Address of the page to open.

    Returns:
        The HTML string returned by ``page.content()`` after navigation.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)
            return await page.content()
        finally:
            # Close the browser even when navigation fails.
            await browser.close()
# Parse a second-level (detail) page
async def jx_html2(url, db_connection):
    """Scrape the title and download links from *url* and store new ones in ``movie``.

    The page is fetched with ``fetch_page``; the title is read from the
    ``#header`` XPath and every ``#downlist a`` anchor supplies a download
    link. A ``(name, urls)`` row is inserted only when no identical row
    exists yet.

    Args:
        url: Address of the detail page to scrape.
        db_connection: Async database connection whose ``cursor()`` works as
            an async context manager (e.g. an aiomysql connection).

    Any exception is caught and printed, so one bad page does not abort the
    caller.
    """
    try:
        html_content = await fetch_page(url)

        selector2 = etree.HTML(html_content)
        titles = selector2.xpath('.//*[@id="header"]/div/div/div/div[5]/div[1]/h1/text()')
        if not titles:
            raise ValueError("Title Not Found")
        # xpath() returns a list; use the first text node as the SQL parameter.
        title = str(titles[0])

        soup = BeautifulSoup(html_content, 'lxml')
        for item in soup.select('#downlist a'):
            download_link = item.get('href')
            if download_link is None:
                # Anchor without an href carries no download address.
                continue
            print(f"标题:{title} 下载地址:{download_link}")
            # Random pause between links to avoid hammering the site.
            await asyncio.sleep(random.randint(2, 6))

            async with db_connection.cursor() as cursor:
                # Column names follow the original query (name, urls);
                # adjust them if the actual table schema differs.
                sql_check_existence = """
                    SELECT COUNT(*) AS count FROM movie
                    WHERE name=%s AND urls=%s;
                """
                await cursor.execute(sql_check_existence, (title, download_link))
                row = await cursor.fetchone()
                # Default cursors return tuples, dict cursors return mappings.
                count = row["count"] if isinstance(row, dict) else row[0]
                if int(count) == 0:
                    insert_sql = """INSERT INTO movie(name,urls) VALUES (%s,%s);"""
                    await cursor.execute(insert_sql, (title, download_link))
                    await db_connection.commit()
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and carry on.
        print(e)
if __name__ == "__main__":
    # asyncio.run() creates the event loop, runs the coroutine to completion
    # and closes the loop; it replaces the deprecated
    # get_event_loop()/run_until_complete() pair.
    # `your_db_conn_instance_here` is a placeholder for a real connection.
    asyncio.run(jx_html2("http://example.com", your_db_conn_instance_here))
```
请注意替换 `"your_db_conn_instance_here"` 为您自己的数据库实例以及相应表结构字段名等细节匹配实际情况。
此外还需注意该段程序逻辑仅作为参考,在生产环境中还应该考虑更多边界条件处理、日志记录等功能增强稳定性与健壮性。
python代码playwright库 expect_file_chooser()
`expect_file_chooser()` 是 Playwright 中 `Page` 对象的方法,用于等待页面触发文件选择器(`filechooser` 事件)。它返回一个上下文管理器:在其中执行会打开文件选择器的操作(例如点击文件输入框),随后即可取得 `FileChooser` 对象,并通过其 `set_files()` 方法模拟用户选择文件。
以下是使用`expect_file_chooser()`函数的示例代码:
```python
import asyncio
from playwright.async_api import Playwright, async_playwright
async def file_chooser_example(playwright: Playwright) -> None:
    """Upload ``./example.txt`` through a file input using ``expect_file_chooser()``.

    Args:
        playwright: The Playwright driver handle yielded by ``async_playwright()``.
    """
    # Launch the browser
    browser = await playwright.chromium.launch()
    try:
        page = await browser.new_page()

        # Navigate to a page with a file input element
        await page.goto("https://www.example.com")

        # Start waiting for the file chooser before the click that opens it
        async with page.expect_file_chooser() as fc_info:
            await page.click("#file-input")
        file_chooser = await fc_info.value

        # Select the file; a FileChooser has no accept() step
        await file_chooser.set_files("./example.txt")

        # Wait for the file to be selected and uploaded
        await page.wait_for_selector("#file-uploaded")
    finally:
        # Close the browser even if a step above failed
        await browser.close()
# Run the example
async def main() -> None:
    """Start Playwright and run ``file_chooser_example`` inside its context."""
    # async_playwright() is an async context manager, so it must be entered
    # with `async with` from inside a coroutine.
    async with async_playwright() as playwright:
        await file_chooser_example(playwright)


asyncio.run(main())
```
该示例使用Playwright库启动一个Chromium浏览器实例,并在页面上找到一个文件输入元素。当用户单击该元素时,将弹出文件选择器对话框。`expect_file_chooser()`函数会等待该对话框出现并模拟用户选择文件的操作。然后,该示例会等待文件上传完成并关闭浏览器。
阅读全文
相关推荐
















