html转json
时间: 2023-06-29 13:11:25 浏览: 162
将HTML转换为JSON需要进行多步处理,以下是其中的一种实现方法:
1. 使用DOM解析器将HTML转换为DOM对象
2. 遍历DOM树,将每个节点转换为JSON对象
3. 将JSON对象序列化为字符串
下面是一个示例代码,可以将HTML字符串转换为JSON字符串(注意:示例使用的 minidom 是 XML 解析器,因此输入必须是格式良好的 HTML/XHTML,例如所有标签都必须闭合):
```python
from xml.dom import minidom
import json
def parse_node(node):
    """Convert a minidom element into a JSON-serializable dict.

    The result always contains ``"tag"`` (the lower-cased tag name).
    ``"attrs"`` is included only when the element has attributes, and
    ``"children"`` only when the element has at least one non-blank text
    node or child element.

    Args:
        node: An ``xml.dom.minidom`` element node.

    Returns:
        A dict shaped like ``{"tag": str, "attrs": {...}, "children": [...]}``
        where each child is either a stripped text string or a nested dict
        produced by this same function.
    """
    result = {'tag': node.tagName.lower()}

    if node.hasAttributes():
        result['attrs'] = {
            name.lower(): value for name, value in node.attributes.items()
        }

    children = []
    for child in node.childNodes:
        if child.nodeType == minidom.Node.TEXT_NODE:
            text = child.nodeValue.strip()
            # Indentation/newlines between tags show up as whitespace-only
            # text nodes; skip them so they do not become "" entries.
            if text:
                children.append(text)
        elif child.nodeType == minidom.Node.ELEMENT_NODE:
            children.append(parse_node(child))
        # Other node types (comments, CDATA, ...) are intentionally ignored.

    # Mirror the handling of "attrs": leave the key out when there is nothing
    # meaningful to report.
    if children:
        result['children'] = children
    return result
def html_to_json(html):
    """Parse a well-formed HTML/XML string and return its tree as a JSON string.

    Args:
        html: Markup text with a single root element; it is parsed with
            ``xml.dom.minidom``, so it must be well-formed XML.

    Returns:
        The JSON text produced by ``json.dumps`` for the root element's
        ``parse_node`` representation.
    """
    root_element = minidom.parseString(html).documentElement
    tree = parse_node(root_element)
    return json.dumps(tree)
# Demo: convert a small HTML snippet and print the resulting JSON string.
html = (
    '<div class="container">'
    '<h1>Hello, world!</h1>'
    '<p>This is a paragraph.</p>'
    '</div>'
)
json_str = html_to_json(html)
print(json_str)
```
输出结果如下(为便于阅读已格式化显示,程序实际输出的是单行 JSON 字符串):
```json
{
"tag": "div",
"attrs": {
"class": "container"
},
"children": [
{
"tag": "h1",
"children": [
"Hello, world!"
]
},
{
"tag": "p",
"children": [
"This is a paragraph."
]
}
]
}
```
阅读全文