Strip non-breaking space (nbsp) characters from imported HTML text

This commit is contained in:
Christian Wolf 2022-11-26 08:43:46 +01:00
parent adc7158862
commit 9a9506174b
2 changed files with 13 additions and 13 deletions

View File

@ -133,7 +133,7 @@ class HtmlParser:
td.smooth()
return {
'text': td.string,
'text': td.string.replace('\xa0', ' ').strip(),
'meta': meta
}

View File

@ -34,7 +34,7 @@
},
{
"meta": null,
"text": "\u00a0"
"text": ""
}
],
[
@ -60,7 +60,7 @@
},
{
"meta": null,
"text": "\u00a0"
"text": ""
}
],
[
@ -86,7 +86,7 @@
},
{
"meta": null,
"text": "\u00a0"
"text": ""
}
],
[
@ -112,33 +112,33 @@
},
{
"meta": null,
"text": "\u00a0"
"text": ""
}
],
[
{
"meta": null,
"text": "Adv\n\u00a0"
"text": "Adv"
},
{
"meta": null,
"text": "Beg\n\u00a0"
"text": "Beg"
},
{
"meta": null,
"text": "Beg\n\u00a0"
"text": "Beg"
},
{
"meta": null,
"text": "Beg\n\u00a0"
"text": "Beg"
},
{
"meta": null,
"text": "Beg\n\u00a0"
"text": "Beg"
},
{
"meta": null,
"text": "\u00a0\n\u00a0"
"text": ""
}
],
[
@ -164,7 +164,7 @@
},
{
"meta": null,
"text": "\u00a0"
"text": ""
}
],
[
@ -190,7 +190,7 @@
},
{
"meta": null,
"text": "\u00a0"
"text": ""
}
]
]