Remove non-breaking space (nbsp) characters from HTML import text

This commit is contained in:
Christian Wolf 2022-11-26 08:43:46 +01:00
parent adc7158862
commit 9a9506174b
2 changed files with 13 additions and 13 deletions

View File

@ -133,7 +133,7 @@ class HtmlParser:
td.smooth() td.smooth()
return { return {
'text': td.string, 'text': td.string.replace('\xa0', ' ').strip(),
'meta': meta 'meta': meta
} }

View File

@ -34,7 +34,7 @@
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0" "text": ""
} }
], ],
[ [
@ -60,7 +60,7 @@
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0" "text": ""
} }
], ],
[ [
@ -86,7 +86,7 @@
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0" "text": ""
} }
], ],
[ [
@ -112,33 +112,33 @@
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0" "text": ""
} }
], ],
[ [
{ {
"meta": null, "meta": null,
"text": "Adv\n\u00a0" "text": "Adv"
}, },
{ {
"meta": null, "meta": null,
"text": "Beg\n\u00a0" "text": "Beg"
}, },
{ {
"meta": null, "meta": null,
"text": "Beg\n\u00a0" "text": "Beg"
}, },
{ {
"meta": null, "meta": null,
"text": "Beg\n\u00a0" "text": "Beg"
}, },
{ {
"meta": null, "meta": null,
"text": "Beg\n\u00a0" "text": "Beg"
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0\n\u00a0" "text": ""
} }
], ],
[ [
@ -164,7 +164,7 @@
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0" "text": ""
} }
], ],
[ [
@ -190,7 +190,7 @@
}, },
{ {
"meta": null, "meta": null,
"text": "\u00a0" "text": ""
} }
] ]
] ]