From 9a9506174b85728ba3fe524f07631e80721fa94b Mon Sep 17 00:00:00 2001 From: Christian Wolf Date: Sat, 26 Nov 2022 08:43:46 +0100 Subject: [PATCH] Clean up HTML import from nbsp chars --- src/solo_turnier/html_parser.py | 2 +- .../tests/html_parser/tabges/1/expected.json | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py index 9224d31..04eb8ad 100644 --- a/src/solo_turnier/html_parser.py +++ b/src/solo_turnier/html_parser.py @@ -133,7 +133,7 @@ class HtmlParser: td.smooth() return { - 'text': td.string, + 'text': td.string.replace('\xa0', ' ').strip(), 'meta': meta } diff --git a/src/solo_turnier/tests/html_parser/tabges/1/expected.json b/src/solo_turnier/tests/html_parser/tabges/1/expected.json index cffc6b3..d4e51d5 100644 --- a/src/solo_turnier/tests/html_parser/tabges/1/expected.json +++ b/src/solo_turnier/tests/html_parser/tabges/1/expected.json @@ -34,7 +34,7 @@ }, { "meta": null, - "text": "\u00a0" + "text": "" } ], [ @@ -60,7 +60,7 @@ }, { "meta": null, - "text": "\u00a0" + "text": "" } ], [ @@ -86,7 +86,7 @@ }, { "meta": null, - "text": "\u00a0" + "text": "" } ], [ @@ -112,33 +112,33 @@ }, { "meta": null, - "text": "\u00a0" + "text": "" } ], [ { "meta": null, - "text": "Adv\n\u00a0" + "text": "Adv" }, { "meta": null, - "text": "Beg\n\u00a0" + "text": "Beg" }, { "meta": null, - "text": "Beg\n\u00a0" + "text": "Beg" }, { "meta": null, - "text": "Beg\n\u00a0" + "text": "Beg" }, { "meta": null, - "text": "Beg\n\u00a0" + "text": "Beg" }, { "meta": null, - "text": "\u00a0\n\u00a0" + "text": "" } ], [ @@ -164,7 +164,7 @@ }, { "meta": null, - "text": "\u00a0" + "text": "" } ], [ @@ -190,7 +190,7 @@ }, { "meta": null, - "text": "\u00a0" + "text": "" } ] ]