Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#
# https://www.facebook.com/groups/learnpython.org/permalink/1224436087621006/
#
# http://stackoverflow.com/a/925630/1832058
# https://docs.python.org/2/library/codecs.html
#
# Sample inputs for strip_tags(): one long real-world HTML fragment followed
# by short strings that exercise entity handling, tag removal and characters
# that may or may not be encodable as cp1250.
data = [
    '<div class="more"> <h3>Key Responsibilities</h3><ul><li>Product development: assist in new product development process, from concept through commercialization, by translating market needs into product requirements and establishing the added value,</li><li>Product launches: prepare marketing deliverables, develop scientific and technical product information and training for sales and customers, monitor product revenues, make sales forecasts,</li><li>Guiding sales: develop product positioning and marketing tools to effectively promote products and train sales teams accordingly. Collaborate with sales team to drive growth for the business segment(s).</li><li>Customer contacts: develop a customer network by performing regular site visits, training initiatives and congress participation.</li><li>Product regulatory and quality: assure internal procedures are followed and products are commercialized according to the company’s quality requirements. </li></ul><p> </p><h3>Profile</h3><ul><li>Master’s degree in life sciences (ideally molecular biology) or equivalent experience,</li><li>Understanding of molecular diagnostic markets (preferably genetics or pathology),</li><li>Customer-oriented and problem-solving,</li><li>Motivated to work in - and being flexible to adapt to an entrepreneurial environment,</li><li>Multilingual (Dutch, English, French),</li><li>Prepared to travel on a regular basis (up to 25%, in Europe).</li></ul><p> </p><h3>We offer</h3><p>Multiplicom offers a competitive compensation and benefits package, and strong leadership commitment to individual learning and personal development in an entrepreneurial environment.</p><p> </p><h3>How to apply? </h3> <div> </div> </div>',

    # entities
    # NOTE(review): this literal holds already-decoded characters rather than
    # entity references such as '&lt;' or '&copy;'; it may have been altered
    # when the snippet was copied from a web page -- confirm against the
    # original before relying on it as an entity test.
    '< & © ® £ >',

    # no newline
    'Hello<br/>World',

    # OK - bytes converted to string/unicode using cp1250
    b'\x92'.decode('cp1250'),

    # OK - incorrect code converted to bytes
    '\x92'.encode('raw_unicode_escape').decode('cp1250'),

    # error - incorrect unicode string when encode('cp1250'),
    '\x92',
]
from html.parser import HTMLParser


class MLStripper(HTMLParser):
    """HTML parser that drops the markup and keeps only the text content.

    Feed it HTML with ``feed()``, call ``close()`` once all input has been
    supplied, then read the accumulated text with ``get_data()``.
    """

    def __init__(self):
        # Let the base class set up its own state instead of calling reset()
        # and assigning its attributes by hand.  convert_charrefs=True makes
        # the parser turn character references into the corresponding
        # characters before the text reaches handle_data().
        super().__init__(convert_charrefs=True)
        self.fed = []  # text fragments, in the order they were parsed

    def handle_data(self, d):
        """Store one run of text reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return every text fragment collected so far as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with all markup removed.

    Args:
        html: String containing HTML (or plain text).

    Returns:
        The text collected by an ``MLStripper`` parser, joined into one
        string.
    """
    stripper = MLStripper()
    stripper.feed(html)
    # feed() may hold back trailing text (for example text ending in an "&"
    # that could be the start of a character reference) while it waits for
    # more input; close() tells the parser the input is complete so that
    # buffered text is delivered too.
    stripper.close()
    return stripper.get_data()
# For every sample: print the stripped text, then print the same text encoded
# as cp1250 (or the encoding error), followed by a separator line.
for row in data:
    text = strip_tags(row)  # strip once and reuse for both prints
    print(text)
    try:
        # simulate print() in Windows where cmd.exe uses 'cp1250'
        encoded = text.encode('cp1250')
    except UnicodeEncodeError as e:
        # the text contains a character that cp1250 cannot represent
        print('ERROR:', e)
    else:
        print(encoded)
    print('-----')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement