import codecs

# Print the contents of sample.html, decoded as UTF-8, to standard output.
# The file is opened in a ``with`` block so the handle is closed as soon as
# the read finishes, even if reading raises.
with codecs.open("sample.html", "r", "utf-8") as html_file:
    contents = html_file.read()

print(contents)

Read more of this post