Vote count:
0
i using python 3.3.4 and try to get content of web page. but i have a error. my sample site is:
whene i write content to file, everything is good but when i print data, i give an error
my error:
UnicodeEncodeError: 'charmap' codec can't encode characters in position 341-342:
character maps to <undefined>
and my code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs
import urllib.request
import re
from sys import argv
__author__ = 'Moein'
totalPages = 100
UrlList = []
emails = []
numbers = []
toUtf8 = codecs.getencoder('UTF8')
class Robot:
def __init__(self):
self.getURLContent(UrlList.pop(0))
pass
def getURLContent(self, url):
request = urllib.request.Request(url)
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')
print(content)
f = open('f:/1.txt', 'w', encoding='utf-8')
f.write(content);
f.close()
pass
def getPageURLs(self, page):
pass
def getPageNumbers(self, page):
pass
def getPageEmails(self, page):
r = re.compile(r'[^@]+@[^@]+\.[^@]+')
return r.findall(page)
pass
# get arg
arg_names = ['command', 'x', 'y', 'operation', 'option']
args = dict(zip(arg_names, argv))
# set arg to list
UrlList.append(args['x'])
robot = Robot()
asked 1 min ago
python 3.4 get content of page - charmap error
Aucun commentaire:
Enregistrer un commentaire