# 解析煎蛋图片 (parse image links from Jandan pages)

from html.parser import HTMLParser


class my_html_parser(HTMLParser):
    """Collect the ``href`` of every ``<a>`` tag whose class is ``view_img_link``.

    Feed HTML text with ``feed()``; the link targets of matching anchors are
    appended to ``self.imgs`` in the order they are encountered.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # True when the most recently handled start tag was an <a> carrying
        # the ``view_img_link`` class; reset on every start tag.
        self.bprint = False
        # href values collected so far, in encounter order.
        self.imgs = []

    def handle_starttag(self, tag, attrs):
        """Record the href of an ``<a class="view_img_link">`` start tag.

        Args:
            tag: Tag name as passed by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None`` for
                an attribute written without a value.
        """
        self.bprint = False
        # Compare by equality: identity checks against string literals are
        # unreliable, and only anchors are of interest here.
        if tag != 'a':
            return

        img = ''
        for name, value in attrs:
            if name == 'href':
                img = value
            elif name == 'class' and value is not None:
                # ``class`` is a whitespace-separated token list, so also
                # match when other classes accompany ``view_img_link``.
                if 'view_img_link' in value.split():
                    self.bprint = True

        # Truthiness skips both an empty href and a valueless one (None).
        if img and self.bprint:
            self.imgs.append(img)

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags (``<tag ... />``).

        Overriding this with a no-op means such tags are not forwarded to
        ``handle_starttag``.
        """
        pass

    def handle_endtag(self, tag):
        """End tags carry no information needed here; do nothing."""
        pass

    def print_img(self):
        """Print every collected link target, one per line."""
        for img in self.imgs:
            print(img)

if __name__ == '__main__':
    # Small smoke test: parse a fixed HTML snippet and print whatever image
    # links were collected. This sample contains no ``view_img_link`` link,
    # so nothing is printed for it.
    parser = my_html_parser()
    parser.feed('''<html>
    <head></head>
    <body>
    <!-- test html parser -->
        <p>Some <a href=\"#\">html</a> HTML&nbsp;tutorial...<br>END</p>
    </body></html>''')
    # Force processing of any input the parser is still buffering.
    parser.close()
    parser.print_img()
# 赞 (0) 评论 分享 ()