try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2 fallback (original import)


class ValueParser(HTMLParser):
    """Collect the text found inside ``<p class="valuable information">``.

    After feeding HTML to the parser, ``data`` holds every text chunk that
    was reported while a matching paragraph was open, in document order.
    A paragraph's text may be split over several chunks (for example when
    the input is fed line by line).
    """

    def __init__(self):
        # Explicit base call instead of super(): on Python 2 HTMLParser is
        # an old-style class, for which super() does not work.
        HTMLParser.__init__(self)
        self.inside = False  # True while a matching <p> is open
        self.data = []       # collected text chunks

    def handle_starttag(self, tag, attrs):
        """Start capturing when a matching ``<p>`` opens.

        The class attribute must be exactly ``"valuable information"``;
        ``attrs`` is searched for that ``(name, value)`` pair.
        """
        if tag == "p" and ("class", "valuable information") in attrs:
            self.inside = True

    def handle_data(self, data):
        """Store ``data`` if a matching paragraph is currently open."""
        if self.inside:
            self.data.append(data)

    def handle_endtag(self, tag):
        """Stop capturing at any ``</p>``, matching paragraph or not."""
        if tag == "p":
            self.inside = False


def get_valuables(url, path="some.html"):
    """Return the valuable text chunks found in a local HTML file.

    Args:
        url: Currently unused; nothing is downloaded. Kept so existing
            callers keep working.
        path: File to parse. Defaults to ``"some.html"``, the file the
            original implementation always read.

    Returns:
        A new list with the text chunks collected by ``ValueParser``.

    Raises:
        IOError/OSError: If ``path`` cannot be opened.
    """
    parser = ValueParser()
    # The context manager closes the file even if parsing raises.
    with open(path, "r") as handle:
        # Fed line by line so chunk boundaries match the original behaviour.
        for line in handle:
            parser.feed(line)
    # Flush text the parser is still buffering (e.g. after a dangling "&");
    # without this, trailing data is silently dropped.
    parser.close()
    return list(parser.data)


if __name__ == "__main__":
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(get_valuables("blabla"))