1
1
#!/usr/bin/env python
2
+ # -*- coding=utf-8 -*-
2
3
3
4
import sys
4
5
import re
5
6
import os
6
7
import argparse
7
8
import requests
8
- from lxml import html
9
+ from lxml import html as lxml_html
10
+
11
+ try :
12
+ import html
13
+ except ImportError :
14
+ import HTMLParser
15
+ html = HTMLParser .HTMLParser ()
9
16
10
17
try :
11
18
import cPickle as pk
@@ -23,7 +30,7 @@ def get_problems_info(self):
23
30
indexs = re .findall (r'<td>(\d+)</td>' , cm )
24
31
problem_urls = ['https://leetcode.com' + url \
25
32
for url in re .findall (
26
- r'href="(/problems/.+?)"' , cm )]
33
+ r'<a href="(/problems/.+?)"' , cm )]
27
34
levels = re .findall (r"<td value='\d*'>(.+?)</td>" , cm )
28
35
tinfos = zip (indexs , levels , problem_urls )
29
36
infos = []
@@ -32,11 +39,14 @@ def get_problems_info(self):
32
39
if not res .ok :
33
40
print ('request error' )
34
41
sys .exit ()
35
- tree = html .fromstring (res .text )
42
+ tree = lxml_html .fromstring (res .text )
36
43
title = tree .xpath ('//meta[@property="og:title"]/@content' )[0 ]
37
- description = tree .xpath ('//meta[@property="og:description"]/@content' )[0 ]
38
- if self .args .rm_blank :
39
- description = re .sub (r'\n+' , r'\n' , description )
44
+ description = tree .xpath ('//meta[@property="description"]/@content' )
45
+ if not description :
46
+ description = tree .xpath ('//meta[@property="og:description"]/@content' )[0 ]
47
+ else :
48
+ description = description [0 ]
49
+ description = html .unescape (description .strip ())
40
50
tags = tree .xpath ('//div[@id="tags"]/following::a[@class="btn btn-xs btn-primary"]/text()' )
41
51
infos .append (
42
52
{
@@ -71,6 +81,8 @@ def to_text(self, pm_infos):
71
81
'{description}\n ' + '\n ' * self .args .line
72
82
text = ''
73
83
for info in infos :
84
+ if self .args .rm_blank :
85
+ info ['description' ] = re .sub (r'[\n\r]+' , r'\n' , info ['description' ])
74
86
text += text_template .format (** info )
75
87
76
88
with open ('leecode problems.txt' , 'w' ) as g :
0 commit comments