Skip to content

Commit 85edb51

Browse files
committed
Fix search by id using the htmlparser to find the title tag
1 parent 305753e commit 85edb51

File tree

3 files changed

+14
-11
lines changed

3 files changed

+14
-11
lines changed

howlongtobeatpy/howlongtobeatpy/HTMLRequests.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import re
55
import html
66
import json
7-
from bs4 import BeautifulSoup
87
from enum import Enum
8+
from bs4 import BeautifulSoup
99
import aiohttp
1010
import requests
1111
from fake_useragent import UserAgent
@@ -100,7 +100,7 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
100100
api_key_result = HTMLRequests.send_website_request_getcode(True)
101101
# Make the POST request and return the result if it is valid
102102
search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
103-
resp = requests.post(search_url_with_key, headers=headers, data=payload)
103+
resp = requests.post(search_url_with_key, headers=headers, data=payload, timeout=60)
104104
if resp.status_code == 200:
105105
return resp.text
106106
return None
@@ -129,7 +129,7 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
129129
return None
130130

131131
@staticmethod
132-
def __cut_game_title(game_title: str):
132+
def __cut_game_title(page_source: str):
133133
"""
134134
Function that extracts the game title from the HTML title of the HowLongToBeat page
135135
@param page_source: The HTML source of the HowLongToBeat game page, from which the <title> tag is read
@@ -138,12 +138,15 @@ def __cut_game_title(game_title: str):
138138
(So, in this example: "A Way Out")
139139
"""
140140

141-
if game_title is None or len(game_title) == 0:
141+
if page_source is None or len(page_source) == 0:
142142
return None
143143

144-
title = re.search("<title>(.*)<\/title>", game_title)
144+
soup = BeautifulSoup(page_source, 'html.parser')
145+
title_tag = soup.title
146+
title_text = title_tag.string
147+
145148
# The start and end offsets of this slice may need to change if the website changes its title format
146-
cut_title = str(html.unescape(title.group(1)[12:-17]))
149+
cut_title = title_text[12:-17].strip()
147150
return cut_title
148151

149152
@staticmethod
@@ -183,7 +186,7 @@ def get_game_title(game_id: int):
183186
headers = HTMLRequests.get_title_request_headers()
184187

185188
# Request and extract title
186-
contents = requests.get(HTMLRequests.GAME_URL, params=params, headers=headers)
189+
contents = requests.get(HTMLRequests.GAME_URL, params=params, headers=headers, timeout=60)
187190
return HTMLRequests.__cut_game_title(contents.text)
188191

189192
@staticmethod
@@ -213,7 +216,7 @@ def send_website_request_getcode(parse_all_scripts: bool):
213216
"""
214217
# Make the GET request and return the result if it is valid
215218
headers = HTMLRequests.get_title_request_headers()
216-
resp = requests.get(HTMLRequests.BASE_URL, headers=headers)
219+
resp = requests.get(HTMLRequests.BASE_URL, headers=headers, timeout=60)
217220
if resp.status_code == 200 and resp.text is not None:
218221
# Parse the HTML content using BeautifulSoup
219222
soup = BeautifulSoup(resp.text, 'html.parser')
@@ -225,7 +228,7 @@ def send_website_request_getcode(parse_all_scripts: bool):
225228
matching_scripts = [script['src'] for script in scripts if '_app-' in script['src']]
226229
for script_url in matching_scripts:
227230
script_url = HTMLRequests.BASE_URL + script_url
228-
script_resp = requests.get(script_url, headers=headers)
231+
script_resp = requests.get(script_url, headers=headers, timeout=60)
229232
if script_resp.status_code == 200 and script_resp.text is not None:
230233
pattern = r'"/api/search/".concat\("([a-zA-Z0-9]+)"\)'
231234
matches = re.findall(pattern, script_resp.text)

howlongtobeatpy/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
long_description = fh.read()
55

66
setup(name='howlongtobeatpy',
7-
version='1.0.8',
7+
version='1.0.9',
88
packages=find_packages(exclude=['tests']),
99
description='A Python API for How Long to Beat',
1010
long_description=long_description,

sonar-project.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ sonar.organization=scrappycocco-github
22
sonar.projectKey=ScrappyCocco_HowLongToBeat-PythonAPI
33

44
sonar.projectName=HowLongToBeat-PythonAPI
5-
sonar.projectVersion=1.0.8
5+
sonar.projectVersion=1.0.9
66
sonar.python.version=3.9
77

88
# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.

0 commit comments

Comments
 (0)