From de0a783ecf104dcdae4d8dcdfa4204c1b43a495a Mon Sep 17 00:00:00 2001 From: Richard Chien Date: Fri, 13 Jan 2017 12:00:39 +0800 Subject: [PATCH] Add cache and NLP for weather --- commands/weather.py | 140 +++++++++++++++++++++++---------------- nl_processor.py | 1 + nl_processors/weather.py | 48 ++++++++++++++ 3 files changed, 131 insertions(+), 58 deletions(-) create mode 100644 nl_processors/weather.py diff --git a/commands/weather.py b/commands/weather.py index 00c83e2b..883e770f 100644 --- a/commands/weather.py +++ b/commands/weather.py @@ -1,12 +1,13 @@ import os -from datetime import datetime +import json +import sqlite3 +from datetime import datetime, timedelta import requests -import jieba from command import CommandRegistry, split_args from commands import core -from little_shit import get_source +from little_shit import get_source, get_db_dir, get_tmp_dir from interactive import * __registry__ = cr = CommandRegistry() @@ -14,9 +15,7 @@ __registry__ = cr = CommandRegistry() _api_key = os.environ.get('HEWEATHER_API_KEY') _base_api_url = 'https://free-api.heweather.com/v5' _search_api_url = _base_api_url + '/search' -_forecast_api_url = _base_api_url + '/forecast' -_now_api_url = _base_api_url + '/now' -_suggestion_api_url = _base_api_url + '/suggestion' +_detail_api_url = _base_api_url + '/weather' _cmd_weather = 'weather.weather' _cmd_suggestion = 'weather.suggestion' @@ -34,21 +33,19 @@ def weather(args, ctx_msg, allow_interactive=True): return _do_interactively(_cmd_weather, weather, args, ctx_msg, source) city_id = args[0] - session = requests.Session() - params = {'city': city_id, 'key': _api_key} text = '' - # Get real-time weather - data_now = session.get(_now_api_url, params=params).json() - if data_now and 'HeWeather5' in data_now and data_now['HeWeather5'][0].get('status') == 'ok': - now = data_now['HeWeather5'][0]['now'] - text += '实时:\n%s,气温%s˚C,体感温度%s˚C,%s%s级,能见度%skm' \ - % (now['cond']['txt'], now['tmp'], now['fl'], now['wind']['dir'], now['wind']['sc'], now['vis']) + data = _get_weather(city_id) + if data: + text += '%s天气\n更新时间:%s' % (data['basic']['city'], data['basic']['update']['loc']) - # Get forecast - data_forecast = session.get(_forecast_api_url, params=params).json() - if data_forecast and 'HeWeather5' in data_forecast and data_forecast['HeWeather5'][0].get('status') == 'ok': - daily_forecast = data_forecast['HeWeather5'][0]['daily_forecast'] + now = data['now'] + aqi = data['aqi']['city'] + text += '\n\n实时:\n%s,气温%s˚C,体感温度%s˚C,%s%s级,能见度%skm,空气质量指数:%s,%s,PM2.5:%s,PM10:%s' \ + % (now['cond']['txt'], now['tmp'], now['fl'], now['wind']['dir'], now['wind']['sc'], now['vis'], + aqi['aqi'], aqi['qlty'], aqi['pm25'], aqi['pm10']) + + daily_forecast = data['daily_forecast'] text += '\n\n预报:\n' for forecast in daily_forecast: @@ -64,7 +61,6 @@ def weather(args, ctx_msg, allow_interactive=True): text += '降雨概率%s%%' % forecast['pop'] text += '\n' - text = text.rstrip() if text: core.echo(text, ctx_msg) else: @@ -81,14 +77,11 @@ def suggestion(args, ctx_msg, allow_interactive=True): return _do_interactively(_cmd_suggestion, suggestion, args, ctx_msg, source) city_id = args[0] - session = requests.Session() - params = {'city': city_id, 'key': _api_key} text = '' - # Get suggestion - data_suggestion = session.get(_suggestion_api_url, params=params).json() - if data_suggestion and 'HeWeather5' in data_suggestion and data_suggestion['HeWeather5'][0].get('status') == 'ok': - data = data_suggestion['HeWeather5'][0]['suggestion'] + data = _get_weather(city_id) + if data: + data = data['suggestion'] text += '生活指数:\n\n' \ '舒适度:%s\n\n' \ '洗车指数:%s\n\n' \ @@ -122,42 +115,27 @@ def _do_interactively(command_name, func, args, ctx_msg, source): core.echo('你输入的城市不正确哦,请重新发送命令~', c) return True - prov = None - city = a[0] - # Try to split province and city if possible - tmp = jieba.lcut(city) - if len(tmp) == 2: - prov, city = tmp + city_list = _get_city_list(a[0]) - resp = requests.get(_search_api_url, params={ - 'city': city, - 'key': _api_key - }) - data = resp.json() - if resp.status_code == 200 and data and 'HeWeather5' in data: - city_list = data['HeWeather5'] - if city_list[0].get('status') != 'ok': - core.echo('没有找到你输入的城市哦,请重新发送命令~', c) - return True + if not city_list: + core.echo('没有找到你输入的城市哦,请重新发送命令~', c) + return True - if prov: - city_list = list(filter(lambda c: c['basic']['prov'] == prov, city_list)) + s.data['city_list'] = city_list - s.data['city_list'] = city_list + if len(city_list) == 1: + # Directly choose the first one + choose_city(s, ['1'], c) + return True - if len(city_list) == 1: - # Directly choose the first one - choose_city(s, ['1'], c) - return True - - # Here comes more than one city with the same name - core.echo( - '找到 %d 个重名城市,请选择你要查询的那个,发送它的序号:\n\n' % len(city_list) - + '\n'.join( - [str(i + 1) + '. ' + c['basic']['prov'] + c['basic']['city'] for i, c in enumerate(city_list)] - ), - c - ) + # Here comes more than one city with the same name + core.echo( + '找到 %d 个重名城市,请选择你要查询的那个,发送它的序号:\n\n' % len(city_list) + + '\n'.join( + [str(i + 1) + '. ' + c['prov'] + c['city'] for i, c in enumerate(city_list)] + ), + c + ) s.state += 1 @@ -172,7 +150,7 @@ def _do_interactively(command_name, func, args, ctx_msg, source): core.echo('你输入的序号超出范围了,请重新发送命令~', c) return True - city_id = city_list[choice]['basic']['id'] + city_id = city_list[choice]['id'] # sess.data['func']([city_id], c, allow_interactive=False) func([city_id], c, allow_interactive=False) return True @@ -189,3 +167,49 @@ def _do_interactively(command_name, func, args, ctx_msg, source): if _state_machines[command_name][sess.state](sess, args, ctx_msg): # Done remove_session(source, command_name) + + +_weather_db_path = os.path.join(get_db_dir(), 'weather.sqlite') + + +def _get_city_list(city_name): + city_name = city_name.lower() + if not os.path.exists(_weather_db_path): + resp = requests.get('http://7xo46j.com1.z0.glb.clouddn.com/weather.sqlite', stream=True) + with resp.raw as s, open(_weather_db_path, 'wb') as d: + d.write(s.read()) + + conn = sqlite3.connect(_weather_db_path) + cities = list(conn.execute( + 'SELECT code, name, province FROM city WHERE name = ? OR name_en = ? OR province || name = ?', + (city_name, city_name, city_name) + )) + return [{'id': x[0], 'city': x[1], 'prov': x[2]} for x in cities] + + +_weather_cache_dir = os.path.join(get_tmp_dir(), 'weather') + + +def _get_weather(city_id): + if not os.path.exists(_weather_cache_dir): + os.makedirs(_weather_cache_dir) + + file_name = city_id + '.json' + file_path = os.path.join(_weather_cache_dir, file_name) + if os.path.exists(file_path): + update_time = datetime.fromtimestamp(os.path.getmtime(file_path)) + if (datetime.now() - update_time) < timedelta(hours=1): + with open(file_path, 'r') as f: + data = json.load(f) + data['from_cache'] = True + return data + + data = requests.get(_detail_api_url, params={'city': city_id, 'key': _api_key}).json() + if data and 'HeWeather5' in data and data['HeWeather5'][0].get('status') == 'ok': + data = data['HeWeather5'][0] + with open(file_path, 'w') as f: + json.dump(data, f) + data['from_cache'] = False + return data + + return None diff --git a/nl_processor.py b/nl_processor.py index 72a26daa..08dde1cb 100644 --- a/nl_processor.py +++ b/nl_processor.py @@ -39,4 +39,5 @@ def parse_potential_commands(sentence): result = func(sentence, segmentation) if result: potential_commands.append(result) + print('可能的命令:', potential_commands) return potential_commands diff --git a/nl_processors/weather.py b/nl_processors/weather.py new file mode 100644 index 00000000..707c62fa --- /dev/null +++ b/nl_processors/weather.py @@ -0,0 +1,48 @@ +import re + +from nl_processor import as_processor + +_keywords = ('天气', '气温', '空气(质量)?', '温度', '多少度', '(风|雨|雪|冰雹|霜|雾|霾)') + + +def _match_keywords(word): + for regex in _keywords: + if re.match(regex, word): + return True + return False + + +@as_processor(keywords=_keywords) +def _processor(sentence, segmentation): + possibility = 100 + location_segs = list(filter(lambda x: x.flag == 'ns', segmentation)) + if not location_segs: + return None + + if len(location_segs) == 1: + # Just city name + city = location_segs[0].word.rstrip('市县区') + elif len(location_segs) == 2: + # Maybe has both province and city name + city = location_segs[0].word.rstrip('省') + location_segs[1].word.rstrip('市县区') + else: + # More than 3 location name, use the last one + city = location_segs[-1].word.rstrip('市县区') + + for seg in location_segs: + segmentation.remove(seg) + + for seg in segmentation: + # Scan over all segments and decrease possibility + if _match_keywords(seg.word): + continue + + flag = seg.flag + score_dict = {'v': -10, 'l': -8, 'n': -5, 'p': -3, 't': +3, 'other': -1} + for k, v in score_dict.items(): + if flag.startswith(k): + possibility += v + continue + possibility += score_dict['other'] + + return possibility, 'weather.weather', city, None