# Source code for meteoserver.weatherforecast

# -*- coding: utf-8 -*-
#  Copyright (c) 2020-2021  Marc van der Sluys - marc.vandersluys.nl
#  
#  This file is part of the Meteoserver Python package, containing a Python module to obtain and read Dutch
#  weather data from Meteoserver.nl.  See: https://github.com/MarcvdSluys/Meteoserver
#  
#  This is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
#  
#  This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
#  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License along with this code.  If not, see
#  <http://www.gnu.org/licenses/>.


"""
   Functions to obtain, read and write 2 (HARMONIE) or 4-10 (GFS) day hourly weather-forecast
   ("Uurverwachting") data from Meteoserver.nl.
"""


import pandas as pd
import json
import requests
import sys




# [docs]
def read_json_url_weatherforecast(key, location, model='GFS', full=False, loc=False, numeric=True):
    """Get hourly weather-forecast data from the Meteoserver server and return them as a dataframe.
    
    This uses the "Uurverwachting" Meteoserver API/data.
    
    Parameters:
        key (string):       The Meteoserver API key.
        location (string):  The name of the location (in the Netherlands) to obtain data for (e.g. 'De Bilt').
        model (string):     Weather model to use: 'HARMONIE' or 'GFS' (default: GFS)
    
                                - HARMONIE: use high-resolution HARMONIE model for BeNeLux and HiRLAM for the
                                  rest of Europe.  Hourly predictions up to 48 hours in advance.  New data
                                  available at 5:30, 11:30, 17:30 and 23:30 CE(S)T.
    
                                - GFS: use GFS model for BeNeLux.  Hourly predictions for 4 days, then
                                  three-hourly predictions for the next 10 days.  New data are available at
                                  0:30, 7:30, 12:30 and 18:30 CE(S)T.
    
        full (bool):        Return the full dataframe (currently 31 columns).  If false, obsolescent and duplicate
                            (in non-SI units) columns are removed (currently, 22 columns are returned).  Default: False.
        loc (bool):         Return the location name as a second return value (default=False).
        numeric (bool):     Convert dataframe content from strings to numeric/datetime format (default=True).
                            Set this to False if you intend to write a JSON file that is (nearly) identical
                            to the original format.
    
    Returns:
        df or tuple (df, str):  The forecast data, or the tuple (data, retLoc) if loc=True:
    
          - data (df):     Pandas dataframe containing forecast data for the specified location (or region).
          - retLoc (str):  The name of the location the data are for, as reported by the server (only
            returned if loc=True).
    
    Raises:
        SystemExit:  With exit code 1 if model is neither 'GFS' nor 'HARMONIE'; an error message is printed to
                     stderr first.  No request is sent in that case.
    """
    
    # API endpoint for each supported weather model:
    endpoints = {
        'GFS':      'https://data.meteoserver.nl/api/uurverwachting_gfs.php',
        'HARMONIE': 'https://data.meteoserver.nl/api/uurverwachting.php',
    }
    
    # Reject unknown models before touching the network:
    if model not in endpoints:
        print('read_json_url_weatherforecast(): error: unknown model: '+model+'; please choose between HARMONIE and GFS',
              file=sys.stderr)
        sys.exit(1)  # sys.exit() rather than the site-provided exit(), which is not guaranteed to exist
    
    # Get online data as a string containing the JSON 'file'.  Passing the query through params= lets requests
    # URL-encode the values, so that spaces, '&', '#', etc. in the location or key cannot corrupt the query:
    dataJSON = requests.get(endpoints[model], params={'locatie': location, 'key': key}).text
    
    # Convert the JSON 'file' to a dictionary with keys 'plaatsnaam' and 'data':
    dataDict = json.loads(dataJSON)  # Note: .loads(), not .load()!
    
    # Get the location name and forecast-data dataframe from the data dictionary:
    retLoc, data = extract_hourly_forecast_dataframes_from_dict(dataDict, numeric)
    
    if not full:  # Remove obsolescent and duplicate columns:
        data = remove_unused_hourly_forecast_columns(data)
    
    if loc:
        return data, retLoc
    return data




# [docs]
def read_json_file_weatherforecast(fileJSON, full=False, loc=False, numeric=True):
    """Read a Meteoserver weather-forecast-data JSON file from disc and return the data as a dataframe.
    
    This uses the "Uurverwachting" Meteoserver data.
    
    Parameters:
        fileJSON (string):  The name of the JSON file to read.
        full (bool):        Return the full dataframe (currently 31 columns).  If false, obsolescent and duplicate
                            (in non-SI units) columns are removed (currently, 22 columns are returned).  Default: False.
        loc (bool):         Return the location name as a second return value (default=False).
        numeric (bool):     Convert dataframe content from strings to numeric/datetime format (default=True).
                            Set this to False if you intend to write a JSON file that is (nearly) identical
                            to the original format.
    
    Returns:
        df or tuple (df, str):  The forecast data, or the tuple (data, location) if loc=True:
    
        - data (df):       Pandas dataframe containing forecast data for the specified location (or region).
        - location (str):  The name of the location the data are for (only returned if loc=True).
    
    """
    
    # Read the file as UTF-8 explicitly, so that decoding does not depend on the platform's locale encoding.
    # The file is only kept open while it is being parsed:
    with open(fileJSON, encoding='utf-8') as dataJSON:
        # Convert the JSON 'file' to a dictionary with keys 'plaatsnaam' and 'data':
        dataDict = json.load(dataJSON)  # Note: .load(), not .loads()!
    
    # Get the location name and forecast-data dataframe from the data dictionary:
    location, data = extract_hourly_forecast_dataframes_from_dict(dataDict, numeric)
    
    if not full:  # Remove obsolescent and duplicate columns:
        data = remove_unused_hourly_forecast_columns(data)
    
    if loc:
        return data, location
    return data




# [docs]
def extract_hourly_forecast_dataframes_from_dict(dataDict, numeric=True):
    """Extract the location and forecast-data Pandas dataframe from a data dictionary.
    
    Parameters:
        dataDict (dict):  The data dictionary to convert.  It must have the keys 'plaatsnaam' (a list holding one
                          dict with the key 'plaats') and 'data' (a list of forecast dicts).
        numeric (bool):   Convert dataframe content from strings to numeric/datetime format (default=True).
                          Set this to False if you intend to write a JSON file that is (nearly) identical
                          to the original format.
    
    Returns:
        tuple (str, df):  Tuple containing (location, data):
        
          - location (str):  Location the data are for.
          - data (df):       Pandas dataframe containing forecast data for the specified location (or region).
    """
    
    # Create location string from list of dictionaries:
    location = pd.DataFrame.from_dict(dataDict['plaatsnaam']).plaats[0]  # List of dict -> df -> str
    
    # Create Pandas dataframe from list of dictionaries:
    data = pd.DataFrame.from_dict(dataDict['data'])
    
    if numeric:
        # Columns to convert to numbers, if present.  'windrltr' is deliberately absent, so it is left as text:
        numericColumns = (
            'tijd', 'offset', 'loc', 'temp',
            'winds', 'windb', 'windknp', 'windkmh', 'windr',
            'gust', 'gustb', 'gustkt', 'gustkmh',
            'vis', 'neersl',
            'luchtd', 'luchtdmmhg', 'luchtdinhg',
            'rv', 'gr', 'hw', 'mw', 'lw', 'tw',
            'cape', 'cond', 'ico',
        )
        
        # errors='coerce' turns values that cannot be parsed into NaN/NaT rather than raising:
        for column in numericColumns:
            if column in data.columns:
                data[column] = pd.to_numeric(data[column], errors='coerce')
        
        # The 'tijd_nl' column is parsed as a 'day-month-year hour:minute' timestamp:
        if 'tijd_nl' in data.columns:
            data['tijd_nl'] = pd.to_datetime(data['tijd_nl'], format='%d-%m-%Y %H:%M', errors='coerce')
    
    return location, data




# [docs]
def remove_unused_hourly_forecast_columns(dataFrame):
    """Strip the (probably) unused columns from a weather-forecast dataframe.
    
    The following columns are deleted when present; absent ones are silently skipped:
      - 'loc': obsolescent.
      - 'windb' (Beaufort), 'windknp' (knots), 'windkmh' (km/h): wind speed/force, derivable from the SI
        'winds' column (m/s).
      - 'gustb' (Beaufort), 'gustkt' (knots), 'gustkmh': wind gusts, derivable from the SI 'gust' column (m/s).
      - 'luchtdmmhg', 'luchtdinhg': air pressure, derivable from 'luchtd' (hPa/mbar).
    
    This reduces the number of columns from 27 to 21 for HARMONIE data, and from 31 to 22 for GFS data.
    
    Note that the columns are deleted from the dataframe that is passed in (it is modified in place); that same
    object is returned.
    
    Parameters:
        dataFrame (df):  Original Pandas dataframe.
    
    Returns:
        dataFrame (df):  Pruned Pandas dataframe (the same object as the input).
    """
    
    redundant = (
        'loc',                                # Obsolescent
        'windb', 'windknp', 'windkmh',        # Follow from winds (m/s)
        'gustb', 'gustkt', 'gustkmh',         # Follow from gust (m/s)
        'luchtdmmhg', 'luchtdinhg',           # Follow from luchtd (hPa/mbar)
    )
    
    for name in redundant:
        if name in dataFrame.columns:
            del dataFrame[name]
    
    return dataFrame




# [docs]
def write_json_file_weatherforecast(fileName, location, data):
    """Write a Meteoserver weather-forecast-data JSON file to disc.
    
    The resulting file has the same format as a downloaded file (barring some spacing): a dictionary with the
    key 'plaatsnaam' (a list holding one dict with the key 'plaats') and the key 'data' (a list with one dict per
    dataframe row).
    
    Parameters:
        fileName (string):  The name of the JSON file to write.  An existing file is overwritten.
        location (string):  The location the data are for.
        data (df):          Pandas dataframe containing forecast data for the specified location (or region).
    """
    
    # Put the location and the data (one dict per dataframe row) into an enveloping dict:
    fileDict = {
        'plaatsnaam': [{'plaats': location}],
        'data': data.to_dict(orient='records'),
    }
    
    # Create a compact JSON string.  default=str converts values that are not JSON serialisable (e.g. Pandas
    # Timestamps) to strings - https://stackoverflow.com/a/36142844/1386750.  This adds " and escapes existing ones.
    textJSON = json.dumps(fileDict, indent=None, separators=(',', ':'), default=str)
    
    # The context manager closes the file even if the write fails:
    with open(fileName, 'w', encoding='utf-8') as outFile:
        outFile.write(textJSON)
    
    return