Исходный код soika.src.utils.data_getter.street_getter
importreimportgeopandasasgpdimportpandasaspdimportwarningsfromsoika.src.utils.data_getter.geo_data_getterimportGeoDataGetterasdgfromsoika.src.utils.data_preprocessingimportpreprocessorasppfromsoika.src.utils.constantsimportTOPONYM_PATTERNwarnings.simplefilter("ignore")warnings.filterwarnings("ignore",category=DeprecationWarning)classStreetsError(Exception):"""Custom exception for streets-related errors"""pass
[документация]classStreets:""" This class encapsulates functionality for retrieving street data for a specified city from OSM and processing it to extract useful information for geocoding purposes. """
[документация]@staticmethoddefget_street_names(gdf:gpd.GeoDataFrame)->pd.DataFrame:""" Method extracts the unique street names from a GeoDataFrame of street segments. """# Streets.logger.info("Extracting street names")names=set(gdf["name"].explode().dropna())df_streets=pd.DataFrame(names,columns=["street"])# Streets.logger.debug(f"Street names extracted: {df_streets}")returndf_streets
[документация]@staticmethoddeffind_toponim_words_from_name(x:str)->str:""" A method to find toponim words from the given name string. Args: x (str): The input name string. Returns: str: The found toponim word from the input name string, or None if not found. """# Streets.logger.debug(f"Finding toponim words in {x}")pattern=re.compile(TOPONYM_PATTERN,re.IGNORECASE,)match=pattern.search(x)ifmatch:returnmatch.group().strip().lower()else:returnNone
[документация]@staticmethoddefdrop_words_from_name(x:str)->str:""" This function drops parts of street names that are not the name of the street (e.g. avenue). """# Streets.logger.debug(f"Dropping words from {x}")try:lst=re.split(TOPONYM_PATTERN,x,)lst.remove("")returnlst[0].strip().lower()exceptValueError:returnx
[документация]@staticmethoddefclear_names(streets_df:pd.DataFrame)->pd.DataFrame:""" This function pre-process the street names from the OSM. This step is necessary to match recognised street addresses later. We need to do this match because Nominatim is very sensitive geocoder and requires almost exact match between addresses in the OSM database and the geocoding address. """# Streets.logger.info("Clearing street names")streets_df["toponim_name"]=streets_df["street"].map(Streets.find_toponim_words_from_name)streets_df["street_name"]=streets_df["street"].map(Streets.drop_words_from_name)# Streets.logger.debug(f"Street names cleared: {streets_df}")returnstreets_df
[документация]@staticmethoddefrun(osm_id:int)->pd.DataFrame:""" A static method to run the process of getting street data based on the given OSM id, returning a pandas DataFrame. """# Streets.logger.info(f"Running street data retrieval for osm_id {osm_id}")city_bounds=dg.get_city_bounds(osm_id)streets_graph=dg.get_drive_graph(city_bounds)streets_gdf=pp.graph_to_gdf(streets_graph)streets_df=Streets.get_street_names(streets_gdf)streets_df=Streets.clear_names(streets_df)# Streets.logger.info(f"Street data retrieval complete: {streets_df}")returnstreets_df