Source code for NECKAr_get_functions

#! /usr/bin/env python3
# This Python file uses the following encoding: utf-8

__author__ = 'jgeiss'


#############################################################################
# authors: Johanna Geiß, Heidelberg University, Germany                     #
# email: geiss@informatik.uni-heidelberg.de                                 #
# Copyright (c) 2017 Database Research Group,                               #
#               Institute of Computer Science,                              #
#               University of Heidelberg                                    #
#   Licensed under the Apache License, Version 2.0 (the "License");         #
#   you may not use this file except in compliance with the License.        #
#   You may obtain a copy of the License at                                 #
#                                                                           #
#   http://www.apache.org/licenses/LICENSE-2.0                              #
#                                                                           #
#   Unless required by applicable law or agreed to in writing, software     #
#   distributed under the License is distributed on an "AS IS" BASIS,       #
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.#
#   See the License for the specific language governing permissions and     #
#   limitations under the License.                                          #
#############################################################################
#      02.03.2017                                     #
#      last update 21.03.2017 by Johanna Geiß         #
#######################################################
#This code finds all the auxiliary information that is stored with each entity of all listed categories

######################################################################################################
# Common Fields
######################################################################################################

def get_WDid(json_object):
    """Return the Wikidata id of an item.

    :param json_object: entity object <class 'dict'>
    :return: value stored under the "id" key <class 'string'>
    :raises KeyError: if the entity has no "id" key
    """
    return json_object["id"]
def get_label(json_object, id):
    """Return the preferred display label of an item.

    Labels are tried in the order "en", "en-gb", "en-ca", "de". If none of
    these exists, the first label of any language is used. If the item has
    no labels at all, the Wikidata id is returned instead.

    :param json_object: entity object <class 'dict'>
    :param id: Wikidata id, used as fallback <class 'string'>
    :return: label, or ``id`` when no label is available <class 'string'>
    """
    if "labels" not in json_object:
        return id

    # labels maps a language code to a dict holding the display name
    labels = json_object["labels"]
    for language in ("en", "en-gb", "en-ca", "de"):
        if language in labels:
            return labels[language]["value"]

    if labels.values():
        # no English or German label: fall back to whichever label comes first
        return list(labels.values())[0]["value"]
    return id
def get_description(json_obejct):
    """Return the English description of an item.

    :param json_obejct: entity object <class 'dict'>
    :return: English description <class 'string'> | None if the item has
        no "descriptions" entry or no "en" description
    """
    if "descriptions" in json_obejct:
        descriptions = json_obejct["descriptions"]
        if "en" in descriptions:
            return descriptions["en"]["value"]
    return None
################################################################################################################# # Common fields written ################################################################################################################# ################################################################################################################# # Specific Fields for each neClass ################################################################################################################# ######################### #Person #########################
def get_datebirth(json_object):
    """Return the date of birth of a person.

    Delegates to get_datelife with the property P569.

    :param json_object: entity object <class 'dict'>
    :return: date of birth <class 'string'> | None
    """
    birth_date = get_datelife(json_object, "P569")
    return birth_date
def get_datedeath(json_object):
    """Return the date of death of a person.

    Delegates to get_datelife with the property P570.

    :param json_object: entity object <class 'dict'>
    :return: date of death <class 'string'> | None
    """
    death_date = get_datelife(json_object, "P570")
    return death_date
def get_datelife(json_object, P):
    """Return the time value of a date property of an item.

    Only explicit dates are returned: a statement is accepted only when both
    the "before" and the "after" field of its time value are 0. If several
    statements qualify, the last one wins.

    :param json_object: entity object <class 'dict'>
    :param P: property id to read, e.g. P569 (date of birth) or
        P570 (date of death) <class 'string'>
    :return: content of the "time" field <class 'string'> | None
    """
    if "claims" not in json_object:
        return None
    statements = json_object["claims"]
    if P not in statements:
        return None

    value = None
    for statement in statements[P]:
        snak = statement["mainsnak"]
        # skip snaks of another property and snaks that carry no value
        if snak["property"] != P or "datavalue" not in snak:
            continue
        time_value = snak["datavalue"]["value"]
        after = time_value["after"]
        before = time_value["before"]
        if after == 0 and before == 0:
            value = time_value["time"]
    return value
# Maps the numeric id of a "sex or gender" (P21) value to a readable name;
# get_gender falls back to the raw numeric id for values not listed here.
gender_dict = {
    6581097: "male",
    6581072: "female",
    1097630: "intersex",
    303479: "hermaphrodite",
    189125: "transgender",
    1052281: "transgender female",
    2449503: "transgender male",
    48270: "genderqueer",
    1399232: "fa-afafine",
    3277905: "mahu",
    746411: "kathoey",
    350374: "fakaleiti",
    660882: "hijra",
}
def get_gender(json_object):
    """Return the gender (P21) of a person.

    The numeric id of the P21 value is translated through gender_dict; ids
    that are not listed there are returned unchanged. If several P21
    statements carry a value, the last one wins.

    :param json_object: entity object <class 'dict'>
    :return: gender <class 'string'> | <class 'int'> | None
    """
    if "claims" not in json_object:
        return None
    statements = json_object["claims"]
    if "P21" not in statements:
        return None

    gender = None
    for statement in statements["P21"]:
        snak = statement["mainsnak"]
        if snak["property"] == "P21" and "datavalue" in snak:
            gender_id = snak["datavalue"]["value"]["numeric-id"]
            gender = gender_dict.get(gender_id, gender_id)
    return gender
def get_occupation(json_object):
    """Return the occupations (P106) of a person.

    :param json_object: entity object <class 'dict'>
    :return: numeric ids of all P106 values (list of int), empty if none
    """
    if "claims" not in json_object:
        return []
    statements = json_object["claims"]
    if "P106" not in statements:
        return []
    return [
        statement["mainsnak"]["datavalue"]["value"]["numeric-id"]
        for statement in statements["P106"]
        if statement["mainsnak"]["property"] == "P106"
        and "datavalue" in statement["mainsnak"]
    ]
def get_alias_list(json_object):
    """Return all labels and aliases of an item, in every language.

    Duplicates are removed. Labels are cut to at most 125 characters and
    aliases to at most 124 characters.

    :param json_object: entity object <class 'dict'>
    :return: aliaslist (list of strings, in no particular order)
    """
    # NOTE(review): labels are cut at 125 but aliases at 124 characters --
    # confirm whether the two limits are meant to differ.
    names = set()
    if "labels" in json_object:
        labels = json_object["labels"]
        names.update(labels[lang]["value"][:125] for lang in labels)
    if "aliases" in json_object:
        aliases = json_object["aliases"]
        for lang in aliases:
            names.update(entry["value"][:124] for entry in aliases[lang])
    return list(names)
################################################################################################# ######################### #Location #########################
def get_location_inside(json_object):
    """Return the countries (P17) and continents (P30) an entity lies in.

    :param json_object: entity object <class 'dict'>
    :return: tuple (in_country, in_continent); both are lists of the numeric
        ids found, empty if the entity has no such statements
    """
    if "claims" not in json_object:
        return [], []
    claims = json_object["claims"]

    def numeric_ids(prop):
        # numeric ids of every statement of `prop` that carries a value
        if prop not in claims:
            return []
        return [
            entry["mainsnak"]["datavalue"]["value"]["numeric-id"]
            for entry in claims[prop]
            if "datavalue" in entry["mainsnak"]
        ]

    in_country = numeric_ids("P17")
    in_continent = numeric_ids("P30")
    return in_country, in_continent
def get_poi(json_object, country_subclass, settlement_subclass, city_subclass,
            sea_subclass, river_subclass, mountain_subclass,
            mountainr_subclass, state_subclass, hgte_subclass):
    """Return the location types of an entity.

    Every "instance of" (P31) value of the entity is compared with the given
    subclass collections; each collection that contains the value contributes
    its type name. The value 5107 is reported as "Continent".

    :param json_object: entity object <class 'dict'>
    :param country_subclass: list of all country (Q6256, Q3624078, Q1763527) subclasses
    :param settlement_subclass: list of all settlement (Q486972) subclasses
    :param city_subclass: list of all city (Q515) subclasses
    :param sea_subclass: list of all sea (Q165) subclasses
    :param river_subclass: list of all river (Q4022) subclasses
    :param mountain_subclass: list of all mountain (Q8502) subclasses
    :param mountainr_subclass: list of all mountain range (Q41437459) subclasses
    :param state_subclass: list of all state (Q7275) subclasses
    :param hgte_subclass: list of all human geographic territorial entity (Q15642541) subclasses
    :return: list of distinct types (list of strings, in no particular order)
    """
    if "claims" not in json_object or "P31" not in json_object["claims"]:
        return []

    # type name -> collection of numeric ids that count as this type
    type_table = (
        ("Country", country_subclass),
        ("Settlement", settlement_subclass),
        ("City", city_subclass),
        ("Sea", sea_subclass),
        ("River", river_subclass),
        ("Mountain", mountain_subclass),
        ("Mountain Range", mountainr_subclass),
        ("State", state_subclass),
        ("human_geographic_territorial_entity", hgte_subclass),
    )

    found = set()
    for statement in json_object["claims"]["P31"]:
        snak = statement["mainsnak"]
        if "datavalue" not in snak:
            continue
        class_id = snak["datavalue"]["value"]["numeric-id"]
        if class_id == 5107:  # continent
            found.add("Continent")
        for type_name, members in type_table:
            if class_id in members:
                found.add(type_name)
    return list(found)
def get_coordinate(json_object):
    """Return the coordinate location (P625) of an entity.

    Only the first P625 statement is considered.

    :param json_object: entity object <class 'dict'>
    :return: coordinate [longitude, latitude] | None
    """
    if "claims" not in json_object:
        return None
    if "P625" not in json_object["claims"]:
        return None

    first_snak = json_object["claims"]["P625"][0]["mainsnak"]
    if "datavalue" not in first_snak:
        return None
    position = first_snak["datavalue"]["value"]
    longitude = position["longitude"]
    latitude = position["latitude"]
    return [longitude, latitude]
def get_population(json_object):
    """Return the population (P1082) of an entity.

    Only the first P1082 statement is considered. The amount is expected to
    be a string of digits, optionally preceded by "+"; it is multiplied by
    the unit when the unit is a string of digits as well.

    :param json_object: entity object <class 'dict'>
    :return: population (int) | None if there is no usable value, i.e. no
        P1082 statement, no datavalue, or an amount/unit that is not a
        plain non-negative integer string
    """
    if "claims" not in json_object:
        return None
    claims = json_object["claims"]
    if "P1082" not in claims:
        return None
    statements = claims["P1082"]
    if not statements:
        return None

    snak = statements[0]["mainsnak"]
    if "datavalue" not in snak:
        return None
    quantity = snak["datavalue"]["value"]
    amount = quantity["amount"]
    unit = quantity["unit"]

    # Remove an explicit "+" sign only. Dropping the first character
    # unconditionally would eat the leading digit of an unsigned amount and
    # would turn a negative amount into a positive one.
    if amount.startswith("+"):
        amount = amount[1:]

    if amount.isdigit() and unit.isdigit():
        return int(amount) * int(unit)
    return None
def get_geonamesID(json_object):
    """Return the GeoNames id (P1566) of an entity.

    If several P1566 statements carry a value, the last one wins.

    :param json_object: entity object <class 'dict'>
    :return: geonames ID (string) | None
    """
    if "claims" not in json_object:
        return None
    statements = json_object["claims"]
    if "P1566" not in statements:
        return None

    values = [
        statement["mainsnak"]["datavalue"]["value"]
        for statement in statements["P1566"]
        if "datavalue" in statement["mainsnak"]
    ]
    return values[-1] if values else None
################################################################################################# ######################### #Organization #########################
def get_official_language(json_object):
    """Return the official languages (P37) of an entity.

    :param json_object: entity object <class 'dict'>
    :return: numeric ids of all P37 values (list of int), empty if none
    """
    if "claims" not in json_object:
        return []
    statements = json_object["claims"]
    if "P37" not in statements:
        return []
    snaks = (statement["mainsnak"] for statement in statements["P37"])
    return [
        snak["datavalue"]["value"]["numeric-id"]
        for snak in snaks
        if "datavalue" in snak
    ]
def get_inception(json_object):
    """Return the date of inception (P571) of an entity.

    The value is read through get_datelife, so the same rules for accepting
    a date apply.

    :param json_object: entity object <class 'dict'>
    :return: date of inception (string) | None
    """
    has_inception = "claims" in json_object and "P571" in json_object["claims"]
    if not has_inception:
        return None
    return get_datelife(json_object, "P571")
def get_hq_location(json_object):
    """Return the headquarters location (P159) of an entity.

    If several P159 statements carry a value, the last one wins.

    :param json_object: entity object <class 'dict'>
    :return: numeric id of the location, hq_location (int) | None
    """
    if "claims" not in json_object:
        return None
    statements = json_object["claims"]
    if "P159" not in statements:
        return None

    location_ids = [
        statement["mainsnak"]["datavalue"]["value"]["numeric-id"]
        for statement in statements["P159"]
        if "datavalue" in statement["mainsnak"]
    ]
    if not location_ids:
        return None
    return location_ids[-1]
def get_official_website(json_object):
    """Return the official website (P856) of an entity.

    If several P856 statements carry a value, the last one wins.

    :param json_object: entity object <class 'dict'>
    :return: official_website (string) | None
    """
    if "claims" not in json_object:
        return None
    statements = json_object["claims"]
    if "P856" not in statements:
        return None

    website = None
    for snak in (statement["mainsnak"] for statement in statements["P856"]):
        if "datavalue" in snak:
            website = snak["datavalue"]["value"]
    return website
def get_founder(json_object):
    """Return the founders (P112) of an entity.

    :param json_object: entity object <class 'dict'>
    :return: numeric ids of all P112 values, founder (list of int)
    """
    if "claims" not in json_object:
        return []
    statements = json_object["claims"]
    if "P112" not in statements:
        return []
    return [
        statement["mainsnak"]["datavalue"]["value"]["numeric-id"]
        for statement in statements["P112"]
        if "datavalue" in statement["mainsnak"]
    ]
def get_ceo(json_object):
    """Return the chief executive officers (P169) of an entity.

    :param json_object: entity object <class 'dict'>
    :return: numeric ids of all P169 values, ceo (list of int)
    """
    has_ceo = "claims" in json_object and "P169" in json_object["claims"]
    if not has_ceo:
        return []
    snaks = [statement["mainsnak"] for statement in json_object["claims"]["P169"]]
    return [
        snak["datavalue"]["value"]["numeric-id"]
        for snak in snaks
        if "datavalue" in snak
    ]
def get_country(json_object):
    """Return the countries (P17) of an entity.

    :param json_object: entity object <class 'dict'>
    :return: numeric ids of all P17 values, country (list of int)
    """
    if "claims" not in json_object:
        return []
    statements = json_object["claims"]
    if "P17" not in statements:
        return []

    country = []
    for snak in (entry["mainsnak"] for entry in statements["P17"]):
        if "datavalue" not in snak:
            continue  # statement without a concrete value
        country.append(snak["datavalue"]["value"]["numeric-id"])
    return country
def get_instance_of(json_object):
    """Return the types an entity is an instance of (P31).

    :param json_object: entity object <class 'dict'>
    :return: numeric ids of all P31 values, instance_of (list of int)
    """
    if "claims" not in json_object:
        return []
    statements = json_object["claims"]
    if "P31" not in statements:
        return []
    return [
        statement["mainsnak"]["datavalue"]["value"]["numeric-id"]
        for statement in statements["P31"]
        if "datavalue" in statement["mainsnak"]
    ]