In [1]:
import bz2
import json
import os

from neo4j import GraphDatabase

In [2]:
def createClaim(causal_relation):
    """Build the Cypher statement that stores one causal claim.

    The statement merges a ``CausalConcept`` node for the cause and one for
    the effect, creates a ``Claim`` node of type ``mayCause``, links the claim
    to both concepts and returns ``id(claim)``.

    Args:
        causal_relation: mapping holding the concept names under
            ``['causal_relation']['cause']['concept']`` and
            ``['causal_relation']['effect']['concept']``.

    Returns:
        The Cypher statement as a single string.
    """
    relation = causal_relation['causal_relation']
    cause_concept = relation['cause']['concept']
    effect_concept = relation['effect']['concept']

    # json.dumps produces a quoted, escaped literal; its own surrounding
    # quotes are cut off because the statement template supplies them.
    escaped_cause = json.dumps(cause_concept)[1:-1]
    escaped_effect = json.dumps(effect_concept)[1:-1]

    statement_lines = [
        'MERGE (c:CausalConcept {concept: "' + escaped_cause + '"})',
        'MERGE (e:CausalConcept {concept: "' + escaped_effect + '"})',
        'CREATE (claim:Claim {type: "mayCause"})',
        'CREATE (claim) -[:cause]-> (c)',
        'CREATE (claim) -[:effect]-> (e)',
        '',  # blank line separating the writes from the RETURN clause
        'RETURN id(claim)',
    ]
    return "\n".join(statement_lines)

In [3]:
def createSourceQueries(claim_id, causal_relation):
    """Build one Cypher statement per source of a causal relation.

    Every statement matches the claim node by its internal id, creates a
    ``Source`` node attached through a ``claimedIn`` relationship and appends
    the property assignments produced by ``createSource``. The i-th source
    uses the node variable ``s<i>``, which is also the name of the query
    parameter referenced by the generated statement.

    Args:
        claim_id: internal node id of the claim; it is interpolated into the
            statement with ``str()``.
        causal_relation: mapping with a ``'sources'`` sequence. Only that key
            is read here.

    Returns:
        A list of statement strings, one per source, in source order.
    """
    claimQuery = "MATCH (claim)\n"
    claimQuery += "WHERE id(claim)=" + str(claim_id) + "\n"

    sourceQueries = []
    for i, source in enumerate(causal_relation['sources']):
        sourceID = "s" + str(i)
        query = "CREATE (claim) -[:claimedIn]-> (" + sourceID + ":Source)\n"
        query += createSource(sourceID, source)
        sourceQueries.append(claimQuery + query)

    return sourceQueries

In [124]:
def createSource(sourceID, source):
    """Return the Cypher ``SET`` clauses that fill in one source node.

    Two clauses are produced: one assigns the map passed as query parameter
    ``$<sourceID>`` to the node, the other sets the node's ``type`` property
    from ``source['type']``.

    Args:
        sourceID: Cypher variable name of the source node; the same name is
            used for the query parameter.
        source: mapping with a ``'type'`` entry (presumably a string -
            confirm against the data set).

    Returns:
        The two newline-terminated ``SET`` clauses as one string.
    """
    # The map assignment has to come first: ``SET n = $map`` replaces every
    # property of the node, so a ``type`` written before it would be erased.
    query = "SET " + sourceID + "=$" + sourceID + "\n"
    # json.dumps emits a quoted, escaped literal, so quotes or backslashes in
    # the type cannot break out of the string.
    query += "SET " + sourceID + ".type=" + json.dumps(source['type']) + "\n"
    return query

In [4]:
def createParameters(causal_relation):
    """Collect the query parameters for every source of a causal relation.

    For the i-th source a one-entry dict ``{"s<i>": properties}`` is built.
    ``properties`` holds every payload entry whose value is exactly of type
    ``str``; the payload's ``'sentence'`` entry, when present, is always
    included whatever its type.

    Args:
        causal_relation: mapping with a ``'sources'`` sequence whose items
            each carry a ``'payload'`` mapping.

    Returns:
        A list of parameter dicts, one per source, in source order.
    """
    parameters = []
    for index, source in enumerate(causal_relation['sources']):
        payload = source['payload']

        properties = {}
        for key, value in payload.items():
            # Exact type check on purpose: subclasses of str are skipped too.
            if type(value) is str:
                properties[key] = value

        if 'sentence' in payload:
            properties['sentence'] = payload['sentence']

        parameters.append({"s" + str(index): properties})
    return parameters

In [5]:
def loadCauseNetIntoNeo4j(uri, user, password, graph_path):
    """Load a bz2-compressed CauseNet JSON-lines file into Neo4j.

    For every non-blank line the claim is created first (``createClaim``);
    the returned node id is then used to attach each source with its own
    statement (``createSourceQueries`` / ``createParameters``).

    Args:
        uri: Bolt URI of the Neo4j server.
        user: user name for authentication.
        password: password for authentication.
        graph_path: path of the ``.jsonl.bz2`` file, one JSON object per line.

    Raises:
        RuntimeError: if the claim statement returns no record, so no claim
            id is available for the sources.
    """
    # One driver is shared by all statements instead of opening and closing a
    # new one per query; try/finally closes it even when a line fails.
    driver = GraphDatabase.driver(uri, auth=(user, password), encrypted=False)
    try:
        # The context manager closes the compressed file handle.
        with bz2.BZ2File(graph_path) as graph_file:
            for line in graph_file:
                if not line.strip():
                    continue  # tolerate blank lines, e.g. a trailing newline
                causal_relation = json.loads(line)

                with driver.session() as session:
                    # create Claim
                    # Reset per line so an id from a previous claim can never
                    # be reused by accident.
                    claim_id = None
                    # Statement and parameters are passed positionally; the
                    # keyword for the statement differs between driver versions.
                    for record in session.run(createClaim(causal_relation)):
                        claim_id = record["id(claim)"]
                    if claim_id is None:
                        raise RuntimeError("claim creation returned no id")

                    # Create sources (important: don't do this all in once
                    # [too long queries])
                    sourceQueries = createSourceQueries(claim_id, causal_relation)
                    parameters = createParameters(causal_relation)

                    assert len(sourceQueries) == len(parameters)
                    for sourceQuery, parameter in zip(sourceQueries, parameters):
                        # consume() drains the result so a failure surfaces
                        # at the statement that caused it.
                        session.run(sourceQuery, parameter).consume()
    finally:
        driver.close()

In [6]:
# docker run --publish=7474:7474 --publish=7687:7687 --user="$(id -u):$(id -g)" neo4j:4.0

# Connection settings and the input path can be overridden through environment
# variables, so real credentials never have to be written into the notebook.
# The fallbacks are the values that used to be hard-coded in this cell.
uri = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")
graph_path = os.environ.get("CAUSENET_PATH", "causenet-full.jsonl.bz2")

loadCauseNetIntoNeo4j(uri, user, password, graph_path)