介绍新项目：ng_ai，NebulaGraph 的图算法套件，好用的 NebulaGraph 的 high-level Python Algorithm API，它的目标是让 NebulaGraph 的数据科学家用户能够用很少的代码量执行图上的算法相关的任务。
่ฟๅจ๏ผNebulaGraph 3.5.0 ๅๅธๅฆ๏ผ@whitewum ๅด่ๅธๅปบ่ฎฎๆไปฌๆ่ไนๅไธๆฎตๆถ้ด NebulaGraph ็คพๅบ้ๅผๅฏ็ๆฐ้กน็ฎ ng_ai ๅ ฌๅผ็ปๅคงๅฎถ๏ผๆฌๆๅฐฑๆฏ็ฌฌไธ็ฏไป็ป ng_ai ็ๆ็ซ ๏ผ
ng_ai 的全名是：NebulaGraph AI Suite，顾名思义，它是在 NebulaGraph 之上跑算法的 Python 套件，希望能给 NebulaGraph 的数据科学家用户一个自然、简洁的高级 API，用很少的代码量执行图上的算法相关的任务。
在 ng_ai 这个开源项目里，我们希望快速迭代、公开讨论、演进它，而这背后的目标是：
Simplifying things in surprising ways.
为了让 NebulaGraph 社区的同学拥有顺滑的算法体验，ng_ai 有以下特点：
- 与 NebulaGraph 紧密结合，方便从其中读、写图数据
- 支持多引擎、后端，目前支持 Spark（NebulaGraph Algorithm）、NetworkX，之后会支持 DGL、PyG
- 友好、符合直觉的 API 设计
- 与 NebulaGraph 的 UDF 无缝结合，支持从 Query 中调用 ng_ai 任务
- 友好的自定义算法接口，方便用户自己实现算法（尚未完成）
- 一键试玩环境（基于 Docker Extension）
如果在一个大图上，基于 Nebula-Algorithms 分布式地跑 pagerank 算法，我们可以这么做：
python
from ng_ai import NebulaReader
# Build a reader on the Spark engine and use scan mode:
# scan the "follow" edge type together with its "degree" property.
reader = NebulaReader(engine="spark")
reader.scan(edge="follow", props="degree")
# Materialise the scan; algorithms are reached through the result's `.algo` attribute.
df = reader.read()
# Run PageRank with reset_prob=0.15 and max_iter=10.
pr_result = df.algo.pagerank(reset_prob=0.15, max_iter=10)
假设我们要跑一个 label propagation 算法，然后把结果写回 NebulaGraph，我们可以这么做：
先确保要写回 TAG 的 schema 已经创建好了，写到 label_propagation.cluster_id 字段里：
sql
# Tag that receives the label-propagation result;
# cluster_id is declared as a non-nullable string property.
CREATE TAG IF NOT EXISTS label_propagation (
cluster_id string NOT NULL
);
我们先执行算法：
python
df_result = df.algo.label_propagation()
再看一下结果的 schema：
python
df_result.printSchema()
root
|-- _id: string (nullable = false)
|-- lpa: string (nullable = false)
็ถๅ๏ผไปฃ็ ้่ฟไนๅ๏ผๆไปฌๆ lpa ็็ปๆๅๅ NebulaGraph ไธญ็ cluster_id ๅญๆฎต้๏ผ{"lpa": "cluster_id"}๏ผ๏ผ
python
from ng_ai import NebulaWriter
from ng_ai.config import NebulaGraphConfig
# Default configuration (no overrides passed here).
config = NebulaGraphConfig()
# Writer that sends df_result to NebulaGraph as vertex data via the Spark engine.
writer = NebulaWriter(
data=df_result, sink="nebulagraph_vertex", config=config, engine="spark"
)
# map result column "lpa" onto the tag property "cluster_id"
properties = {"lpa": "cluster_id"}
# Target tag, the column holding the vertex id, the column-to-property mapping,
# and the batch size / write mode used for the write.
writer.set_options(
tag="label_propagation",
vid_field="_id",
properties=properties,
batch_size=256,
write_mode="insert",
)
# write back to NebulaGraph
writer.write()
最后，我们可以验证一下结果啦：
cypher
# Switch to the basketballplayer space, then return the id and the
# label_propagation.cluster_id property of three vertices carrying that tag.
USE basketballplayer;
MATCH (v:label_propagation)
RETURN id(v), v.label_propagation.cluster_id LIMIT 3;
结果：
SQL
+-------------+--------------------------------+
| id(v) | v.label_propagation.cluster_id |
+-------------+--------------------------------+
| "player103" | "player101" |
| "player113" | "player129" |
| "player121" | "player129" |
+-------------+--------------------------------+
更详细的例子参考：ng_ai/examples
从 NebulaGraph 3.5.0 之后，我们可以写自己的 UDF 来从 nGQL 里调用自己实现的函数，ng_ai 也用这个能力来实现了一个 ng_ai 函数，它可以从 nGQL 里调用 ng_ai 的算法，例如：
sql
# Prepare the write schema.
# ("--" is not a comment marker in nGQL 2.x and later, where it denotes an
# edge pattern, so "#" is used for comments here.)
USE basketballplayer;
CREATE TAG IF NOT EXISTS pagerank(pagerank string);
# nebula-console command: wait 20 seconds, presumably so the new tag schema
# has taken effect before the write below.
:sleep 20;
# Call with ng_ai(): run "pagerank" over the "follow" edge and its "degree"
# property on the "spark" engine, then insert the result.
RETURN ng_ai("pagerank", ["follow"], ["degree"], "spark", {space: "basketballplayer", max_iter: 10}, {write_mode: "insert"})
更详细的例子参考：ng_ai/examples
在单机、本地的环境里，ng_ai 支持基于 NetworkX 运行算法，例如：
读取图为 ng_ai graph 对象：
python
from ng_ai import NebulaReader
from ng_ai.config import NebulaGraphConfig
# read data with nebula/networkx engine, query mode
# Connection settings: graphd address, credentials and the graph space to use.
# NOTE(review): root/nebula look like demo credentials — confirm before reusing elsewhere.
config_dict = {
"graphd_hosts": "graphd:9669",
"user": "root",
"password": "nebula",
"space": "basketballplayer",
}
config = NebulaGraphConfig(**config_dict)
reader = NebulaReader(engine="nebula", config=config)
# Query the "follow" and "serve" edges; props is a parallel list, presumably
# one property list per edge in the same order ("degree" for follow, none for serve).
reader.query(edges=["follow", "serve"], props=[["degree"], []])
# Materialise the query result as the graph object `g`.
g = reader.read()
查看、画图：
python
g.show(10)
g.draw()
运行算法：
python
pr_result = g.algo.pagerank(reset_prob=0.15, max_iter=10)
写回 NebulaGraph：
python
from ng_ai import NebulaWriter
# Writer that sends pr_result to NebulaGraph as vertex data via the "nebula" engine.
writer = NebulaWriter(
data=pr_result,
sink="nebulagraph_vertex",
config=config,
engine="nebula",
)
# properties to write
properties = ["pagerank"]
# Target tag plus the batch size / write mode used for the write.
writer.set_options(
tag="pagerank",
properties=properties,
batch_size=256,
write_mode="insert",
)
# write back to NebulaGraph
writer.write()
其他算法：
python
# get all algorithms
g.algo.get_all_algo()
# get help of each algo
help(g.algo.node2vec)
# call the algo
g.algo.node2vec()
更详细的例子参考：ng_ai/examples
再演示一个 NetworkX 引擎情况下，计算 Louvain、PageRank 并可视化的例子：
先执行两个算法：
python
pr_result = g.algo.pagerank(reset_prob=0.15, max_iter=10)
louvain_result = g.algo.louvain()
这次我们手写一个好看一点的画图函数：
python
from matplotlib.colors import ListedColormap
def draw_graph_louvain_pr(G, pr_result, louvain_result, colors=None):
    """Draw ``G`` with nodes coloured by Louvain community and sized by PageRank.

    NOTE(review): relies on ``nx`` (networkx) and ``plt`` (matplotlib.pyplot)
    already being in scope; only ``ListedColormap`` is imported next to this
    snippet.

    Args:
        G: graph handed to the networkx layout and drawing helpers.
        pr_result: mapping node -> PageRank score; scales the node size.
        louvain_result: mapping node -> integer community id; selects the
            node colour and drives the colorbar ticks.
        colors: optional list of colours for the community colormap. When
            omitted, the nine-colour blue-to-red palette below is used.
    """
    # Build the default palette per call instead of sharing a mutable default.
    if colors is None:
        colors = [
            "#1984c5", "#22a7f0", "#63bff0", "#a7d5ed", "#e2e2e2",
            "#e1a692", "#de6e56", "#e14b31", "#c23728",
        ]

    # Highest community id, computed once; default=0 keeps an empty
    # louvain_result from raising ValueError.
    max_community = max(louvain_result.values(), default=0)

    # Define positions for the nodes
    pos = nx.spring_layout(G)

    # Create a figure and set the axis limits
    fig, ax = plt.subplots(figsize=(35, 15))
    ax.set_xlim(-1, 1)
    ax.set_ylim(-1, 1)

    # Create a colormap from the colors list
    cmap = ListedColormap(colors)

    # Draw the nodes and edges of the graph:
    # colour = community id, size = 70000 * PageRank score.
    node_colors = [louvain_result[node] for node in G.nodes()]
    node_sizes = [70000 * pr_result[node] for node in G.nodes()]
    nx.draw_networkx_nodes(
        G, pos=pos, ax=ax,
        node_color=node_colors, node_size=node_sizes,
        cmap=cmap, vmin=0, vmax=max_community,
    )
    nx.draw_networkx_edges(
        G, pos=pos, ax=ax,
        edge_color='gray', width=1,
        connectionstyle='arc3, rad=0.2', arrowstyle='-|>', arrows=True,
    )

    # Extract edge labels as a dictionary
    edge_labels = nx.get_edge_attributes(G, 'label')

    # Add edge labels to the graph, centred on the midpoint of each edge
    for edge, label in edge_labels.items():
        ax.text((pos[edge[0]][0] + pos[edge[1]][0])/2,
                (pos[edge[0]][1] + pos[edge[1]][1])/2,
                label, fontsize=12, color='black', ha='center', va='center')

    # Add node labels to the graph; fall back to the node itself when it has no 'label'
    node_labels = {n: G.nodes[n].get('label', n) for n in G.nodes()}
    nx.draw_networkx_labels(G, pos=pos, ax=ax, labels=node_labels, font_size=12, font_color='black')

    # Add colorbar for community colors
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=0, vmax=max_community))
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax, ticks=range(max_community + 1), shrink=0.5)
    cbar.ax.set_yticklabels([f'Community {i}' for i in range(max_community + 1)])

    # Show the figure
    plt.show()
draw_graph_louvain_pr(G, pr_result=pr_result, louvain_result=louvain_result)
效果如图：
更详细的例子参考：ng_ai/examples
结合 NebulaGraph 的 Jupyter Notebook 插件: https://github.com/wey-gu/ipython-ngql ，我们还可以更方便地操作 NebulaGraph：
在 Jupyter Notebook 里安装这个插件可以通过 ng_ai 的 extras 安装：
python
%pip install ng_ai[jupyter]
%load_ext ngql
ไนๅฏไปฅๅ็ฌๅฎ่ฃ
python
%pip install ipython-ngql
%load_ext ngql
之后，我们就可以在 Notebook 里直接使用 %ngql 命令来执行 nGQL 语句了：
python
%ngql --address 127.0.0.1 --port 9669 --user root --password nebula
%ngql USE basketballplayer;
%ngql MATCH (v:player{name:"Tim Duncan"})-->(v2:player) RETURN v2.player.name AS Name;
注：多行的 Query 用两个百分号就好了
%%ngql
最后，我们还能在 Jupyter Notebook 里直接可视化渲染结果！只需要 %ng_draw 就可以啦！
python
%ngql match p=(:player)-[]->() return p LIMIT 5
%ng_draw
效果如下：

现在 ng_ai 还在开发中，我们还有很多工作要做：
- 完善 reader 模式，现在 NebulaGraph/NetworkX 的读取数据只支持 Query-Mode，还需要支持 Scan-Mode
- 实现基于 dgl(GNN) 的链路预测、节点分类等算法，例如：
python
# Sketch of the planned GNN link-prediction API; it appears under the
# work-still-to-do list, so treat it as not yet available.
model = g.algo.gnn_link_prediction()
result = model.train()
# query src, dst to be predicted
# NOTE(review): src_vertex and dst_vertices are placeholders not defined in this snippet.
model.predict(src_vertex, dst_vertices)
- UDA，自定义算法
- 快速部署工具
ng_ai 是完全 build in public 的，欢迎社区的大家们来参与，一起来完善 ng_ai，让 NebulaGraph 上的 AI 算法更加简单、易用！
我们已经准备好了一键部署的 NebulaGraph + Studio + ng_ai in Jupyter 的环境，只需要大家从 Docker Desktop 的 Extension（扩展）中搜索 NebulaGraph，就可以试玩了。
在 Docker Desktop 的插件市场搜索 NebulaGraph，点击安装
- 安装 ng_ai playground
进入 NebulaGraph 插件，点击 Install NX Mode，安装 ng_ai 的 NetworkX playground，通常要等几分钟等待安装完成。
- 进入 NetworkX playground
点击 Jupyter NB NetworkX，进入 NetworkX playground。
ng_ai 的架构如下，它的核心模块有：
- Reader：负责从 NebulaGraph 读取数据
- Writer：负责将数据写入 NebulaGraph
- *Engine：负责适配不同运行时，例如 Spark、DGL、NetworkX 等
- Algo：算法模块，例如 PageRank、Louvain、GNN_Link_Predict 等
此外，为了支持 nGQL 中的调用，还有两个模块：
- ng_ai-udf：负责将 UDF 注册到 NebulaGraph，接受 ng_ai 的 query 调用，访问 ng_ai API
- ng_ai-api：ng_ai 的 API 服务，接受 UDF 的调用，访问 ng_ai 核心模块
asciiarmor
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
โ Spark Cluster โ
โ .โโโโโ. .โโโโโ. .โโโโโ. .โโโโโ. โ
โ ; : ; : ; : ; : โ
โโโถโ : ; : ; : ; : ; โ
โ โ โฒ โฑ โฒ โฑ โฒ โฑ โฒ โฑ โ
โ โ `โโโ' `โโโ' `โโโ' `โโโ' โ
Algo Spark โ
Engineโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฌโโโโโโโโโโโ
โโโโค โ โ
โ NebulaGraph AI Suite(ngai) โ ngai-api โโโโ
โ โ โ โ
โ โโโโโโโโโโโโค โ
โ โโโโโโโโโโ โโโโโโโโ โโโโโโโโโโ โโโโโโโ โ โ
โ โ Reader โ โ Algo โ โ Writer โ โ GNN โ โ โ
โโโโโโโโโถโ โโโโโโโโโโ โโโโโโโโ โโโโโโโโโโ โโโโโโโ โ โ
โ โ โ โ โ โ โ โ
โ โ โโโโโโโโโโโโโโดโโโโฌโโโโโโโโโดโโโโโโ โโโโโโโโ โ โ
โ โ โผ โผ โผ โผ โ โ
โ โ โโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโ โโโโโโโโโโโโ โโโโโโโโโโโโ โ โ
โ โโโโค โ SparkEngine โ โ NebulaEngine โ โ NetworkX โ โ DGLEngineโ โ โ
โ โ โ โโโโโโโโโโโโโโโ โโโโโโโโโโโโโโโโ โโโโโโโโโโโโ โโโโโโโโโโโโ โ โ
โ โ โโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
โ โ โ Spark โ
โ โ โโโโโโโโโReader โโโโโโโโโโโโโ โ
โ Spark Query Mode โ โ
โ Reader โ โ
โScan Mode โผ โโโโโโโโโโโ
โ โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฌโโโโโโโโโโค ngai-udfโโโโโโโโโโโโโโโโ
โ โ โ โ โโโโโโโโโโโค โ
โ โ โ NebulaGraph Graph Engine Nebula-GraphD โ ngai-GraphD โ โ
โ โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโผโโโโโโโโโโโโโโโโโโโโ โ
โ โ โ โ โ โ
โ โ โ NebulaGraph Storage Engine โ โ โ
โ โ โ โ โ โ
โ โโโถโ Nebula-StorageD โ Nebula-Metad โ โ
โ โ โ โ โ
โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโดโโโโโโโโโโโโโโโโโโโโโ โ
โ โ
โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
โ โ RETURN ng_ai("pagerank", ["follow"], ["degree"], "spark", {space:"basketballplayer"}) โโโโ
โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
โ โ from ng_ai import NebulaReader โ
โ โ โ
โ โ # read data with spark engine, scan mode โ
โ โ reader = NebulaReader(engine="spark") โ
โ โ reader.scan(edge="follow", props="degree") โ
โโโโ df = reader.read() โ
โ โ
โ # run pagerank algorithm โ
โ pr_result = df.algo.pagerank(reset_prob=0.15, max_iter=10) โ
โ โ
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ