Skip to content

Commit d9b072a

Browse files
committed
Added extended-connectivity fingerprints example [skip ci]
1 parent 47d9cdc commit d9b072a

3 files changed

Lines changed: 132 additions & 0 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Or check out some examples:
3939
- [Binary embeddings](examples/cohere/src/main/java/com/example/Example.java) with Cohere
4040
- [Sentence embeddings](examples/djl/src/main/java/com/example/Example.java) with Deep Java Library
4141
- [Hybrid search](examples/hybrid/src/main/java/com/example/Example.java) with Deep Java Library (Reciprocal Rank Fusion)
42+
- [Extended-connectivity fingerprints](examples/cdk/src/main/java/com/example/Example.java) with the Chemistry Development Kit
4243
- [Horizontal scaling](examples/citus/src/main/java/com/example/Example.java) with Citus
4344
- [Bulk loading](examples/loading/src/main/java/com/example/Example.java) with `COPY`
4445

examples/cdk/pom.xml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the extended-connectivity fingerprints example.
  Compiles for Java 11 and bundles the application together with its
  dependencies into a single jar whose entry point is com.example.Example.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>example</artifactId>
  <version>1</version>

  <properties>
    <maven.compiler.release>11</maven.compiler.release>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- PostgreSQL JDBC driver -->
    <dependency>
      <groupId>org.postgresql</groupId>
      <artifactId>postgresql</artifactId>
      <version>42.7.3</version>
    </dependency>

    <!-- pgvector types for JDBC (provides com.pgvector.PGbit) -->
    <dependency>
      <groupId>com.pgvector</groupId>
      <artifactId>pgvector</artifactId>
      <version>0.1.6</version>
    </dependency>

    <!-- Chemistry Development Kit modules used by the example -->
    <dependency>
      <groupId>org.openscience.cdk</groupId>
      <artifactId>cdk-fingerprint</artifactId>
      <version>2.9</version>
    </dependency>
    <dependency>
      <groupId>org.openscience.cdk</groupId>
      <artifactId>cdk-silent</artifactId>
      <version>2.9</version>
    </dependency>
    <dependency>
      <groupId>org.openscience.cdk</groupId>
      <artifactId>cdk-smiles</artifactId>
      <version>2.9</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Assemble a jar-with-dependencies during the package phase -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.7.1</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <mainClass>com.example.Example</mainClass>
            </manifest>
          </archive>
          <finalName>example</finalName>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
examples/cdk/src/main/java/com/example/Example.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package com.example;
2+
3+
import java.sql.Connection;
4+
import java.sql.DriverManager;
5+
import java.sql.PreparedStatement;
6+
import java.sql.ResultSet;
7+
import java.sql.SQLException;
8+
import java.sql.Statement;
9+
import com.pgvector.PGbit;
10+
import org.openscience.cdk.exception.CDKException;
11+
import org.openscience.cdk.exception.InvalidSmilesException;
12+
import org.openscience.cdk.fingerprint.CircularFingerprinter;
13+
import org.openscience.cdk.fingerprint.IBitFingerprint;
14+
import org.openscience.cdk.interfaces.IAtomContainer;
15+
import org.openscience.cdk.silent.SilentChemObjectBuilder;
16+
import org.openscience.cdk.smiles.SmilesParser;
17+
18+
public class Example {
19+
public static void main(String[] args) throws CDKException, InvalidSmilesException, SQLException {
20+
Connection conn = DriverManager.getConnection("jdbc:postgresql://localhost:5432/pgvector_example");
21+
22+
Statement setupStmt = conn.createStatement();
23+
setupStmt.executeUpdate("CREATE EXTENSION IF NOT EXISTS vector");
24+
setupStmt.executeUpdate("DROP TABLE IF EXISTS molecules");
25+
26+
Statement createStmt = conn.createStatement();
27+
createStmt.executeUpdate("CREATE TABLE molecules (id text PRIMARY KEY, fingerprint bit(2048))");
28+
29+
String[] molecules = {
30+
"Cc1ccccc1",
31+
"Cc1ncccc1",
32+
"c1ccccn1"
33+
};
34+
for (String molecule : molecules) {
35+
PreparedStatement insertStmt = conn.prepareStatement("INSERT INTO molecules (id, fingerprint) VALUES (?, ?)");
36+
insertStmt.setString(1, molecule);
37+
insertStmt.setObject(2, new PGbit(generateFingerprint(molecule)));
38+
insertStmt.executeUpdate();
39+
}
40+
41+
String queryMolecule = "c1ccco1";
42+
PreparedStatement queryStmt = conn.prepareStatement("SELECT id, fingerprint <%> ? AS distance FROM molecules ORDER BY distance LIMIT 5");
43+
queryStmt.setObject(1, new PGbit(generateFingerprint(queryMolecule)));
44+
ResultSet rs = queryStmt.executeQuery();
45+
while (rs.next()) {
46+
System.out.println(String.format("%s: %f", rs.getString("id"), rs.getDouble("distance")));
47+
}
48+
49+
conn.close();
50+
}
51+
52+
private static boolean[] generateFingerprint(String molecule) throws CDKException, InvalidSmilesException {
53+
SmilesParser sp = new SmilesParser(SilentChemObjectBuilder.getInstance());
54+
IAtomContainer m = sp.parseSmiles(molecule);
55+
56+
CircularFingerprinter fingerprinter = new CircularFingerprinter(CircularFingerprinter.CLASS_ECFP6, 2048);
57+
IBitFingerprint fp = fingerprinter.getBitFingerprint(m);
58+
59+
boolean[] ba = new boolean[(int) fp.size()];
60+
for (int i : fp.getSetbits()) {
61+
ba[i] = true;
62+
}
63+
return ba;
64+
}
65+
}

0 commit comments

Comments
 (0)