|
| 1 | +package com.example; |
| 2 | + |
| 3 | +import java.sql.Connection; |
| 4 | +import java.sql.DriverManager; |
| 5 | +import java.sql.PreparedStatement; |
| 6 | +import java.sql.ResultSet; |
| 7 | +import java.sql.SQLException; |
| 8 | +import java.sql.Statement; |
| 9 | +import com.pgvector.PGbit; |
| 10 | +import org.openscience.cdk.exception.CDKException; |
| 11 | +import org.openscience.cdk.exception.InvalidSmilesException; |
| 12 | +import org.openscience.cdk.fingerprint.CircularFingerprinter; |
| 13 | +import org.openscience.cdk.fingerprint.IBitFingerprint; |
| 14 | +import org.openscience.cdk.interfaces.IAtomContainer; |
| 15 | +import org.openscience.cdk.silent.SilentChemObjectBuilder; |
| 16 | +import org.openscience.cdk.smiles.SmilesParser; |
| 17 | + |
| 18 | +public class Example { |
| 19 | + public static void main(String[] args) throws CDKException, InvalidSmilesException, SQLException { |
| 20 | + Connection conn = DriverManager.getConnection("jdbc:postgresql://localhost:5432/pgvector_example"); |
| 21 | + |
| 22 | + Statement setupStmt = conn.createStatement(); |
| 23 | + setupStmt.executeUpdate("CREATE EXTENSION IF NOT EXISTS vector"); |
| 24 | + setupStmt.executeUpdate("DROP TABLE IF EXISTS molecules"); |
| 25 | + |
| 26 | + Statement createStmt = conn.createStatement(); |
| 27 | + createStmt.executeUpdate("CREATE TABLE molecules (id text PRIMARY KEY, fingerprint bit(2048))"); |
| 28 | + |
| 29 | + String[] molecules = { |
| 30 | + "Cc1ccccc1", |
| 31 | + "Cc1ncccc1", |
| 32 | + "c1ccccn1" |
| 33 | + }; |
| 34 | + for (String molecule : molecules) { |
| 35 | + PreparedStatement insertStmt = conn.prepareStatement("INSERT INTO molecules (id, fingerprint) VALUES (?, ?)"); |
| 36 | + insertStmt.setString(1, molecule); |
| 37 | + insertStmt.setObject(2, new PGbit(generateFingerprint(molecule))); |
| 38 | + insertStmt.executeUpdate(); |
| 39 | + } |
| 40 | + |
| 41 | + String queryMolecule = "c1ccco1"; |
| 42 | + PreparedStatement queryStmt = conn.prepareStatement("SELECT id, fingerprint <%> ? AS distance FROM molecules ORDER BY distance LIMIT 5"); |
| 43 | + queryStmt.setObject(1, new PGbit(generateFingerprint(queryMolecule))); |
| 44 | + ResultSet rs = queryStmt.executeQuery(); |
| 45 | + while (rs.next()) { |
| 46 | + System.out.println(String.format("%s: %f", rs.getString("id"), rs.getDouble("distance"))); |
| 47 | + } |
| 48 | + |
| 49 | + conn.close(); |
| 50 | + } |
| 51 | + |
| 52 | + private static boolean[] generateFingerprint(String molecule) throws CDKException, InvalidSmilesException { |
| 53 | + SmilesParser sp = new SmilesParser(SilentChemObjectBuilder.getInstance()); |
| 54 | + IAtomContainer m = sp.parseSmiles(molecule); |
| 55 | + |
| 56 | + CircularFingerprinter fingerprinter = new CircularFingerprinter(CircularFingerprinter.CLASS_ECFP6, 2048); |
| 57 | + IBitFingerprint fp = fingerprinter.getBitFingerprint(m); |
| 58 | + |
| 59 | + boolean[] ba = new boolean[(int) fp.size()]; |
| 60 | + for (int i : fp.getSetbits()) { |
| 61 | + ba[i] = true; |
| 62 | + } |
| 63 | + return ba; |
| 64 | + } |
| 65 | +} |
0 commit comments