Skip to content

Commit

Permalink
Merge pull request #68 from codefuse-ai/xxh_dev
Browse files Browse the repository at this point in the history
[Feat]Add properties extractor source code and library files
  • Loading branch information
zaqcxy authored Aug 20, 2024
2 parents 1250196 + d16876d commit a59f66a
Show file tree
Hide file tree
Showing 69 changed files with 4,872 additions and 0 deletions.
9 changes: 9 additions & 0 deletions language/properties/extractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Introduction
The codefuse-query properties extractor transforms the source code of properties files (`*.properties` and `*.properties.vm`) into standardized coref-properties data, which is then used by codefuse-query for further analysis.

# Quick Start
1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current setting. If it displays as empty, then it has not been configured yet.
2. Build. Execute `mvn clean install`.
3. Run. Execute `java -jar target/properties-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`.

After execution, a file named coref_properties_src.db will be generated in the ./db directory.
9 changes: 9 additions & 0 deletions language/properties/extractor/README_cn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# 简介
Codefuse-query Properties 提取器将 Properties 文件的源代码转换为标准化的 coref-properties 数据,这些数据用于 codefuse-query 进行进一步分析。

# 快速开始
1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。
2. 构建。执行 mvn clean install。
3. 运行。执行 java -jar target/properties-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。

执行后,一个名为 coref_properties_src.db 的文件将生成在 ./db 目录下。
159 changes: 159 additions & 0 deletions language/properties/extractor/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.alipay.codequery.properties</groupId>
    <artifactId>properties-extractor</artifactId>
    <version>1.0-SNAPSHOT</version>

    <packaging>jar</packaging>

    <name>properties-extractor</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single source of truth for all log4j artifacts so they cannot drift apart.
             2.17.1 still runs on Java 8 and contains the fixes for CVE-2021-44228 (Log4Shell)
             and the related follow-up CVEs that affect 2.14.1. -->
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.15</version>
        </dependency>
        <!-- Compile-time only: Lombok is an annotation processor and is not needed at runtime. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.36.0.2</version>
        </dependency>

        <dependency>
            <groupId>org.mybatis</groupId>
            <artifactId>mybatis</artifactId>
            <version>3.5.7</version>
        </dependency>

        <dependency>
            <groupId>tk.mybatis</groupId>
            <artifactId>mapper</artifactId>
            <!-- Using the latest version is recommended; look it up on the project home page. -->
            <version>4.1.5</version>
        </dependency>

        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>info.picocli</groupId>
            <artifactId>picocli</artifactId>
            <version>4.6.1</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <!-- Tests are skipped during the build. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.4.2</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.mybatis.generator</groupId>
                <artifactId>mybatis-generator-maven-plugin</artifactId>
                <version>1.3.7</version>
                <configuration>
                    <verbose>true</verbose>
                    <overwrite>true</overwrite>
                </configuration>
                <dependencies>
                    <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>sqlite-jdbc</artifactId>
                        <version>3.36.0.2</version>
                    </dependency>
                    <dependency>
                        <groupId>tk.mybatis</groupId>
                        <artifactId>mapper</artifactId>
                        <version>4.1.5</version>
                    </dependency>
                </dependencies>
                <executions>
                    <execution>
                        <id>Generate MyBatis Artifacts</id>
                        <goals>
                            <goal>generate</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds the runnable *-jar-with-dependencies.jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.5</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.alipay.codequery.properties.Extractor</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@

package com.alipay.codequery.properties;

import com.alipay.codequery.properties.core.CorefExtractor;
import com.alipay.codequery.properties.model.Folder;
import com.alipay.codequery.properties.model.Node;
import com.alipay.codequery.properties.model.Program;
import com.alipay.codequery.properties.storage.CorefStorage;
import com.alipay.codequery.properties.core.CorefURI;
import com.alipay.codequery.properties.util.LoggerUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Parameters;

import java.io.*;
import java.util.concurrent.Callable;


/**
 * Command-line entry point that walks a source directory, records its folder structure and
 * extracts every properties file ({@code *.properties} / {@code *.properties.vm}) into the
 * storage backed by the given output directory.
 */
@Command(name = "extract", mixinStandardHelpOptions = true, version = "extract 1.0",
        description = "extract COREF-Properties db from a src directory.")
@Slf4j
public class Extractor implements Callable<Integer> {

    private static final Logger logger = LogManager.getLogger(Extractor.class);

    @Parameters(index = "0", description = "The source directory to extract.")
    private File srcRootDir;

    @Parameters(index = "1", description = "The output directory for the DB file.")
    private File dbDir;

    @CommandLine.Option(names = {"--corpus"}, description = "Specify the corpus of the codebase.")
    private String corpus = "";

    /**
     * Parses the command line with picocli, runs the extraction and exits the JVM with the
     * resulting exit code.
     *
     * @param args command-line arguments: source directory, output directory, optional --corpus
     */
    public static void main(String[] args) {
        int exitCode = new CommandLine(new Extractor()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Tells whether a file is one this extractor handles.
     *
     * @param file the candidate file
     * @return true when the name ends with {@code .properties} or {@code .properties.vm}
     */
    private static boolean isPropertiesFile(File file) {
        String name = file.getName();
        return name.endsWith(".properties") || name.endsWith(".properties.vm");
    }

    /**
     * Computes the path of a directory relative to the source root.
     *
     * @param absolutePath absolute path of a directory located below {@code rootPath}
     * @param rootPath     absolute path of the source root
     * @return {@code absolutePath} with the root prefix and the following separator removed
     */
    private static String relativeToRoot(String absolutePath, String rootPath) {
        // A file-system root such as "/" already ends with the separator; do not skip an extra char.
        int prefixLength = rootPath.endsWith(File.separator) ? rootPath.length() : rootPath.length() + 1;
        return absolutePath.substring(prefixLength);
    }

    /**
     * Recursively extracts every properties file below {@code rootDir}. A failure on one file is
     * logged and does not stop the traversal.
     *
     * @param rootDir      directory to scan
     * @param corefStorage storage handed to each per-file extractor
     * @param corefURI     URI helper handed to each per-file extractor
     * @throws IOException declared for callers; kept for interface compatibility
     */
    private void parse(File rootDir, CorefStorage corefStorage, CorefURI corefURI) throws IOException {
        File[] files = rootDir.listFiles();
        if (files == null) {
            // listFiles() returns null when the directory cannot be read (I/O error, permissions).
            logger.warn("Skip unreadable directory: {}", rootDir.getAbsolutePath());
            return;
        }
        for (File file : files) {
            if (file.isDirectory()) {
                parse(file, corefStorage, corefURI);
            } else if (isPropertiesFile(file)) {
                logger.info("Start Extracting properties file: {}", file.getAbsolutePath());
                try {
                    CorefExtractor extractor = new CorefExtractor(file, corefStorage, srcRootDir.getAbsolutePath(), corefURI);
                    extractor.parse();
                } catch (Exception e) {
                    // Best effort per file; pass the exception itself so the stack trace is kept.
                    logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath(), e);
                }
            }
        }
    }

    /**
     * Creates the program node for the repository and stores it.
     *
     * @param repoDir      path recorded as the program prefix
     * @param corefStorage storage receiving the program record
     * @param corefURI     URI helper used to generate the program oid
     * @return the created program node
     */
    private Program createProgramNode(String repoDir, CorefStorage corefStorage, CorefURI corefURI) {
        Program program = new Program();
        program.oid = corefURI.generateCorpusOid();
        program.prefix = repoDir;
        corefStorage.storeProgram(program.extractProgram());
        return program;
    }

    /**
     * Stores a folder record for {@code repoDir}, registers the properties files it directly
     * contains in {@code CorefURI.fileMap}, and recurses into its sub folders. Folders whose name
     * starts with "." are ignored, except for the source root itself.
     *
     * @param repoDir      path of the directory to visit
     * @param parent       node recorded as the parent of this folder
     * @param corefStorage storage receiving the folder records
     * @param corefURI     URI helper used to generate folder oids
     */
    private void visitDirectory(String repoDir, Node parent, CorefStorage corefStorage, CorefURI corefURI) {
        File file = new File(repoDir);
        if (!file.isDirectory()) {
            return;
        }

        String rootPath = srcRootDir.getAbsolutePath();
        String absolutePath = file.getAbsolutePath();
        // Exact comparison: a nested folder such as <root>/main/<root-name> must not count as the root.
        boolean isRoot = absolutePath.equals(rootPath);

        // Ignore the folder starts with ".", but never the root (e.g. when the repo is given as ".").
        if (!isRoot && file.getName().startsWith(".")) {
            return;
        }

        Folder parentFolder = new Folder();
        parentFolder.name = file.getName();
        parentFolder.parent = parent;
        parentFolder.relativePath = isRoot ? "ROOT" : relativeToRoot(absolutePath, rootPath);

        corefURI.setPath(parentFolder.relativePath);
        parentFolder.oid = corefURI.generateFileOid();
        corefStorage.storeFolder(parentFolder.extractFolder());

        File[] children = file.listFiles();
        if (children == null) {
            logger.warn("Skip unreadable directory: {}", absolutePath);
            return;
        }

        // Recursively visit the sub folders.
        for (File f : children) {
            if (f.isDirectory()) {
                visitDirectory(f.getAbsolutePath(), parentFolder, corefStorage, corefURI);
            } else if (isPropertiesFile(f)) {
                // Same file filter as parse(), so every extracted file has its folder oid registered.
                // NOTE(review): assumes CorefExtractor looks files up in fileMap - confirm.
                CorefURI.fileMap.put(f.getAbsolutePath(), parentFolder.oid);
            }
        }
    }

    /**
     * Runs the extraction: stores the program node, records the folder tree, extracts every
     * properties file and finally stores the collected data.
     *
     * @return 0 on success, 1 when the source directory does not exist or is not a directory
     * @throws Exception if storage or traversal fails unexpectedly
     */
    @Override
    public Integer call() throws Exception {
        LoggerUtil.initLogger(Level.INFO);

        if (!srcRootDir.isDirectory()) {
            // Fail early with a clear message; 1 is the exit code the previous uncaught failure produced.
            logger.error("Source directory does not exist or is not a directory: {}", srcRootDir.getAbsolutePath());
            return 1;
        }

        long start = System.currentTimeMillis();
        CorefStorage corefStorage = new CorefStorage(dbDir.getAbsolutePath());
        CorefURI corefURI = StringUtils.isBlank(corpus) ? new CorefURI(srcRootDir.getAbsolutePath()) : new CorefURI(corpus);
        Program program = createProgramNode(srcRootDir.getAbsolutePath(), corefStorage, corefURI);
        visitDirectory(srcRootDir.getAbsolutePath(), program, corefStorage, corefURI);

        parse(srcRootDir, corefStorage, corefURI);
        corefStorage.store();

        logger.info("Time to completion (TTC): {}", System.currentTimeMillis() - start);

        return 0;
    }
}
Loading

0 comments on commit a59f66a

Please sign in to comment.